In [28]:
import pandas as pd
import numpy as np
import emd
import pylab as plt
import statsmodels.api as sm
import plotly.express as px
import os
from sklearn import linear_model
np.random.seed(15)

In [29]:
def filter_and_regress(combined_data, country, 
        low_pass_percent=0.2, med_pass_percent=0.15, high_pass_percent=0.7):
        
        combined_data = combined_data[["Last Price", "Price", "Actual"]]
        combined_data = combined_data.dropna(axis=0)
        elec_price = combined_data["Price"].to_numpy()
        lng_price = combined_data["Last Price"].to_numpy()
        demand = combined_data["Actual"].to_numpy()
        # plot and transform all of the data for electricity pricing
        #plt.figure()
        #plt.plot(elec_price, "k")

        imf, noise = emd.sift.complete_ensemble_sift(elec_price, ensemble_noise=1)
        # create the pass thresholds based on the input percentages
        low_pass_thresh_elec = int(np.ceil(low_pass_percent * imf.shape[1]))
        med_pass_thresh_elec = int(np.ceil(med_pass_percent * imf.shape[1]))
        high_pass_thresh_elec = int(np.ceil(high_pass_percent * imf.shape[1]))
        #emd.plotting.plot_imfs(imf)

        IP, IF, IA = emd.spectra.frequency_transform(imf, 2156, "hilbert")
        # plot and transform all of the data for LNG prices
        #plt.figure()
        #plt.plot(lng_price, "k")

        lng_imf, lng_noise = emd.sift.complete_ensemble_sift(lng_price, ensemble_noise=1)
        low_pass_thresh_lng = int(np.ceil(low_pass_percent * lng_imf.shape[1]))
        med_pass_thresh_lng = int(np.ceil(med_pass_percent * lng_imf.shape[1]))
        high_pass_thresh_lng = int(np.ceil(high_pass_percent * lng_imf.shape[1]))

        #emd.plotting.plot_imfs(lng_imf)

        IP, IF, IA = emd.spectra.frequency_transform(imf, 2156, "hilbert")

        demand_imf, demand_noise = emd.sift.complete_ensemble_sift(demand, ensemble_noise=1)
        low_pass_thresh_demand = int(np.ceil(low_pass_percent * demand_imf.shape[1]))
        med_pass_thresh_demand = int(np.ceil(med_pass_percent * demand_imf.shape[1]))
        high_pass_thresh_demand = int(np.ceil(high_pass_percent * demand_imf.shape[1]))
        
        low_pass_elec = imf[:, low_pass_thresh_elec:]
        low_pass_means_elec = np.apply_along_axis(np.mean, 1, low_pass_elec)

        low_pass_lng = lng_imf[:, low_pass_thresh_lng:]
        low_pass_means_lng = np.apply_along_axis(np.mean, 1, low_pass_lng)

        low_pass_demand = demand_imf[:, low_pass_thresh_demand:]
        low_pass_means_demand = np.apply_along_axis(np.mean, 1, low_pass_demand)

        #px.scatter(x=low_pass_means_elec, y=low_pass_means_lng)


        med_pass_elec = imf[:, med_pass_thresh_elec:]
        med_pass_means_elec = np.apply_along_axis(np.mean, 1, med_pass_elec)

        med_pass_lng = lng_imf[:, med_pass_thresh_lng:]
        med_pass_means_lng = np.apply_along_axis(np.mean, 1, med_pass_lng)

        med_pass_demand = demand_imf[:, med_pass_thresh_demand:]
        med_pass_means_demand = np.apply_along_axis(np.mean, 1, med_pass_demand)

        #px.scatter(x=med_pass_means_elec, y=med_pass_means_lng)
        
        high_pass_elec = imf[:, high_pass_thresh_elec:]
        high_pass_means_elec = np.apply_along_axis(np.mean, 1, high_pass_elec)

        high_pass_lng = lng_imf[:, high_pass_thresh_lng:]
        high_pass_means_lng = np.apply_along_axis(np.mean, 1, high_pass_lng)

        high_pass_demand = lng_imf[:, high_pass_thresh_demand:]
        high_pass_means_demand = np.apply_along_axis(np.mean, 1, high_pass_demand)
        
        #px.scatter(x=high_pass_means_elec, y=high_pass_means_lng)
        X_low = pd.DataFrame({"LNG": low_pass_means_lng, "Demand": low_pass_means_demand})
        X_low_log = X_low.copy()
        X_low_log["LNG"] = X_low_log["LNG"].apply(lambda x: np.log(x+15))
        low_model = linear_model.LinearRegression().fit(X_low_log, np.log(low_pass_means_elec+15))
        print("Low pass LNG coefficient = {}, Demand Coefficient = {}".format(low_model.coef_[0], low_model.coef_[1]))

        X_med = pd.DataFrame({"LNG": med_pass_means_lng, "Demand": med_pass_means_demand})
        X_med_log = X_med.copy()
        X_med_log["LNG"] = X_med_log["LNG"].apply(lambda x: np.log(x+15))
        med_model = linear_model.LinearRegression().fit(X_med_log, np.log(med_pass_means_elec + 15))
        print("Med Pass LNG coefficient = {}, Demand Coefficient = {}".format(med_model.coef_[0], med_model.coef_[1]))

        X_high = pd.DataFrame({"LNG": high_pass_means_lng, "Demand": high_pass_means_demand})
        X_high_log = X_high.copy()
        X_high_log["LNG"] = X_high_log["LNG"].apply(lambda x: np.log(x + 15))
        high_model = linear_model.LinearRegression().fit(X_high_log, np.log(high_pass_means_elec + 15))
        print("High Pass LNG coefficient = {},  Demand Coefficent = {}".format(high_model.coef_[0], high_model.coef_[1]))

In [30]:
#filter_and_regress(pd.read_csv("./Data/Spain/combined_data.csv"), "Spain")
#data = pd.read_csv("./Data/Germany/combined.csv")
#filter_and_regress(data, "Germany")

Low pass LNG coefficient = 1.1178447131860858, Demand Coefficient = 6.052536255861113e-05
Med Pass LNG coefficient = 1.128345509378336, Demand Coefficient = 6.204091786626618e-05
High Pass LNG coefficient = 0.4502502087443484,  Demand Coefficent = 0.02487579046613835


In [31]:
# run regressions based on the timescales of COVID and war in Ukraine
def timeperiod_differences(combined_data_path, country_name):
        # these serve as best guesses, change at will
        COVID_START = "2020-03-01"
        WAR_START = "2022-02-01"

        # read in the data from the combined dataset
        data = pd.read_csv(combined_data_path)
        data["Date"] = pd.to_datetime(data["Date"])
        pre_covid = data[data["Date"] < COVID_START]

        covid = data[data["Date"] > COVID_START]
        covid = covid[covid["Date"] < WAR_START]

        war = data[data["Date"] > WAR_START]

        # run the regressions on the given datasets
        print("Pre-COVID in {}".format(country_name))
        filter_and_regress(pre_covid, country_name)

        print("COVID Era in {}".format(country_name))
        filter_and_regress(covid, country_name)

        print("War in Ukraine Era in {}".format(country_name))
        filter_and_regress(war, country_name)





In [32]:
timeperiod_differences("./Data/Spain/combined_data.csv", "Spain")

Pre-COVID in Spain
Low pass LNG coefficient = 1.072856147305029, Demand Coefficient = 6.888912625139737e-05
Med Pass LNG coefficient = 1.0823580557423544, Demand Coefficient = 6.99303265743385e-05
High Pass LNG coefficient = 0.1468254175530575,  Demand Coefficent = 0.05433847569687829
COVID Era in Spain
Low pass LNG coefficient = 1.396095585112108, Demand Coefficient = 9.507636079252136e-05
Med Pass LNG coefficient = 1.4091627720153255, Demand Coefficient = 9.768732214516367e-05
High Pass LNG coefficient = 1.1377766622230212,  Demand Coefficent = 0.007313775059757302
War in Ukraine Era in Spain
Low pass LNG coefficient = 0.09519674591617001, Demand Coefficient = 9.426972987011496e-05
Med Pass LNG coefficient = 0.09517290795599745, Demand Coefficient = 9.744991606003545e-05
High Pass LNG coefficient = 0.6748042528369192,  Demand Coefficent = -0.013319016047713648


In [33]:
timeperiod_differences("./Data/Netherlands/combined_data.csv", "Netherlands")

Pre-COVID in Netherlands
Low pass LNG coefficient = 1.015315624687126, Demand Coefficient = 0.0001640938432368137
Med Pass LNG coefficient = 1.016005998533483, Demand Coefficient = 0.00016666162263245495
High Pass LNG coefficient = -2.6521460592283264,  Demand Coefficent = 0.21388872283319127
COVID Era in Netherlands
Low pass LNG coefficient = 1.2711374764595895, Demand Coefficient = 0.00034245955501468816
Med Pass LNG coefficient = 1.288180860161182, Demand Coefficient = 0.0003520302476625618
High Pass LNG coefficient = 0.3353049168276824,  Demand Coefficent = 0.03869696131128682
War in Ukraine Era in Netherlands
Low pass LNG coefficient = 0.8437318494473445, Demand Coefficient = 0.0006734989285205384
Med Pass LNG coefficient = 0.8728888399304444, Demand Coefficient = 0.0007178328663873201
High Pass LNG coefficient = 0.06918052017137856,  Demand Coefficent = 0.022073395616934655
