In [15]:
import pandas as pd
import numpy as np
import emd
import pylab as plt
import statsmodels.api as sm
import plotly.express as px
import os
from sklearn import linear_model
np.random.seed(15)

In [16]:
def filter_and_regress(combined_data, country, 
        low_pass_percent=0.2, med_pass_percent=0.5, high_pass_percent=0.7):
        
        combined_data = combined_data[["Last Price", "Price", "Actual"]]
        combined_data = combined_data.dropna(axis=0)
        elec_price = combined_data["Price"].to_numpy()
        lng_price = combined_data["Last Price"].to_numpy()
        demand = combined_data["Actual"].to_numpy()
        # plot and transform all of the data for electricity pricing
        #plt.figure()
        #plt.plot(elec_price, "k")

        imf, noise = emd.sift.complete_ensemble_sift(elec_price, ensemble_noise=1)
        # create the pass thresholds based on the input percentages
        low_pass_thresh_elec = int(np.ceil(low_pass_percent * imf.shape[1]))
        med_pass_thresh_elec = int(np.ceil(med_pass_percent * imf.shape[1]))
        high_pass_thresh_elec = int(np.ceil(high_pass_percent * imf.shape[1]))
        #emd.plotting.plot_imfs(imf)

        IP, IF, IA = emd.spectra.frequency_transform(imf, 2156, "hilbert")
        # plot and transform all of the data for LNG prices
        #plt.figure()
        #plt.plot(lng_price, "k")

        lng_imf, lng_noise = emd.sift.complete_ensemble_sift(lng_price, ensemble_noise=1)
        low_pass_thresh_lng = int(np.ceil(low_pass_percent * lng_imf.shape[1]))
        med_pass_thresh_lng = int(np.ceil(med_pass_percent * lng_imf.shape[1]))
        high_pass_thresh_lng = int(np.ceil(high_pass_percent * lng_imf.shape[1]))

        #emd.plotting.plot_imfs(lng_imf)

        IP, IF, IA = emd.spectra.frequency_transform(imf, 2156, "hilbert")

        demand_imf, demand_noise = emd.sift.complete_ensemble_sift(demand, ensemble_noise=1)
        low_pass_thresh_demand = int(np.ceil(low_pass_percent * demand_imf.shape[1]))
        med_pass_thresh_demand = int(np.ceil(med_pass_percent * demand_imf.shape[1]))
        high_pass_thresh_demand = int(np.ceil(high_pass_percent * demand_imf.shape[1]))
        
        low_pass_elec = imf[:, low_pass_thresh_elec:]
        low_pass_means_elec = np.apply_along_axis(np.mean, 1, low_pass_elec)

        low_pass_lng = lng_imf[:, low_pass_thresh_lng:]
        low_pass_means_lng = np.apply_along_axis(np.mean, 1, low_pass_lng)

        low_pass_demand = demand_imf[:, low_pass_thresh_demand:]
        low_pass_means_demand = np.apply_along_axis(np.mean, 1, low_pass_demand)

        #px.scatter(x=low_pass_means_elec, y=low_pass_means_lng)


        med_pass_elec = imf[:, med_pass_thresh_elec:]
        med_pass_means_elec = np.apply_along_axis(np.mean, 1, med_pass_elec)

        med_pass_lng = lng_imf[:, med_pass_thresh_lng:]
        med_pass_means_lng = np.apply_along_axis(np.mean, 1, med_pass_lng)

        med_pass_demand = demand_imf[:, med_pass_thresh_demand:]
        med_pass_means_demand = np.apply_along_axis(np.mean, 1, med_pass_demand)

        #px.scatter(x=med_pass_means_elec, y=med_pass_means_lng)
        
        high_pass_elec = imf[:, high_pass_thresh_elec:]
        high_pass_means_elec = np.apply_along_axis(np.mean, 1, high_pass_elec)

        high_pass_lng = lng_imf[:, high_pass_thresh_lng:]
        high_pass_means_lng = np.apply_along_axis(np.mean, 1, high_pass_lng)

        high_pass_demand = lng_imf[:, high_pass_thresh_demand:]
        high_pass_means_demand = np.apply_along_axis(np.mean, 1, high_pass_demand)
        
        #px.scatter(x=high_pass_means_elec, y=high_pass_means_lng)
        X_low = pd.DataFrame({"LNG": low_pass_means_lng, "Demand": low_pass_means_demand})
        X_low_log = X_low.copy()
        X_low_log["LNG"] = X_low_log["LNG"].apply(lambda x: np.log(x+15))
        low_model = linear_model.LinearRegression().fit(X_low_log, np.log(low_pass_means_elec+15))
        print("Low thresh LNG coefficient = {}, Demand Coefficient = {}".format(low_model.coef_[0], low_model.coef_[1]))

        X_med = pd.DataFrame({"LNG": med_pass_means_lng, "Demand": med_pass_means_demand})
        X_med_log = X_med.copy()
        X_med_log["LNG"] = X_med_log["LNG"].apply(lambda x: np.log(x+15))
        med_model = linear_model.LinearRegression().fit(X_med_log, np.log(med_pass_means_elec + 15))
        print("Med thresh LNG coefficient = {}, Demand Coefficient = {}".format(med_model.coef_[0], med_model.coef_[1]))

        X_high = pd.DataFrame({"LNG": high_pass_means_lng, "Demand": high_pass_means_demand})
        X_high_log = X_high.copy()
        X_high_log["LNG"] = X_high_log["LNG"].apply(lambda x: np.log(x + 15))
        high_model = linear_model.LinearRegression().fit(X_high_log, np.log(high_pass_means_elec + 15))
        print("High thresh LNG coefficient = {},  Demand Coefficent = {}".format(high_model.coef_[0], high_model.coef_[1]))

In [17]:
#filter_and_regress(pd.read_csv("./Data/Spain/combined_data.csv"), "Spain")
#data = pd.read_csv("./Data/Germany/combined.csv")
#filter_and_regress(data, "Germany")

In [18]:
# run regressions based on the timescales of COVID and war in Ukraine
def timeperiod_differences(combined_data_path, country_name):
        # these serve as best guesses, change at will
        COVID_START = "2020-03-01"
        WAR_START = "2022-02-01"

        # read in the data from the combined dataset
        data = pd.read_csv(combined_data_path)
        data["Date"] = pd.to_datetime(data["Date"])
        pre_covid = data[data["Date"] < COVID_START]

        covid = data[data["Date"] > COVID_START]
        covid = covid[covid["Date"] < WAR_START]

        war = data[data["Date"] > WAR_START]

        # run the regressions on the given datasets
        print("Pre-COVID in {}".format(country_name))
        filter_and_regress(pre_covid, country_name)

        print("COVID Era in {}".format(country_name))
        filter_and_regress(covid, country_name)

        print("War in Ukraine Era in {}".format(country_name))
        filter_and_regress(war, country_name)





In [19]:
timeperiod_differences("./Data/Spain/combined_data.csv", "Spain")

Pre-COVID in Spain
Low thresh LNG coefficient = 1.0748126399950275, Demand Coefficient = 6.0957553232078786e-05
Med thresh LNG coefficient = 1.023987501560938, Demand Coefficient = 5.610048545688719e-05
High thresh LNG coefficient = 1.0561485318083226,  Demand Coefficent = 0.010819858228338416
COVID Era in Spain
Low thresh LNG coefficient = 1.4016396631146426, Demand Coefficient = 8.132981188537584e-05
Med thresh LNG coefficient = 1.3033107510133919, Demand Coefficient = 6.738951420927332e-05
High thresh LNG coefficient = 1.71424064407906,  Demand Coefficent = -0.016773216915330492
War in Ukraine Era in Spain
Low thresh LNG coefficient = 0.09700499272409663, Demand Coefficient = 9.12896916043369e-05
Med thresh LNG coefficient = 0.10649282087041193, Demand Coefficient = 6.653890769033451e-05
High thresh LNG coefficient = 2.4111618940075803,  Demand Coefficent = -0.06179772053533062


In [20]:
timeperiod_differences("./Data/Netherlands/combined_data.csv", "Netherlands")

Pre-COVID in Netherlands
Low thresh LNG coefficient = 1.0255695102534281, Demand Coefficient = 0.0001647797777752545
Med thresh LNG coefficient = 0.9962958555299115, Demand Coefficient = 0.00014758067669396426
High thresh LNG coefficient = -2.561320208617062,  Demand Coefficent = 0.209828782000703
COVID Era in Netherlands
Low thresh LNG coefficient = 1.2627204344073053, Demand Coefficient = 0.00034711816701089594
Med thresh LNG coefficient = 1.176212829083994, Demand Coefficient = 0.0002808086732449813
High thresh LNG coefficient = 1.2936800134815396,  Demand Coefficent = -0.002899322183743443
War in Ukraine Era in Netherlands
Low thresh LNG coefficient = 0.9169926217132679, Demand Coefficient = 0.0006600966897317567


  med_model = linear_model.LinearRegression().fit(X_med_log, np.log(med_pass_means_elec + 15))


ValueError: Input y contains NaN.

In [None]:
timeperiod_differences("./Data/Germany/combined_data.csv", "Germany")

Pre-COVID in Germany
Low thresh LNG coefficient = 0.4811061528595858, Demand Coefficient = 2.8718952684058698e-05
Med thresh LNG coefficient = 0.47557108741393644, Demand Coefficient = 2.929671697746013e-05
High thresh LNG coefficient = 0.6201098647713124,  Demand Coefficent = 0.01574170854404107
COVID Era in Germany
Low thresh LNG coefficient = 1.31565309147108, Demand Coefficient = 5.027830489509379e-05
Med thresh LNG coefficient = 1.3328411063059178, Demand Coefficient = 5.107458209133923e-05
High thresh LNG coefficient = 1.0615515205670298,  Demand Coefficent = 0.015670566229691985
War in Ukraine Era in Germany
Low thresh LNG coefficient = 0.9348640834803308, Demand Coefficient = 4.0338906916548105e-05
Med thresh LNG coefficient = 0.949901450745462, Demand Coefficient = 4.1968820460070125e-05
High thresh LNG coefficient = 0.037662041862716586,  Demand Coefficent = 0.024663783415128485
