In [65]:
import pandas as pd
import numpy as np
import emd
import pylab as plt
import statsmodels.api as sm
import plotly.express as px
import os
from sklearn import linear_model

In [82]:
def filter_and_regress(combined_data, country, 
        low_pass_percent=0.2, med_pass_percent=0.5, high_pass_percent=0.8):
        
        combined_data = combined_data[["Last Price", "Price", "Actual"]]
        combined_data = combined_data.dropna(axis=0)
        elec_price = combined_data["Price"].to_numpy()
        lng_price = combined_data["Last Price"].to_numpy()
        demand = combined_data["Actual"].to_numpy()
        # plot and transform all of the data for electricity pricing
        #plt.figure()
        #plt.plot(elec_price, "k")

        imf, noise = emd.sift.complete_ensemble_sift(elec_price, ensemble_noise=1)
        # create the pass thresholds based on the input percentages
        low_pass_thresh_elec = int(np.ceil(low_pass_percent * imf.shape[1]))
        med_pass_thresh_elec = int(np.ceil(med_pass_percent * imf.shape[1]))
        high_pass_thresh_elec = int(np.ceil(high_pass_percent * imf.shape[1]))
        #emd.plotting.plot_imfs(imf)

        IP, IF, IA = emd.spectra.frequency_transform(imf, 256, "hilbert")
        # plot and transform all of the data for LNG prices
        #plt.figure()
        #plt.plot(lng_price, "k")

        lng_imf, lng_noise = emd.sift.complete_ensemble_sift(lng_price, ensemble_noise=1)
        low_pass_thresh_lng = int(np.ceil(low_pass_percent * lng_imf.shape[1]))
        med_pass_thresh_lng = int(np.ceil(med_pass_percent * lng_imf.shape[1]))
        high_pass_thresh_lng = int(np.ceil(high_pass_percent * lng_imf.shape[1]))

        #emd.plotting.plot_imfs(lng_imf)

        IP, IF, IA = emd.spectra.frequency_transform(imf, 256, "hilbert")

        demand_imf, demand_noise = emd.sift.complete_ensemble_sift(demand, ensemble_noise=1)
        low_pass_thresh_demand = int(np.ceil(low_pass_percent * demand_imf.shape[1]))
        med_pass_thresh_demand = int(np.ceil(med_pass_percent * demand_imf.shape[1]))
        high_pass_thresh_demand = int(np.ceil(high_pass_percent * demand_imf.shape[1]))
        
        low_pass_elec = imf[:, low_pass_thresh_elec:]
        low_pass_means_elec = np.apply_along_axis(np.mean, 1, low_pass_elec)
        print(low_pass_means_elec)
        print(np.log(low_pass_means_elec+5))
        low_pass_lng = lng_imf[:, low_pass_thresh_lng:]
        low_pass_means_lng = np.apply_along_axis(np.mean, 1, low_pass_lng)

        low_pass_demand = demand_imf[:, low_pass_thresh_demand:]
        low_pass_means_demand = np.apply_along_axis(np.mean, 1, low_pass_demand)

        #px.scatter(x=low_pass_means_elec, y=low_pass_means_lng)


        med_pass_elec = imf[:, med_pass_thresh_elec:]
        med_pass_means_elec = np.apply_along_axis(np.mean, 1, med_pass_elec)

        med_pass_lng = lng_imf[:, med_pass_thresh_lng:]
        med_pass_means_lng = np.apply_along_axis(np.mean, 1, med_pass_lng)

        med_pass_demand = demand_imf[:, med_pass_thresh_demand:]
        med_pass_means_demand = np.apply_along_axis(np.mean, 1, med_pass_demand)

        #px.scatter(x=med_pass_means_elec, y=med_pass_means_lng)
        
        high_pass_elec = imf[:, high_pass_thresh_elec:]
        high_pass_means_elec = np.apply_along_axis(np.mean, 1, high_pass_elec)

        high_pass_lng = lng_imf[:, high_pass_thresh_lng:]
        high_pass_means_lng = np.apply_along_axis(np.mean, 1, high_pass_lng)

        high_pass_demand = lng_imf[:, high_pass_thresh_demand:]
        high_pass_means_demand = np.apply_along_axis(np.mean, 1, high_pass_demand)
        
        #px.scatter(x=high_pass_means_elec, y=high_pass_means_lng)
        X_low = pd.DataFrame({"LNG": low_pass_means_lng, "Demand": low_pass_means_demand})
        X_low_log = X_low.copy()
        X_low_log["LNG"] = X_low_log["LNG"].apply(lambda x: np.log(x+np.min(X_low_log["LNG"])+1))
        low_model = linear_model.LinearRegression().fit(X_low_log, np.log(low_pass_means_elec+np.min(low_pass_means_elec)+1))
        print("Low pass LNG coefficient = {}, Demand Coefficient = {}".format(low_model.coef_[0], low_model.coef_[1]))

        X_med = pd.DataFrame({"LNG": med_pass_means_lng, "Demand": med_pass_means_demand})
        X_med_log = X_med.copy()
        X_med_log["LNG"] = X_med_log["LNG"].apply(lambda x: np.log(x+np.min(X_med_log["LNG"])+1))
        med_model = linear_model.LinearRegression().fit(X_med_log, np.log(med_pass_means_elec + np.min(med_pass_means_elec)+1))
        print("Med Pass coefficient = {}, Demand Coefficient".format(med_model.coef_[0], med_model.coef_[1]))

        X_high = pd.DataFrame({"LNG": high_pass_means_lng, "Demand": high_pass_means_demand})
        X_high_log = X_high.copy()
        X_high_log["LNG"] = X_high_log["LNG"].apply(lambda x: np.log(x + np.min(X_high_log["LNG"])+1))
        high_model = linear_model.LinearRegression().fit(X_high_log, np.log(high_pass_means_elec + np.min(high_pass_means_elec) + 1))
        print("High Pass coefficient = {},  Demand Coefficent = {}".format(high_model.coef_[0], high_model.coef_[1]))

In [83]:
filter_and_regress(pd.read_csv("./Data/Spain/combined_data.csv"), "Spain")
#data = pd.read_csv("./Data/Germany/combined.csv")
#filter_and_regress(data, "Germany")

[ 0.00486442 -2.01295893 -0.05603892 ... -2.58023397 -2.2264746
 -2.25025613]
[ 1.10023245 -0.01304363  1.07975598 ... -0.8680578  -0.25679677
 -0.28802364]


  print(np.log(low_pass_means_elec+3))
  low_model = linear_model.LinearRegression().fit(X_low_log, np.log(low_pass_means_elec+np.min(low_pass_means_elec)+1))


ValueError: Input X contains NaN.
LinearRegression does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
# run regressions based on the timescales of COVID and war in Ukraine
def timeperiod_differences(combined_data_path, country_name):
        # these serve as best guesses, change at will
        COVID_START = "2020-03-01"
        WAR_START = "2022-02-01"

        # read in the data from the combined dataset
        data = pd.read_csv(combined_data_path)
        data["Date"] = pd.to_datetime(data["Date"])
        pre_covid = data[data["Date"] < COVID_START]

        covid = data[data["Date"] > COVID_START]
        covid = covid[covid["Date"] < WAR_START]

        war = data[data["Date"] > WAR_START]

        # run the regressions on the given datasets
        print("Pre-COVID in {}".format(country_name))
        filter_and_regress(pre_covid, country_name)

        print("COVID Era in {}".format(country_name))
        filter_and_regress(covid, country_name, med_pass_threshold=8, high_pass_threshold=11)

        print("War in Ukraine Era in {}".format(country_name))
        filter_and_regress(war, country_name, med_pass_threshold=6, high_pass_threshold=9)





In [None]:
timeperiod_differences("./Data/Spain/combined_data.csv", "Spain")

Pre-COVID in Spain
Low pass coefficient = 2.4450979909396375
Med Pass coefficient = 2.443228107736556
High Pass coefficient = 2.45101590117428
COVID Era in Spain
Low pass coefficient = 2.02079257747762
Med Pass coefficient = 1.973463297130555
High Pass coefficient = 1.946029828940902
War in Ukraine Era in Spain
Low pass coefficient = 1.5160641031776962
Med Pass coefficient = 1.5153160382040447
High Pass coefficient = 1.5129364671372045
