In [16]:
from electricity_price_predictor.data import get_shifted_load, get_shifted_price, get_weather, get_holidays, get_days_dummies
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
import statsmodels.formula.api as smf
import pandas as pd

## Load datasets

In [2]:
last_date = '2020-11-23 16:00:00'

In [3]:
dayofweek = get_days_dummies()
holidays = get_holidays()
weather = get_weather().loc[:last_date]
price = get_shifted_price().loc[:last_date]
load = get_shifted_load()

In [5]:
# price and load into one df
df = price.merge(load, on='time')

## Downsample

In [6]:
weather = weather.resample('D').mean()
weather = weather.reset_index().drop('dt', axis=1)

In [7]:
df = df.resample('D').mean()
df = df.reset_index().drop('time', axis=1)

In [8]:
dayofweek = dayofweek.reset_index().drop('index', axis=1)
holidays = holidays.reset_index().drop('index', axis=1)

In [9]:
print(holidays.shape)
print(dayofweek.shape)
print(weather.shape)
print(df.shape)

(2154, 2)
(2154, 7)
(2154, 5)
(2154, 2)


## Merging dataframes

In [10]:
df = df.merge(
    holidays, right_index= True, left_index=True).merge(
    dayofweek, right_index= True, left_index=True).merge(
    weather, right_index= True, left_index= True).drop('holiday_name', axis=1)

df['holiday_bool'] = df['holiday_bool'].astype('int64')

In [11]:
df.head()

Unnamed: 0,price,load,holiday_bool,mon,tue,wed,thur,fri,sat,sun,temp,feels_like,humidity,clouds_all,wind_speed
0,16.310417,2114.208333,1,0,0,0,1,0,0,0,6.531499,0.07099,92.857741,91.531577,7.762724
1,4.970417,2360.416667,0,0,0,0,0,1,0,0,6.516418,-2.268839,84.475949,68.241743,10.727671
2,15.291667,2196.791667,0,0,0,0,0,0,1,0,4.117653,-2.365009,84.460324,42.119344,6.808548
3,21.064167,2142.25,0,0,0,0,0,0,0,1,3.413488,-1.761679,80.915235,36.723827,4.657854
4,37.9975,2601.625,0,1,0,0,0,0,0,0,4.752763,1.423782,94.189036,89.442873,2.84228


## Define features and scale

In [12]:
X = df.drop('price', axis=1)
y = df.price

In [14]:
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

## Model

### LinearReg sklearn

In [15]:
model_2 = LinearRegression()
model_2.fit(X_scaled, y)
model_2.score(X_scaled, y)  # R2

0.374667541977606

### smf statsmodels

In [18]:
col = df.columns[1:]

In [22]:
formula = ' + '.join(col)
formula = f"price ~ {formula}"
formula

'price ~ load + holiday_bool + mon + tue + wed + thur + fri + sat + sun + temp + feels_like + humidity + clouds_all + wind_speed'

In [23]:
model = smf.ols(formula=formula, data=df).fit()

In [24]:
model.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.375
Model:,OLS,Adj. R-squared:,0.371
Method:,Least Squares,F-statistic:,98.83
Date:,"Fri, 27 Nov 2020",Prob (F-statistic):,2.2999999999999997e-207
Time:,09:17:00,Log-Likelihood:,-8017.9
No. Observations:,2154,AIC:,16060.0
Df Residuals:,2140,BIC:,16140.0
Df Model:,13,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,36.9093,6.276,5.881,0.000,24.602,49.217
load,0.0141,0.001,9.472,0.000,0.011,0.017
holiday_bool,-2.5585,1.345,-1.902,0.057,-5.196,0.079
mon,5.7008,1.110,5.134,0.000,3.523,7.878
tue,5.4811,1.143,4.796,0.000,3.240,7.722
wed,5.6921,1.148,4.957,0.000,3.440,7.944
thur,5.6502,1.157,4.885,0.000,3.382,7.919
fri,6.2233,1.066,5.837,0.000,4.132,8.314
sat,4.7057,0.943,4.988,0.000,2.856,6.556

0,1,2,3
Omnibus:,53.74,Durbin-Watson:,0.438
Prob(Omnibus):,0.0,Jarque-Bera (JB):,58.195
Skew:,0.37,Prob(JB):,2.31e-13
Kurtosis:,3.318,Cond. No.,9.33e+18
