In [9]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import datetime
# import graph objects as "go"
import plotly
import plotly.graph_objs as go
import plotly.express as px

from sklearn.metrics import mean_squared_error

In [10]:
def dateparse(x):
    """Parse one 'YYYY-MM-DD HH:MM:SS' timestamp string into a ``datetime``.

    No longer needed for loading below; kept as a named function in case
    other cells still reference ``dateparse``.
    """
    return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')


# Load the merged hourly dataset with its first column (dt_start_utc) as the index.
# The index is parsed afterwards with an explicit format instead of passing
# `date_parser=` to read_csv: that argument is deprecated since pandas 2.0, and a
# vectorised pd.to_datetime is also faster than calling strptime once per row.
imb_2019 = pd.read_csv("../data/merged_final_1h.csv", index_col=0)
imb_2019.index = pd.to_datetime(imb_2019.index, format='%Y-%m-%d %H:%M:%S')

In [11]:
def fill_missing(data):
    """Put ``data`` on a regular hourly grid and fill the resulting gaps.

    Parameters
    ----------
    data : pandas.DataFrame or pandas.Series
        Time-indexed data (``resample`` requires a datetime-like index).

    Returns
    -------
    Same type as ``data``
        One row per hour. Each hour holds the median of the original
        observations that fell into it; hours without observations are filled
        by time-weighted linear interpolation, in both directions.
    """
    # '1h' rather than '1H': the upper-case hourly alias is deprecated since pandas 2.2.
    hourly = data.resample('1h').median()
    return hourly.interpolate(method='time', limit_direction='both')

In [12]:
def train_test_split(df, train_frac=0.7):
    """Split ``df`` by row position into a leading train part and a trailing test part.

    No shuffling is done: the first ``int(len(df) * train_frac)`` rows become
    the training set and the remaining rows the test set.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split.
    train_frac : float, default 0.7
        Fraction of rows (from the top) assigned to the training set.

    Returns
    -------
    (train, test) : tuple of pandas.DataFrame
        Independent copies, so callers can add or modify columns without
        writing into a slice of ``df`` (which triggers SettingWithCopyWarning).

    Raises
    ------
    ValueError
        If ``train_frac`` is outside ``[0, 1]``.
    """
    if not 0 <= train_frac <= 1:
        raise ValueError("train_frac must be between 0 and 1, got {!r}".format(train_frac))

    n = int(len(df) * train_frac)
    train = df.iloc[:n].copy()
    test = df.iloc[n:].copy()

    return train, test

In [13]:
# Split only the target column: the first 70% of the rows go to train, the rest to test.
train_2019, test_2019 = train_test_split(imb_2019[['imbalance_price_target']])

In [14]:
# Preview the first rows of the training split.
train_2019.head()

Unnamed: 0_level_0,imbalance_price_target
dt_start_utc,Unnamed: 1_level_1
2019-01-01 00:00:00,-3.465
2019-01-01 01:00:00,-3.735
2019-01-01 02:00:00,-9.25
2019-01-01 03:00:00,11.355
2019-01-01 04:00:00,-4.925


In [15]:
# Naive persistence baseline: the "prediction" for each hour is the price observed one
# row (one hour) earlier. `.assign` returns a new frame rather than writing a column into
# test_2019 in place, which can raise SettingWithCopyWarning when the frame is a slice
# of another DataFrame; it also makes the cell safe to re-run.
test_2019 = test_2019.assign(
    imbalance_1h_shift=test_2019['imbalance_price_target'].shift(1)
)

In [17]:
# Preview the test split with the shifted column; the first row has no previous value, hence NaN.
test_2019.head()

Unnamed: 0_level_0,imbalance_price_target,imbalance_1h_shift
dt_start_utc,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-03-23 07:00:00,56.745,
2020-03-23 08:00:00,-368.605,56.745
2020-03-23 09:00:00,-88.885,-368.605
2020-03-23 10:00:00,85.235,-88.885
2020-03-23 11:00:00,40.5,85.235


In [18]:
# Drop the rows containing NaN (the shift leaves the first row without a baseline value).
# Rebinding the name instead of `inplace=True` avoids mutating a frame that may be a
# slice of another DataFrame.
test_2019 = test_2019.dropna()

In [26]:
# (rows, columns) of the test frame.
test_2019.shape

(4600, 2)

In [19]:
def plot_forecast(df):
    """Plot predicted vs. actual values with plotly and print the RMSE.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'imbalance_price_target'`` (actual values)
        and ``'imbalance_1h_shift'`` (predicted values); the index is used as
        the x-axis.

    Returns
    -------
    None
        The figure is shown via ``fig.show()`` and the RMSE is printed.
    """
    # The figure is built purely with plotly. The former plt.figure(figsize=...) call
    # only produced an empty matplotlib figure ("<Figure ... with 0 Axes>") in the output.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df['imbalance_1h_shift'], name='predicted',
                             line=dict(color='blue', width=1)))
    fig.add_trace(go.Scatter(x=df.index, y=df['imbalance_price_target'], name='actual',
                             opacity=0.5, line=dict(color='red', width=4)))
    fig.show()

    # Print the root mean squared error between actual and predicted values.
    mse = mean_squared_error(df['imbalance_price_target'].values, df['imbalance_1h_shift'].values)
    print('rmse = ', np.sqrt(mse))

In [20]:
# Plot the shifted baseline against the actual prices and print its RMSE.
plot_forecast(test_2019)

rmse =  60.2655260469197


<Figure size 1152x576 with 0 Axes>

In [21]:
def _classify_5c(prices):
    """Map each price to one of five integer classes.

    0: x < -100, 1: -100 <= x < -5, 2: -5 <= x <= 5, 3: 5 < x <= 100, 4: otherwise.
    """
    values = prices.to_numpy()
    # np.select takes the first matching condition, mirroring the original
    # if/else chain; anything matching none of them falls into class 4.
    conditions = [values < -100, values < -5, values <= 5, values <= 100]
    labels = np.select(conditions, [0, 1, 2, 3], default=4)
    return pd.Series(labels, index=prices.index).astype('int64')


def get_forecast_classification(forecast_df):
    """Return a copy of ``forecast_df`` with 5-class labels for actual and predicted prices.

    Parameters
    ----------
    forecast_df : pandas.DataFrame
        Must contain ``'imbalance_price_target'`` (actual) and
        ``'imbalance_1h_shift'`` (predicted).

    Returns
    -------
    pandas.DataFrame
        A new frame with two added integer columns, ``'class_act_5c'`` and
        ``'class_pred_5c'``, both produced by the same thresholds
        (see ``_classify_5c``). The input frame is not modified.
    """
    # Work on a copy so the caller's frame (possibly a slice of a larger
    # DataFrame) is not mutated as a hidden side effect.
    classified = forecast_df.copy()
    classified['class_act_5c'] = _classify_5c(classified['imbalance_price_target'])
    classified['class_pred_5c'] = _classify_5c(classified['imbalance_1h_shift'])
    return classified

In [22]:
# Attach the actual/predicted 5-class labels to the baseline forecast.
df_forecast_baseline = get_forecast_classification(test_2019)

In [23]:
# Display the classified baseline frame (the rendered table is truncated in the middle).
df_forecast_baseline

Unnamed: 0_level_0,imbalance_price_target,imbalance_1h_shift,class_act_5c,class_pred_5c
dt_start_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-03-23 08:00:00,-368.605,56.745,0,3
2020-03-23 09:00:00,-88.885,-368.605,1,0
2020-03-23 10:00:00,85.235,-88.885,3,1
2020-03-23 11:00:00,40.500,85.235,3,3
2020-03-23 12:00:00,29.705,40.500,3,3
...,...,...,...,...
2020-09-30 19:00:00,16.450,3.065,3,2
2020-09-30 20:00:00,26.050,16.450,3,3
2020-09-30 21:00:00,56.265,26.050,3,3
2020-09-30 22:00:00,41.335,56.265,3,3


In [24]:
# Save the baseline forecast with its class labels as CSV (path is relative to the working directory).
df_forecast_baseline.to_csv('forecast_baseline_1h.csv')

In [25]:
def classification_accuracy(df, column_name, n_classes=5):
    """Compute, per actual class, the percentage of rows predicted as that same class.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the integer label columns ``'class_act_5c'`` (actual)
        and ``'class_pred_5c'`` (predicted).
    column_name : str
        Name of the single column in the returned frame.
    n_classes : int, default 5
        Classes ``0 .. n_classes - 1`` are evaluated.

    Returns
    -------
    pandas.DataFrame
        One row per class (index = class label) holding the hit rate in
        percent, rounded to one decimal. A class with no actual occurrences
        yields NaN.
    """
    actual = df['class_act_5c']
    correct = actual == df['class_pred_5c']

    accuracy_all = []
    for label in range(n_classes):
        in_class = actual == label
        # Count rows via boolean masks. The previous `.count()[0]` counted the
        # non-null values of whichever column came first and relied on a
        # deprecated positional lookup on a label-indexed Series.
        n_actual = int(in_class.sum())
        if n_actual == 0:
            # No samples of this class: the rate is undefined, not a 0/0 division.
            accuracy_all.append(np.nan)
            continue
        n_correct = int((in_class & correct).sum())
        accuracy_all.append(np.round(n_correct / n_actual * 100, 1))

    return pd.DataFrame(accuracy_all, columns=[column_name])

In [26]:
# Per-class hit rate (%) of the baseline, stored under the column name 'acc_baseline'.
accuracy_1h_1f = classification_accuracy(df_forecast_baseline, 'acc_baseline')

In [28]:
# Save the per-class baseline accuracies as CSV (path is relative to the working directory).
accuracy_1h_1f.to_csv('accuracy_baseline_1h_1f.csv')

In [48]:
def plot_classification(df, title_text):
    """Show a horizontal plotly bar chart of the values in the first column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        The index supplies the bar positions on the y-axis; the first column
        supplies the bar lengths and the text printed next to each bar.
    title_text : str
        Figure title, centred above the plot.
    """
    bar_values = df.iloc[:, 0]
    axis_labels = {
        "x": "Accuracy (%)",
        "y": "Imbalance Energy Price",
    }

    fig = px.bar(
        y=df.index,
        x=bar_values,
        orientation='h',
        text=bar_values,
        labels=axis_labels,
    )
    fig.update_traces(textposition='outside')

    # Replace the numeric ticks 0..4 on the y-axis with readable class names.
    class_axis = dict(
        tickmode='array',
        tickvals=[0, 1, 2, 3, 4],
        ticktext=['extreme negative', 'low negative', 'close to 0', 'low positive', 'extreme positive'],
    )
    fig.update_layout(yaxis=class_axis, title_text=title_text, title_x=0.5)

    fig.show()

In [49]:
# Horizontal bar chart of the per-class baseline accuracy.
plot_classification(accuracy_1h_1f,'Univariate Baseline')