In [1]:
from functions import *
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import pickle
import plotly.express as px
import plotly.graph_objects as go

import dash
from dash import html,Input,Output,dcc
from dash.dependencies import Input, Output, State
import geopandas as gpd
from datetime import date
country_dict = {
"BG":"Bulgaria",
"GR":"Greece",
"HR":"Croatia",
"RO":"Romania",
"RS":"Republic of Serbia",
"SI":"Slovenia",
}
path="../"
def load_augumented(code):
    file = path + 'data/augumented/' + code + '.pkl'
    with open(file, 'rb') as f:
        df = pickle.load(f)
    return df.drop(columns=["Load-1","Load-24"])
def day_split(data_country, features, day):
    data_country=data_country.dropna()
    y = data_country['DayAheadPrice']
    x = data_country[features]
    x_train, x_test, y_train, y_test = x[x.index<day], x.loc[day], y[y.index<day], y.loc[day]
    print("AJLKAJLKAJ")
    return x_train, x_test, y_train, y_test
def train_model_country_day(data_country_input, day, n_features,model_type="rf", model_parameters=None):
    """
    Train a ML model. This function does the following:
        1. rescales the input data
        2. train test split
        3. feature selection on the training data
        4. train the model: either NN or RF
    
    Parameters:
        model_type: 'rf' | 'nn' | 'lr', str
            type of the model to train, 'rf' for random forest, 'lr' for linear regression and 'nn' for neural network
        data_country: DataFrame
            cleaned data for a given country (no Nans etc)
        n_features: int
            number of best features to select
        model_parameters: dict
            dictionary with model parameters, which are parameters for either 
            MLPRegressor or RandomForestRegressor from sklearn
    
    
    Out:
        model: object
            sklearn model
        params: DataFrame
            parameters used for rescaling the data; predictions have to be scaled back 
            for error calculation and out-of-sample new data must be scaled using these params
        selected_features: list or Index, not sure
            features used in training this model
        err: Series
            errors for predictions
    """
    data_country = data_country_input.copy()
    data_country, params = rescale(data_country)
    x_train, x_test, y_train, y_test = day_split(data_country, data_country.columns[1:],day)
    selected_features = get_best_features(x_train, n_features)

    x_train = x_train[selected_features]
    x_test = x_test[selected_features]

    if model_type.lower() == 'nn':
        if model_parameters == None:
            # these parameters worked okay for project 1
            model_parameters = {
                'hidden_layer_sizes': (16,16,14)
            }
        model = MLPRegressor(**model_parameters)
    elif model_type.lower() == 'lr':
       model = linear_model.LinearRegression() 
    elif model_type.lower() == 'rf':
        if model_parameters == None:
            # these parameters worked okay for project 1
            model_parameters = {
                'bootstrap': True,
                'min_samples_leaf': 2,
                'n_estimators': 10, 
                'min_samples_split': 3,
                'max_features': 10,
                'max_depth': 10,
                'max_leaf_nodes': None}
            
        model = RandomForestRegressor(**model_parameters)

    model.fit(x_train, y_train)
    predictions = model.predict(x_test)

    predictions = pd.Series(predictions, index=y_test.index).sort_index()
    y_test = y_test.sort_index()


    # to calculate errors, we have to rescale the data back using original parameters
    predictions = predictions * params.loc['DayAheadPrice', 'Std'] + params.loc['DayAheadPrice', 'Mean']
    y_test = y_test * params.loc['DayAheadPrice', 'Std'] + params.loc['DayAheadPrice', 'Mean']
    err = get_errors(y_test, predictions)

    #display(err)
    #plot_predictions(y_test, predictions)
    
    return model, params, selected_features, err, predictions

In [6]:
#train_model_country_day(data_country_input, day, n_features, model_type="rf", model_parameters=None)
from plotly.subplots import make_subplots
def plotDayPrediction(day,n_features,model_type='rf',exclude_features=["Price-1","Price-2"]):
        countries = ["BG", "HR", "RO","SI"]
        countries_clean=[]
        for i,code in enumerate(countries):
                try: 
                        test=load_augumented(code).loc[day]
                except: print("Date for this day does not exist") 
                else: 
                        if test.shape[0]==24: countries_clean.append(code)
        #print(countries_clean)
        n = len(countries_clean)
        #print(n)
        clist = []
        [clist.append(country_dict[x]) for x in countries_clean]
        fig = make_subplots(rows=n, cols=2,
                shared_yaxes=True, shared_xaxes=True,
                subplot_titles=clist)

        def predOneDayOneCode(code,row,col):
                df = load_augumented(code).drop(columns=exclude_features)
                #print(df.isnull().values.any())
                model, params, selected_features, err, predictions = train_model_country_day(df, day, 12, model_type="rf")
                def rolling(dataframe):
                        return dataframe.rolling(window=roll_window).mean()
                #print(selected_features)
                
                
                df = df.loc[day]

                fig.add_trace(go.Scatter(x=df.index, 
                                        y=df["DayAheadPrice"], 
                                        name="Real price",
                                        mode="lines",
                                        line_color="blue",),
                                        row=row,col=col)  
                
                fig.add_trace(go.Scatter(x=predictions.index, 
                                        y=predictions, 
                                        name="Prediction",
                                        mode="lines+markers",
                                        line_color="orange",),
                                        row=row,col=col)  #

                fig.update_layout(
                        #title={'text':'Prediction of Energy Price in %s' % (country_dict[code]),'x':0.5, 'xanchor':'center'},
                        showlegend=False)
                
                #return fig, selected_features, err
                return err.rename(country_dict[code])
        #figa=[None]*n
        #sfa=[None]*n
        erra=[None]*n
        j=1
        k=1
        for i,code in enumerate(countries_clean):
                print(k,j)
                erra[i]=predOneDayOneCode(code,k,j)
                if k==j and j==1: j=j+1
                elif k==j and j==2: j=j-1
                elif j>k: k=k+1
        #code = "SI"
        #figa, sfa, erra=predOneDayOneCode(code,1,1)
        #print("aaskdjflöaksjdlfkjaskldjfkajdfölaj")
        fig.update_layout(legend=dict(
                orientation="h",
                entrywidth=120,
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
                ))
        metr_list=[]
        [metr_list.append(x) for x in erra]
        table = pd.concat(metr_list, axis=1)
        return fig,0,table
figa, sfa, table = plotDayPrediction("2022-7-13",12)
display(table)
figa.show()

1 1
AJLKAJLKAJ
1 2
AJLKAJLKAJ
2 2
AJLKAJLKAJ
2 1
AJLKAJLKAJ


Unnamed: 0,Bulgaria,Croatia,Romania,Slovenia
Root mean squared error,88.376684,60.043348,64.372254,53.287428
Coefficient of variation RMSE,0.291814,0.159804,0.182354,0.13899
Normalized mean bias error,-0.137909,0.063255,-0.058043,0.100923
R2 score,0.185565,-0.283452,0.270924,-0.233575


In [3]:
def generate_dataframe_table(dataframe, max_rows=10):
    return html.Table([
        html.Thead(
            html.Tr([html.Th(col) for col in dataframe.columns])
        ),
        html.Tbody([
            html.Tr([
                html.Td(dataframe.iloc[i][col]) for col in dataframe.columns
            ]) for i in range(min(len(dataframe), max_rows))
        ])
    ])

generate_dataframe_table(table)

Table([Thead(Tr([Th('Croatia'), Th('Romania'), Th('Slovenia')])), Tbody([Tr([Td(44.56836338577512), Td(78.90727859894089), Td(51.25373401620993)]), Tr([Td(0.16116789862277253), Td(0.26518032536408265), Td(0.18931183488117134)]), Tr([Td(0.01295561084901721), Td(-0.020642612117643462), Td(0.01615197336752432)]), Tr([Td(0.461253242458867), Td(0.413601314145723), Td(0.04456461955132052)])])])

In [4]:
table.reset_index(names=["Metric"])

Unnamed: 0,Metric,Croatia,Romania,Slovenia
0,Root mean squared error,44.568363,78.907279,51.253734
1,Coefficient of variation RMSE,0.161168,0.26518,0.189312
2,Normalized mean bias error,0.012956,-0.020643,0.016152
3,R2 score,0.461253,0.413601,0.044565
