In [1]:
import numpy as np
import pandas as pd
import os

import matplotlib.pyplot as plt

from darts.dataprocessing.transformers import Scaler
from darts.models import RNNModel
from darts.datasets import WeatherDataset
from darts.models import LinearRegressionModel
import darts.metrics as metrics
from darts.datasets import AirPassengersDataset
import datetime

from darts import TimeSeries

In [2]:
# data preparation
# Load the building-level load table; the path is specific to the original run environment.
bld_pd=pd.read_csv(r'/root/autodl-tmp/data/load_prediction_base/BLD_Sum.csv')
# sort_values returns a new frame, so the result must be assigned back
# (the original call discarded it and left the frame unsorted).
bld_pd=bld_pd.sort_values(by='DateTime')
# Drop the unscaled copy of the target so it cannot be used as an input by accident.
bld_pd=bld_pd.drop(columns=['RealPower_before_scaling'])
# Build a 15-minute TimeSeries; timestamps missing from the file are inserted as gaps.
bld=TimeSeries.from_dataframe(bld_pd,time_col="DateTime",freq="15min",fill_missing_dates=True)

# data split
# Two years of training data (2017-2018) ...
train_start=pd.Timestamp(2017,1,1,0,0)
train_end=pd.Timestamp(2018,12,31,23,45)

# ... followed by one year (2019) used for back-testing.
pred_start=pd.Timestamp(2019,1,1,0,0)
pred_end=pd.Timestamp(2019,12,31,23,45)



In [3]:
# Experiment configuration shared by the runs in this notebook.
target_key = 'RealPower'          # column of the series that is forecast

# Look-back window and forecast length, expressed in 15-minute steps.
prediction_horizon = 96           # one day ahead
output_chunk_length = 96          # steps emitted per model call
lags = 7 * 96                     # one week of history

# Optional behaviours, both switched off by default.
enable_past_covariates = False
enable_retrain = False

stride = prediction_horizon  # suggest not to change

serial = 1  # set for every running

In [4]:
class Simple_LinearRegression():
    """Fit, back-test and score a darts ``LinearRegressionModel`` on one target column.

    Typical use is ``train()`` -> ``predict()`` -> ``cal_metrics()``. Every artefact
    (pickled model, prediction CSV, metrics CSV) is written below
    ``<save_path>/<serial>/`` with ``default_describe`` as the file-name prefix.
    """

    def __init__(self,series,serial,target_key,train_start,train_end,
                 pred_start,pred_end,verbose,
                 lags,prediction_horizon,output_chunk_length,stride,save_path=None,
                 save_model=True,save_metrics=True,past_covariates=None,
                 enable_past_covariates=False,enable_retrain=False) -> None:
        """Slice the series, select covariates and build the (unfitted) model.

        Args:
            series: multi-column series holding the target and candidate covariates.
            serial: run identifier; used as sub-directory name and file-name prefix.
            target_key: name of the column to forecast.
            train_start, train_end: bounds of the training slice.
            pred_start, pred_end: bounds of the back-test period.
            verbose: forwarded to ``historical_forecasts``.
            lags: number of past target steps (and covariate steps, when enabled)
                given to the model.
            prediction_horizon: forecast horizon used during back-testing.
            output_chunk_length: forwarded to ``LinearRegressionModel``.
            stride: step between consecutive back-test forecasts.
            save_path: base output directory; the current directory when ``None``.
            save_model: pickle the fitted model in ``train()``.
            save_metrics: write the metrics CSV in ``cal_metrics()``.
            past_covariates: column names to keep as past covariates; a default
                holiday/temperature set is used when ``None``.
            enable_past_covariates: feed the selected covariates to the model.
            enable_retrain: periodically refit the model during back-testing.
        """
        # bool * str yields '' or the tag, so disabled options leave an empty slot.
        self.default_describe='_'.join([str(serial),enable_past_covariates*'PCo',
            'Lag'+str(lags),enable_retrain*'ReT'])

        # str(serial) lets an integer run id work; fall back to the current
        # directory because os.path.join cannot take the declared default of None.
        base_dir=save_path if save_path is not None else os.curdir
        self.save_path=os.path.join(base_dir,str(serial))
        os.makedirs(self.save_path,exist_ok=True)

        self.pred_start=pred_start
        self.prediction_horizon=prediction_horizon
        self.stride=stride
        self.enable_past_covariates=enable_past_covariates
        self.enable_retrain=enable_retrain
        self.verbose=verbose
        self.save_model=save_model
        self.save_metrics=save_metrics
        self.lags=lags

        self.trained=False
        self.prediction=None

        if past_covariates is None:
            # Default covariate set. Calendar encodings that can be passed explicitly
            # instead: 'hour_cos', 'hour_sin', 'dayofweek_cos', 'dayofweek_sin',
            # 'quarter_cos', 'quarter_sin', 'month_cos', 'month_sin',
            # 'dayofmonth_cos', 'dayofmonth_sin'.
            past_covariates=['is_holiday','temp', 'feels_like', 'temp_min', 'temp_max']

        self.series_past_covariates=None
        if enable_past_covariates:
            # Keep only the requested covariate columns by dropping everything else.
            to_drop=list(set(series.columns)-set(past_covariates))
            print(to_drop)
            self.series_past_covariates=series.drop_columns(to_drop)

        self.series_train=series[train_start:train_end][target_key]
        # Everything up to pred_end: the back-test needs history before pred_start.
        self.series_pred=series[:pred_end][target_key]

        model_kwargs={'lags':lags,'output_chunk_length':output_chunk_length}
        if enable_past_covariates:
            model_kwargs['lags_past_covariates']=lags
        self.model=LinearRegressionModel(**model_kwargs)

    def train(self):
        """Fit the model on the training slice and optionally pickle it."""
        # series_past_covariates is None whenever covariates are disabled.
        self.model.fit(
            series=self.series_train,
            past_covariates=self.series_past_covariates,
        )
        self.trained=True
        if self.save_model:
            self.model.save(os.path.join(self.save_path,self.default_describe+'model.pkl'))

    def predict(self):
        """Run the rolling back-test and write the predictions to CSV.

        Raises:
            RuntimeError: if ``train()`` has not been called yet.
        """
        if not self.trained:
            # The original built a Warning object without raising it and then
            # crashed on ``None.to_csv``; fail early with a clear message instead.
            raise RuntimeError("Please train model first")

        # Shift the start back by (horizon-1) steps; the /4 assumes 15-minute
        # sampling. Presumably this makes the first retained last-point forecast
        # land on pred_start - TODO confirm against the darts version in use.
        pred_start=self.pred_start-datetime.timedelta(hours=(self.prediction_horizon-1)/4)
        self.prediction=self.model.historical_forecasts(
            self.series_pred,
            start=pred_start,
            forecast_horizon=self.prediction_horizon,
            stride=self.stride,
            # bool * int: 0 when retraining is disabled, otherwise a retrain
            # interval of 7 * horizon iterations.
            retrain=self.enable_retrain*self.prediction_horizon*7,
            verbose=self.verbose,
            last_points_only=True,
            past_covariates=self.series_past_covariates,
            # NOTE(review): flagged by the author as a wrong configuration; None
            # would use all available history. Left unchanged so results stay
            # comparable with earlier runs.
            train_length=365*96
        )
        self.prediction.to_csv(os.path.join(self.save_path,self.default_describe+'prediction.csv'))

    def cal_metrics(self):
        """Print every supported metric and, if enabled, save them to CSV.

        A metric that fails to compute is reported and skipped so that the
        remaining metrics are still produced.
        """
        assert self.prediction is not None, "Please run predict() first"
        metrics_method_dic={
            'CV':metrics.coefficient_of_variation,
            'MAE':metrics.mae,
            'MAPE':metrics.mape,
            'OPE':metrics.ope,
            'RMSE':metrics.rmse,
            'MSE':metrics.mse,
            'MARRE':metrics.marre,
            'MASE':metrics.mase,
            'R2':metrics.r2_score,
            'SMAPE':metrics.smape,
        }
        metrics_dic={
            'start_time':self.prediction.time_index[0],
            'end_time':self.prediction.time_index[-1],
            'n':len(self.prediction.time_index),
        }

        for metric,method in metrics_method_dic.items():
            extra_kwargs={}
            if metric=='MASE':
                # MASE additionally needs the in-sample series and a seasonality;
                # m = 96*7 steps.
                extra_kwargs={'insample':self.series_train,'m':96*7}
            try:
                value=method(self.series_pred,self.prediction,intersect=True,**extra_kwargs)
            except Exception as err:
                # Best effort: report the reason and carry on with the other metrics.
                print("Fail to calculate metric: {} of model {}".format(metric,self.default_describe)
                      +" ({})".format(err))
                continue
            print({metric: value})
            metrics_dic.update({metric: value})

        if self.save_metrics:
            metrics_df=pd.DataFrame([metrics_dic]).T
            metrics_df.to_csv(os.path.join(self.save_path,self.default_describe+'metrics.csv'))
   
        
        

In [84]:
# Run 0: baseline using target lags only (no covariates, no retraining).
test = Simple_LinearRegression(
    # data and time windows
    series=bld,
    target_key='RealPower',
    train_start=train_start,
    train_end=train_end,
    pred_start=pred_start,
    pred_end=pred_end,
    # model and back-test geometry
    lags=7 * 96,
    prediction_horizon=96,
    output_chunk_length=96,
    stride=1,
    # switches
    enable_past_covariates=False,
    enable_retrain=False,
    # bookkeeping
    serial='0_no_PCo',
    save_path=r'/root/autodl-tmp/load_forecast/Linear_Regression',
    save_model=True,
    save_metrics=True,
    verbose=True,
)

test.train()
test.predict()
test.cal_metrics()

{'CV': 9.337861080396237}
{'MAE': 4.228513156563435}
{'MAPE': 6.014341908840429}
{'OPE': 0.1895191707674868}
{'RMSE': 6.866485254773603}
{'MSE': 47.14861975402331}
{'MARRE': 5.889860107118139}
{'MASE': 0.9079816181426624}
{'R2': 0.8162193272783183}
{'SMAPE': 5.890138261300678}


In [85]:
# Run 1a: add the class's default past covariates (holiday flag and temperatures).
test = Simple_LinearRegression(
    # data and time windows
    series=bld,
    target_key='RealPower',
    train_start=train_start,
    train_end=train_end,
    pred_start=pred_start,
    pred_end=pred_end,
    # model and back-test geometry
    lags=7 * 96,
    prediction_horizon=96,
    output_chunk_length=96,
    stride=1,
    # switches
    enable_past_covariates=True,
    enable_retrain=False,
    # bookkeeping
    serial='1_PCo_only_weather',
    save_path=r'/root/autodl-tmp/load_forecast/Linear_Regression',
    save_model=True,
    save_metrics=True,
    verbose=True,
)

test.train()
test.predict()
test.cal_metrics()

['quarter_cos', 'weather_main', 'clouds_all', 'pressure', 'hour_sin', 'dayofmonth_cos', 'rain_3h', 'month_cos', 'quarter_sin', 'humidity', 'wind_deg', 'hour_cos', 'snow_3h', 'RealPower', 'snow_1h', 'wind_speed', 'rain_1h', 'dayofweek_sin', 'dayofmonth_sin', 'month_sin', 'dayofweek_cos', 'RealPower_-0d_0h']


{'CV': 8.802542967840509}
{'MAE': 4.301270947890085}
{'MAPE': 6.004532911727786}
{'OPE': 2.486365695754783}
{'RMSE': 6.472845437814452}
{'MSE': 41.89772806183536}
{'MARRE': 5.99120381748397}
{'MASE': 0.9236047780230373}
{'R2': 0.8366867855541599}
{'SMAPE': 6.012791648127056}


In [86]:
# Run 1b: holiday/temperature covariates plus cyclical calendar encodings.
test = Simple_LinearRegression(
    # data and time windows
    series=bld,
    target_key='RealPower',
    train_start=train_start,
    train_end=train_end,
    pred_start=pred_start,
    pred_end=pred_end,
    # model and back-test geometry
    lags=7 * 96,
    prediction_horizon=96,
    output_chunk_length=96,
    stride=1,
    # switches
    enable_past_covariates=True,
    enable_retrain=False,
    past_covariates=[
        'is_holiday', 'temp', 'feels_like', 'temp_min', 'temp_max',
        'hour_cos', 'hour_sin',
        'dayofweek_cos', 'dayofweek_sin',
        'quarter_cos', 'quarter_sin',
        'month_cos', 'month_sin',
    ],
    # bookkeeping
    serial='1_PCo_weather_n_time',
    save_path=r'/root/autodl-tmp/load_forecast/Linear_Regression',
    save_model=True,
    save_metrics=True,
    verbose=True,
)

test.train()
test.predict()
test.cal_metrics()

['snow_1h', 'wind_speed', 'pressure', 'weather_main', 'rain_1h', 'clouds_all', 'dayofmonth_cos', 'rain_3h', 'dayofmonth_sin', 'humidity', 'wind_deg', 'RealPower_-0d_0h', 'snow_3h', 'RealPower']


{'CV': 7.6258449721215875}
{'MAE': 3.638397314338106}
{'MAPE': 5.177709032937001}
{'OPE': 1.8680688694235452}
{'RMSE': 5.607574540404317}
{'MSE': 31.44489222619069}
{'MARRE': 5.067892756181451}
{'MASE': 0.7812670219059947}
{'R2': 0.8774309093852748}
{'SMAPE': 5.160332133127481}


In [5]:
# Run 2a: default past covariates with periodic retraining during the back-test.
test = Simple_LinearRegression(
    # data and time windows
    series=bld,
    target_key='RealPower',
    train_start=train_start,
    train_end=train_end,
    pred_start=pred_start,
    pred_end=pred_end,
    # model and back-test geometry
    lags=7 * 96,
    prediction_horizon=96,
    output_chunk_length=96,
    stride=1,
    # switches
    enable_past_covariates=True,
    enable_retrain=True,
    # bookkeeping
    serial='2_ReT_PCo_only_weather',
    save_path=r'/root/autodl-tmp/load_forecast/Linear_Regression',
    save_model=True,
    save_metrics=True,
    verbose=True,
)

test.train()
test.predict()
test.cal_metrics()

['snow_3h', 'quarter_cos', 'humidity', 'rain_1h', 'rain_3h', 'RealPower', 'wind_deg', 'weather_main', 'dayofweek_sin', 'clouds_all', 'wind_speed', 'snow_1h', 'RealPower_-0d_0h', 'hour_cos', 'quarter_sin', 'dayofweek_cos', 'month_cos', 'dayofmonth_cos', 'dayofmonth_sin', 'pressure', 'month_sin', 'hour_sin']




  0%|          | 0/35040 [00:00<?, ?it/s]

{'CV': 9.333786602859988}
{'MAE': 4.508815561959165}
{'MAPE': 6.521754187560691}
{'OPE': 0.027098355589890388}
{'RMSE': 6.863489136103311}
{'MSE': 47.10748312140817}
{'MARRE': 6.280290949909063}
{'MASE': 0.9681704888395528}
{'R2': 0.8163796738177282}
{'SMAPE': 6.360479595071582}


In [5]:
# Run 2b: holiday/temperature and calendar covariates with periodic retraining.
test = Simple_LinearRegression(
    # data and time windows
    series=bld,
    target_key='RealPower',
    train_start=train_start,
    train_end=train_end,
    pred_start=pred_start,
    pred_end=pred_end,
    # model and back-test geometry
    lags=7 * 96,
    prediction_horizon=96,
    output_chunk_length=96,
    stride=1,
    # switches
    enable_past_covariates=True,
    enable_retrain=True,
    past_covariates=[
        'is_holiday', 'temp', 'feels_like', 'temp_min', 'temp_max',
        'hour_cos', 'hour_sin',
        'dayofweek_cos', 'dayofweek_sin',
        'quarter_cos', 'quarter_sin',
        'month_cos', 'month_sin',
    ],
    # bookkeeping
    serial='2_ReT_PCo_weather_n_time',
    save_path=r'/root/autodl-tmp/load_forecast/Linear_Regression',
    save_model=True,
    save_metrics=True,
    verbose=True,
)

test.train()
test.predict()
test.cal_metrics()

['rain_3h', 'rain_1h', 'wind_deg', 'snow_1h', 'dayofmonth_sin', 'pressure', 'clouds_all', 'weather_main', 'RealPower_-0d_0h', 'wind_speed', 'dayofmonth_cos', 'RealPower', 'snow_3h', 'humidity']




  0%|          | 0/35040 [00:00<?, ?it/s]

{'CV': 8.680522886226987}
{'MAE': 4.158810901782991}
{'MAPE': 6.107924870908582}
{'OPE': 0.5719462101121752}
{'RMSE': 6.383119419835401}
{'MSE': 40.74421352787982}
{'MARRE': 5.79277242768872}
{'MASE': 0.8930145685579868}
{'R2': 0.8411830715143972}
{'SMAPE': 5.999870041955341}


In [6]:
# Run 2c: holiday flag and calendar encodings only (no temperatures), with retraining.
test = Simple_LinearRegression(
    # data and time windows
    series=bld,
    target_key='RealPower',
    train_start=train_start,
    train_end=train_end,
    pred_start=pred_start,
    pred_end=pred_end,
    # model and back-test geometry
    lags=7 * 96,
    prediction_horizon=96,
    output_chunk_length=96,
    stride=1,
    # switches
    enable_past_covariates=True,
    enable_retrain=True,
    past_covariates=[
        'is_holiday',
        'hour_cos', 'hour_sin',
        'dayofweek_cos', 'dayofweek_sin',
        'quarter_cos', 'quarter_sin',
        'month_cos', 'month_sin',
        'dayofmonth_cos', 'dayofmonth_sin',
    ],
    # bookkeeping
    serial='2_ReT_PCo_time',
    save_path=r'/root/autodl-tmp/load_forecast/Linear_Regression',
    save_model=True,
    save_metrics=True,
    verbose=True,
)

test.train()
test.predict()
test.cal_metrics()

['rain_3h', 'temp_max', 'temp', 'rain_1h', 'wind_deg', 'snow_1h', 'feels_like', 'pressure', 'clouds_all', 'weather_main', 'RealPower_-0d_0h', 'wind_speed', 'temp_min', 'RealPower', 'snow_3h', 'humidity']




  0%|          | 0/35040 [00:00<?, ?it/s]