Для временного ряда bike-sharing построить три модели:
* Датасет аггрегировать по неделям
* Простое экспоненциальное сглаживание
* Двойное экспоненциальное сглаживание
* Тройное экспоненциальное сглаживание (тип модели additive или multiplicative) выбрать самостоятельно 


Для финального отчета сделать кросс-валидацию, пояснить выбранные метрики, а так же их финальные значения. 





In [68]:
import pandas as pd
import numpy as np 
from plotly.graph_objects import *
from plotly.offline import init_notebook_mode,iplot
from statsmodels.tsa.api import ExponentialSmoothing
from statsmodels.tsa.holtwinters import SimpleExpSmoothing  
from sklearn.preprocessing import MinMaxScaler

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.multioutput import MultiOutputRegressor
from statsmodels.tsa.holtwinters import ExponentialSmoothing

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')
df = pd.read_csv('../datatest/bike-sharing.csv')
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,hr,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,0,6,0,1,0.24,0.2879,0.81,0.0,3,13,16
1,2,2011-01-01,1,0,1,1,0,6,0,1,0.22,0.2727,0.8,0.0,8,32,40
2,3,2011-01-01,1,0,1,2,0,6,0,1,0.22,0.2727,0.8,0.0,5,27,32
3,4,2011-01-01,1,0,1,3,0,6,0,1,0.24,0.2879,0.75,0.0,3,10,13
4,5,2011-01-01,1,0,1,4,0,6,0,1,0.24,0.2879,0.75,0.0,0,1,1


In [69]:
df = df[['dteday','cnt']]
df['dteday'] = pd.to_datetime(df['dteday'])


# Агрегация по неделям

In [70]:
df.set_index('dteday',inplace=True)
df_agg = df.resample('W').mean()
df_agg.rename(columns = {'cnt' : 'mean'}, inplace = True) # Агрегируем по среднему значению


In [71]:
df_agg = df_agg[:-1]


# Простое экспоненциальное сглаживание

In [72]:
df_ses = SimpleExpSmoothing(df_agg['mean']).fit(smoothing_level = 1/(7),
                                                       optimized = False,use_brute = True).fittedvalues

In [73]:
iplot(Figure(data=[Scatter(x = df_agg.index,
                          y = df_agg['mean'],
                          name = 'Original'),
                  Scatter(x = df_agg.index,
                         y = df_ses,
                         name = 'Simple Exponential Smoothing')]))

# Двойное экспоненциальное сглаживание

In [94]:
def create_configs():
    configs = list()
    trend_list = ['add','mul',None]
    s_list = ['add','mul',None]
    d_list = [True, False ]
    s_periods = 28
    
    for t in trend_list:
        for s in s_list:
            for d in d_list:
                config = dict( trend = t, seasonal = s, damped_trend = d )
                if s!=None:
                    config["seasonal_periods"] = s_periods
                configs.append(config)
    return configs
            

            

In [95]:
configs  = create_configs()

In [96]:
def cv_run(data,config):
    mae_list = list()
    r2_list = list()
    tscv = TimeSeriesSplit(test_size = 28)
    for train_index, test_index in tscv.split(data):
        train, test = data[train_index], data[test_index]
        mod = ExponentialSmoothing(train, 
                                   **config).fit()
        predictions = mod.forecast(len(test))
        mae_list.append(mean_absolute_error(test,predictions))
        r2_list.append(r2_score(test,predictions))
    return {'mae':np.mean(mae_list),'r2':np.mean(r2_list)}


In [97]:
results = {}


In [98]:
for i in range(len(configs)):
    config = configs[i]
    try:
        results[i] = cv_run(df_agg['mean'], config)
    except:
        results[i] = None
        continue


In [102]:
df_agg['mean']

dteday
2011-01-02     38.000000
2011-01-09     58.074074
2011-01-16     55.368098
2011-01-23     57.320261
2011-01-30     55.388489
                 ...    
2012-12-02    210.401198
2012-12-09    224.773810
2012-12-16    214.083333
2012-12-23    158.904762
2012-12-30     64.578313
Freq: W-SUN, Name: mean, Length: 105, dtype: float64

In [103]:
results

{0: None,
 1: None,
 2: None,
 3: None,
 4: None,
 5: None,
 6: None,
 7: None,
 8: None,
 9: None,
 10: None,
 11: None,
 12: None,
 13: None,
 14: None,
 15: None,
 16: None,
 17: None}

In [104]:
config_opt = configs[17]

In [105]:
train,test = df_agg[0:-28],df_agg[-28:]
mod = ExponentialSmoothing(train['mean'], 
                           **config_opt 
                           ).fit()
preds = mod.forecast(len(test))

In [106]:
iplot(Figure(data = [Scatter(y = test['mean'],name = 'Original'),Scatter(y = preds.values,name = 'Predictions')  ]))             

In [93]:
df_agg['des_mul']=ExponentialSmoothing(df_agg['AEP_MW'], trend='mul').fit().fittedvalues


iplot(Figure(data=[Scatter(x =df_agg.index,
                          y = df_agg['AEP_MW'],
                          name = 'Original'),
                  Scatter(x = df_agg.index,
                         y = df_agg['des_mul'],
                         name = 'Double Exponential Smoothing (Multiplicative)')]))

KeyError: 'AEP_MW'

# Тройное экспоненциальное сглаживание

In [43]:
def cv_run3(data,config):
    mae_list = list()
    r2_list = list()
    tscv = TimeSeriesSplit(test_size = 7)
    for train_index, test_index in tscv.split(data):
        train, test = data[train_index], data[test_index]
        mod = ExponentialSmoothing(train, 
    return {'mae':np.mean(mae_list),'r2':np.mean(r2_list)}
                                   **config).fit(optimized = True)
        predictions = mod.forecast(len(test))
        mae_list.append(mean_absolute_error(test,predictions))
        r2_list.append(r2_score(test,predictions))
    return {'mae':np.mean(mae_list),'r2':np.mean(r2_list)}

for i in range(len(configs)):
    config = configs[i]
    try:
        results[i] = cv_run3(df_agg['mean'], config)

    except:
        results[i] = None
        continue

In [45]:
results

{0: None,
 1: None,
 2: None,
 3: None,
 4: None,
 5: None,
 6: None,
 7: None,
 8: None,
 9: None,
 10: None,
 11: None,
 12: None,
 13: None,
 14: None,
 15: None,
 16: None,
 17: None}

In [406]:
'''df_agg['tes_mul'] = ExponentialSmoothing(df_agg['mean'], 
                                         trend = 'mul', 
                                         seasonal = 'mul', 
                                         seasonal_periods = 12).fit().fittedvalues

iplot(Figure(data = [Scatter(x =df_agg.index,
                          y = df_agg['mean'],
                          name = 'Original'),
                 
                  Scatter(x = df_agg.index,
                         y = df_agg['tes_mul'],
                         name = 'Tripple Exponential Smoothing (Multiplicative)')]))'''

In [407]:
train,test = df_agg[0:-50],df_agg[-50:]
mod = ExponentialSmoothing(train['mean'], 
                           trend = 'mul', 
                           seasonal_periods = 48, 
                           seasonal = 'mul' 
                           ).fit(optimized = True)

In [408]:
mod.summary()

0,1,2,3
Dep. Variable:,mean,No. Observations:,56
Model:,ExponentialSmoothing,SSE,1798.458
Optimized:,True,AIC,298.283
Trend:,Multiplicative,BIC,403.601
Seasonal:,Multiplicative,AICC,6238.283
Seasonal Periods:,48,Date:,"Mon, 13 Dec 2021"
Box-Cox:,False,Time:,01:58:13
Box-Cox Coeff.:,,,

0,1,2,3
,coeff,code,optimized
smoothing_level,0.5989615,alpha,True
smoothing_trend,3.8849e-14,beta,True
smoothing_seasonal,1.4388e-14,gamma,True
initial_level,35.152004,l.0,True
initial_trend,1.0103148,b.0,True
initial_seasons.0,1.3146006,s.0,True
initial_seasons.1,1.0876732,s.1,True
initial_seasons.2,1.2164268,s.2,True
initial_seasons.3,0.9352155,s.3,True


In [409]:
preds = mod.forecast(len(test))


In [410]:
trace1 = Scatter(x = test.index, y = test['mean'], name = 'Original')
trace2 = Scatter(x = test.index,y = preds, name = 'Exponential Smoothing')
iplot(Figure(data = [trace1, trace2]))