In [1]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from impyute.imputation.cs import mice, fast_knn
import numpy as np
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, Ridge, Lasso
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error, mean_absolute_error, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestRegressor

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import statsmodels.api as sm

import tensorflow.keras.losses
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

from boruta import BorutaPy

import pygad
import pygad.kerasga

In [2]:
def timestamp_data_treatment(dataset, category='numeric'):
    if category == 'binary':
        for column in ['Hora Medicao']:
            date = dataset[column]
            _max = date.max()
            i = 0
            while 2**i < _max:
                i += 1
            binary_rep = [np.binary_repr(z,width=i) for z in date.astype('int')]
            d = []
            for k in range(i):
                d.append(np.array([int(v) for s in binary_rep for v in s[k]]))
            t = np.vstack((d)).T
#             if column == 'Mes':
#                 df_mes = pd.DataFrame(t, columns= [f'Mes_Bit_{ind}' for ind in range(i)])
#             else:
            df_hora = pd.DataFrame(t, columns= [f'Hora_Bit_{ind}' for ind in range(i)])

    elif category == '1ofN':
        for column in ['Hora Medicao']:
            one_hot = pd.get_dummies(dataset[column])
            listoflists = one_hot.values.tolist()
            new_list = []
            for lists in listoflists:
                new_list.append(''.join(map(str, lists)))
            d = []
#             if column == 'Mes':
#                 for k in range(12):
#                         d.append(np.array([int(v) for s in new_list for v in s[k]]))
#                 t = np.vstack((d)).T
#                 df_mes = pd.DataFrame(t, columns= ['Jan', 'Fev', 'Mar','Abr','Mai', 'Jun', 'Jul', 'Ago', 'Set', 'Out', 'Nov', 'Dez'])
#             if column == 'Hora Medicao':
            for k in range(24):
                    d.append(np.array([int(v) for s in new_list for v in s[k]]))
            t = np.vstack((d)).T
            df_hora = pd.DataFrame(t, columns= [f'{ind} h' for ind in range(24)])
    
    elif category == 'numeric':
        for column in ['Hora Medicao']:
            _values = dataset[column].values
            _max = dataset[column].max()
            
            _array = np.array([float(i/_max) for i in _values])
#             if column == 'Mes':
#                 df_mes = pd.DataFrame(_array, columns= ['Mes'])
#             else:
            df_hora = pd.DataFrame(_array, columns= ['Horas'])
    else:
        print('Invalid Category, please select another one...')

    return df_hora    


def preparing_data(dataset, features = [], lag = 24, normalize = 'minmax', category = 'binary', onlyHour = False, onlyMonth = False):
    _max = dataset.iloc[:,-1:].max().values
    _min = dataset.iloc[:,-1:].min().values
    _med = dataset.iloc[:,-1:].mean().values
    _std = dataset.iloc[:,-1:].std().values
    if normalize == 'minmax':
        scaler = MinMaxScaler(feature_range=(0, 1))
        dataset_norm = scaler.fit_transform(dataset.drop([i for i in dataset.columns if i in ['Dia', 'Mes', 'Ano', 'Hora Medicao']],axis=1))
        dataset_norm = pd.DataFrame(dataset_norm, 
                                   columns = dataset.drop([i for i in dataset.columns if i in ['Dia', 'Mes', 'Ano', 'Hora Medicao']],axis=1).columns)
    elif normalize == 'standard':
        scaler = StandardScaler()
        dataset_norm = scaler.fit_transform(dataset.drop([i for i in dataset.columns if i in ['Dia', 'Mes', 'Ano', 'Hora Medicao']],axis=1))
        dataset_norm = pd.DataFrame(dataset_norm, 
                                    columns = dataset.drop([i for i in dataset.columns if i in ['Dia', 'Mes', 'Ano', 'Hora Medicao']],axis=1).columns)
    else:
        print('Invalid Category, please select another one...')
     
    X, Y = look_back_function(dataset_norm['VENTO, VELOCIDADE HORARIA(m/s)'],lag)
#     trend = pd.DataFrame(X.iloc[:, -1] - X.iloc[:, -2], columns = ['Trend'])
#     X = pd.concat([trend, X], axis = 1)
    hora = timestamp_data_treatment(dataset, category)
    
    if len(features):
        for feature in features:
            if feature in dataset_norm.drop([i for i in dataset_norm.columns if i in ['Dia', 'Mes', 'Ano', 'Hora Medicao']], axis=1).columns:
                X = pd.concat([dataset_norm[feature][lag:-1].reset_index(drop=True), X], axis=1)
        
#     if onlyMonth:
#         _input = pd.concat([mes[lag:-1].reset_index(drop=True), X], axis=1)
        
#     if onlyHour:
    _input = pd.concat([hora[lag:-1].reset_index(drop=True), X], axis=1)
        
#     else:
#         _input = pd.concat([mes[lag:-1].reset_index(drop=True), hora[lag:-1].reset_index(drop=True), X], axis=1)
        
    return _input, Y,_max, _min, _med, _std
        

def look_back_function (dataset, look_back=24):
    dataX, dataY = [], []
    for i in range(len(dataset)-look_back-1):
        a = dataset[i:(i+look_back)]
        dataX.append(a)
        dataY.append(dataset[i + look_back])
    X = pd.DataFrame(np.array(dataX), columns = [f'X(t-{ind})' for ind in reversed(range(look_back))])
    Y = pd.DataFrame(np.array(dataY), columns=['Output'])
    return X, Y


def multistep_prediction(model, Input, lag):
    other_features = Input.iloc[:, :(Input.shape[1]-lag)]
    other_features_array = other_features.values

    wind_feature = Input.iloc[:,-lag:]
    wind_feature_array = wind_feature.values

    for i in range(len(wind_feature_array)):
        if i==0:
            x_multistep = np.concatenate((other_features_array[i],wind_feature_array[i]))
            _y_hat = model.predict(x_multistep.reshape(1,-1))
            predict_array = np.array(_y_hat)
        else:
            if i < lag:
                wind_array_copy =  wind_feature_array[i, :]
                wind_array_copy = wind_array_copy[:-predict_array.shape[0]]
                data_array_with_prediction = np.insert(wind_array_copy,wind_array_copy.size,predict_array)
            else:
                data_array_with_prediction = predict_array[-lag:]
            x_multistep = np.concatenate((other_features_array[i],data_array_with_prediction))
            #print(x_multistep)
            _y_hat = model.predict(x_multistep.reshape(1,-1))
            predict_array = np.append(predict_array, _y_hat)
    predict_data = pd.DataFrame(predict_array, columns=['Predictions'])
    return predict_data

def mask_classification(raw_data):
    classification_list = np.array([])
    for idx, x in raw_data.iterrows():
        if 0 <= x.values <= 4:
            classification_list = np.append(classification_list, 1)
        elif 4 < x.values <= 10:
            classification_list = np.append(classification_list, 2)
        elif 10 < x.values <= 15:
            classification_list = np.append(classification_list, 3)
        elif 15 < x.values <= 25:
            classification_list = np.append(classification_list, 4)
        else:
            classification_list = np.append(classification_list, 5)
    return classification_list

def MAPE(testY, testPredict):
    soma, cont = 0, 0
    for idx, value in testY.iterrows():
        if abs(value.values) >= 0.01:
            erro = abs((value - testPredict.values[idx])/value)
            soma += erro
            cont += 1
    mape = (soma/cont)*100
    return mape

In [3]:
df_train_micetreated = pd.read_csv('Treino_Arraial_do_Cabo_MICE.csv', sep = ';')
df_test_micetreated = pd.read_csv('Teste_Arraial_do_Cabo_MICE.csv', sep = ';')

In [4]:
database = pd.concat([df_train_micetreated, df_test_micetreated]).reset_index(drop=True)
database

Unnamed: 0,Dia,Mes,Ano,Hora Medicao,"PRECIPITACAO TOTAL, HORARIO(mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)","PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)",PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB),PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB),RADIACAO GLOBAL(Kj/m�),...,TEMPERATURA MINIMA NA HORA ANT. (AUT)(�C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(�C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(�C),TENSAO DA BATERIA DA ESTACAO(V),UMIDADE REL. MAX. NA HORA ANT. (AUT)(%),UMIDADE REL. MIN. NA HORA ANT. (AUT)(%),"UMIDADE RELATIVA DO AR, HORARIA(%)","VENTO, DIRECAO HORARIA (gr)(� (gr))","VENTO, RAJADA MAXIMA(m/s)","VENTO, VELOCIDADE HORARIA(m/s)"
0,1,1,2015,0,0.0,1012.6,1015.039000,1012.7,1012.3,-3.335,...,24.8,21.9,21.1,12.6,82.0,79.0,80.0,89.0,8.1,4.2
1,1,1,2015,1,0.0,1013.6,1014.239084,1013.6,1012.4,-3.381,...,24.6,21.5,20.5,12.6,81.0,76.0,76.0,42.0,10.6,6.7
2,1,1,2015,2,0.0,1012.8,1013.438794,1013.6,1012.7,-3.530,...,25.0,20.9,20.1,12.6,77.0,74.0,77.0,78.0,10.4,4.0
3,1,1,2015,3,0.0,1012.0,1012.639147,1012.9,1011.9,-3.438,...,24.5,21.6,20.8,12.5,82.0,77.0,82.0,61.0,10.2,7.3
4,1,1,2015,4,0.0,1011.2,1011.839071,1012.1,1011.0,-3.179,...,24.4,21.6,21.3,12.5,83.0,82.0,83.0,45.0,12.2,6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54979,9,4,2021,19,0.0,1019.9,1020.654954,1020.2,1019.9,1719.094,...,27.1,19.5,18.1,14.4,62.0,57.0,58.0,83.0,8.2,6.7
54980,9,4,2021,20,0.0,1019.7,1020.457583,1020.0,1019.7,783.486,...,26.0,18.6,17.7,13.5,63.0,57.0,62.0,94.0,8.7,7.5
54981,9,4,2021,21,0.0,1019.3,1020.058808,1019.7,1019.3,55.956,...,25.4,19.5,17.7,12.8,69.0,60.0,68.0,92.0,10.3,8.3
54982,9,4,2021,22,0.0,1019.5,1020.259466,1019.5,1019.2,-3.539,...,25.2,19.5,18.8,12.6,70.0,67.0,68.0,80.0,11.5,8.7


In [5]:
selection = database['Mes'] == 1
selection

0         True
1         True
2         True
3         True
4         True
         ...  
54979    False
54980    False
54981    False
54982    False
54983    False
Name: Mes, Length: 54984, dtype: bool

In [6]:
Janeiro = database.loc[database['Mes'] == 1]

# Janeiro.to_excel(r'C:\Users\diego.silva\Dropbox\Projetos Finais Mestrado\Janeiro.xlsx', sheet_name='janeiro', index = False)

In [7]:
Janeiro

Unnamed: 0,Dia,Mes,Ano,Hora Medicao,"PRECIPITACAO TOTAL, HORARIO(mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)","PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)",PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB),PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB),RADIACAO GLOBAL(Kj/m�),...,TEMPERATURA MINIMA NA HORA ANT. (AUT)(�C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(�C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(�C),TENSAO DA BATERIA DA ESTACAO(V),UMIDADE REL. MAX. NA HORA ANT. (AUT)(%),UMIDADE REL. MIN. NA HORA ANT. (AUT)(%),"UMIDADE RELATIVA DO AR, HORARIA(%)","VENTO, DIRECAO HORARIA (gr)(� (gr))","VENTO, RAJADA MAXIMA(m/s)","VENTO, VELOCIDADE HORARIA(m/s)"
0,1,1,2015,0,0.0,1012.6,1015.039000,1012.7,1012.3,-3.335,...,24.8,21.900000,21.100000,12.6,82.000000,79.000000,80.000000,89.0,8.1,4.2
1,1,1,2015,1,0.0,1013.6,1014.239084,1013.6,1012.4,-3.381,...,24.6,21.500000,20.500000,12.6,81.000000,76.000000,76.000000,42.0,10.6,6.7
2,1,1,2015,2,0.0,1012.8,1013.438794,1013.6,1012.7,-3.530,...,25.0,20.900000,20.100000,12.6,77.000000,74.000000,77.000000,78.0,10.4,4.0
3,1,1,2015,3,0.0,1012.0,1012.639147,1012.9,1011.9,-3.438,...,24.5,21.600000,20.800000,12.5,82.000000,77.000000,82.000000,61.0,10.2,7.3
4,1,1,2015,4,0.0,1011.2,1011.839071,1012.1,1011.0,-3.179,...,24.4,21.600000,21.300000,12.5,83.000000,82.000000,83.000000,45.0,12.2,6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
53347,31,1,2021,19,0.0,1010.1,1010.850953,1010.6,1010.0,1885.523,...,25.4,19.400000,18.500000,14.4,68.000000,63.000000,68.000000,66.0,18.1,10.8
53348,31,1,2021,20,0.0,1009.8,1010.552745,1010.2,1009.7,1335.037,...,25.0,20.200000,19.500000,14.4,75.000000,68.000000,74.000000,67.0,17.6,11.4
53349,31,1,2021,21,0.0,1009.9,1010.654846,1009.9,1009.7,568.797,...,24.2,20.800000,20.200000,13.6,81.000000,74.000000,81.000000,65.0,17.6,9.5
53350,31,1,2021,22,0.0,1010.7,1011.456972,1010.7,1009.9,128.816,...,23.6,21.900000,20.700000,13.0,90.000000,81.000000,90.000000,65.0,14.7,9.1


In [8]:
Janeiro_Train = Janeiro[:4104].reset_index(drop=True)
Janeiro_val = Janeiro[4104:4464].reset_index(drop=True)
Janeiro_Test = Janeiro[4464:].reset_index(drop=True)

In [9]:
Janeiro_val

Unnamed: 0,Dia,Mes,Ano,Hora Medicao,"PRECIPITACAO TOTAL, HORARIO(mm)","PRESSAO ATMOSFERICA AO NIVEL DA ESTACAO, HORARIA(mB)","PRESSAO ATMOSFERICA REDUZIDA NIVEL DO MAR, AUT(mB)",PRESSAO ATMOSFERICA MAX.NA HORA ANT. (AUT)(mB),PRESSAO ATMOSFERICA MIN. NA HORA ANT. (AUT)(mB),RADIACAO GLOBAL(Kj/m�),...,TEMPERATURA MINIMA NA HORA ANT. (AUT)(�C),TEMPERATURA ORVALHO MAX. NA HORA ANT. (AUT)(�C),TEMPERATURA ORVALHO MIN. NA HORA ANT. (AUT)(�C),TENSAO DA BATERIA DA ESTACAO(V),UMIDADE REL. MAX. NA HORA ANT. (AUT)(%),UMIDADE REL. MIN. NA HORA ANT. (AUT)(%),"UMIDADE RELATIVA DO AR, HORARIA(%)","VENTO, DIRECAO HORARIA (gr)(� (gr))","VENTO, RAJADA MAXIMA(m/s)","VENTO, VELOCIDADE HORARIA(m/s)"
0,17,1,2020,0,0.0,1012.0,1012.694969,1012.0,1011.0,-2.503,...,26.3,24.8,24.1,12.7,91.0,87.0,91.0,117.0,3.9,2.2
1,17,1,2020,1,0.0,1012.5,1013.250477,1012.5,1012.0,-2.257,...,26.0,25.3,24.6,12.7,93.0,91.0,92.0,223.0,2.7,0.4
2,17,1,2020,2,0.0,1012.4,1013.150403,1012.5,1012.3,-1.469,...,26.7,25.6,22.5,12.7,92.0,78.0,78.0,270.0,5.0,2.5
3,17,1,2020,3,0.0,1012.4,1013.151908,1012.4,1012.2,-2.628,...,25.8,22.4,20.9,12.6,78.0,71.0,77.0,270.0,9.3,4.9
4,17,1,2020,4,0.0,1012.3,1013.052588,1012.4,1012.2,-2.519,...,25.8,24.7,21.9,12.6,93.0,77.0,93.0,267.0,11.0,4.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,31,1,2020,19,0.0,1010.4,1011.145439,1010.5,1010.3,2246.053,...,28.1,26.5,26.2,14.4,91.0,88.0,90.0,96.0,5.9,4.8
356,31,1,2020,20,0.0,1010.4,1011.148671,1011.1,1010.2,454.527,...,26.7,26.5,25.7,13.4,96.0,90.0,96.0,104.0,6.4,3.5
357,31,1,2020,21,0.4,1011.8,1012.549458,1011.9,1010.4,147.077,...,26.7,26.6,26.1,13.2,99.0,96.0,98.0,99.0,5.3,0.6
358,31,1,2020,22,1.2,1011.8,1012.550208,1012.1,1011.7,64.361,...,26.5,27.2,25.2,12.6,99.0,92.0,92.0,293.0,5.4,0.6


In [10]:
X_train, Y_train,_maxtrain, _mintrain, _meantrain, _stdtrain = preparing_data(Janeiro_Train, lag = 12, normalize = 'standard', category = '1ofN', onlyHour= True)
X_test, Y_test,_maxtest, _mintest, _meantest, _stdtest = preparing_data(Janeiro_Test, lag = 12, normalize = 'standard', category = '1ofN', onlyHour= True)

S = 12
N_input = int(len(X_test)/S)
frames_input = [ X_test.iloc[i*S:(i+1)*S].copy().reset_index(drop=True) for i in range(N_input) ]

N_output = int(len(Y_test)/S)
frames_output = [ Y_test.iloc[i*S:(i+1)*S].copy().reset_index(drop=True) for i in range(N_output) ]

In [11]:
len(frames_output)

60

In [12]:
frames_input[0]

Unnamed: 0,0 h,1 h,2 h,3 h,4 h,5 h,6 h,7 h,8 h,9 h,...,X(t-9),X(t-8),X(t-7),X(t-6),X(t-5),X(t-4),X(t-3),X(t-2),X(t-1),X(t-0)
0,0,0,0,0,0,0,0,0,0,0,...,-1.267454,-1.244081,-1.046184,-0.979418,-1.002832,-0.9183,-0.892708,-0.901285,-1.397719,-1.217649
1,0,0,0,0,0,0,0,0,0,0,...,-1.244081,-1.046184,-0.979418,-1.002832,-0.9183,-0.892708,-0.901285,-1.397719,-1.217649,-1.454146
2,0,0,0,0,0,0,0,0,0,0,...,-1.046184,-0.979418,-1.002832,-0.9183,-0.892708,-0.901285,-1.397719,-1.217649,-1.454146,-0.788881
3,0,0,0,0,0,0,0,0,0,0,...,-0.979418,-1.002832,-0.9183,-0.892708,-0.901285,-1.397719,-1.217649,-1.454146,-0.788881,-0.725019
4,0,0,0,0,0,0,0,0,0,0,...,-1.002832,-0.9183,-0.892708,-0.901285,-1.397719,-1.217649,-1.454146,-0.788881,-0.725019,-0.706547
5,0,0,0,0,0,0,0,0,0,0,...,-0.9183,-0.892708,-0.901285,-1.397719,-1.217649,-1.454146,-0.788881,-0.725019,-0.706547,-0.510718
6,0,0,0,0,0,0,0,0,0,0,...,-0.892708,-0.901285,-1.397719,-1.217649,-1.454146,-0.788881,-0.725019,-0.706547,-0.510718,-0.665038
7,0,0,0,0,0,0,0,0,0,0,...,-0.901285,-1.397719,-1.217649,-1.454146,-0.788881,-0.725019,-0.706547,-0.510718,-0.665038,-0.797983
8,0,0,0,0,0,0,0,0,0,0,...,-1.397719,-1.217649,-1.454146,-0.788881,-0.725019,-0.706547,-0.510718,-0.665038,-0.797983,-0.74774
9,0,0,0,0,0,0,0,0,0,0,...,-1.217649,-1.454146,-0.788881,-0.725019,-0.706547,-0.510718,-0.665038,-0.797983,-0.74774,-0.720403


In [None]:
for _lag in [3, 6, 12, 24]:
    for norm in ['minmax']:
        for cat in ['1ofN', 'binary', 'numeric']:
            print('----'*15)
            print(f'lag: {_lag}, normalize: {norm}, category: {cat}')
            X_train, Y_train,_maxtrain, _mintrain, _meantrain, _stdtrain = preparing_data(Janeiro_Train, 
                                                                                          lag = _lag, 
                                                                                          normalize = norm, 
                                                                                          category = cat, 
                                                                                          onlyHour= True)
            X_val, Y_val,_maxval, _minval, _meanval, _stdval = preparing_data(Janeiro_val, 
                                                                              lag = _lag, 
                                                                              normalize = norm, 
                                                                              category = cat, 
                                                                              onlyHour= True)
            

#             N_input = int(len(X_val)/_lag)
#             frames_input = [ X_val.iloc[i*_lag:(i+1)*_lag].copy().reset_index(drop=True) for i in range(N_input) ]

#             N_output = int(len(Y_val)/_lag)
#             frames_output = [ Y_val.iloc[i*_lag:(i+1)*_lag].copy().reset_index(drop=True) for i in range(N_output) ]


            for j in [2, 3, 4, 5, 6, 7, 8, 9 ,10]:   
                model = Sequential()

                model.add(Dense(j, input_dim=X_train.shape[1], activation='relu'))


                model.add(Dense(1, activation='linear'))     
                opt = Adam(lr=0.11)
                model.compile(loss='mean_absolute_error', optimizer=opt, 
                              metrics=['mean_absolute_error','mean_squared_error'])

                earlyStopping = EarlyStopping(monitor= 'mean_absolute_error', 
                                              patience=250, 
                                              verbose=1, 
                                              restore_best_weights=True)

                model.fit(X_train, Y_train, epochs=500,validation_split=(0.2),callbacks=[earlyStopping], verbose=0)
                soma_rmse = 0
                soma_mape = 0
                for i in range(1, 10):
                    testpredict_day = multistep_prediction(model,X_val[:24].reset_index(drop=True), _lag)

                    if cat == 'standard':
                        orig_y_eval_test_day = Y_val[:24].reset_index(drop=True)*(_stdval) + _meanval
                        orig_y_hat_test_day = testpredict_day*(_stdval) + _meanval
                    else:
                        orig_y_eval_test_day = Y_val[:24].reset_index(drop=True)*(_maxval - _minval) + _minval
                        orig_y_hat_test_day = testpredict_day*(_maxval - _minval) + _minval

                    RMSE_test_day = (mean_squared_error(orig_y_eval_test_day, orig_y_hat_test_day))**0.5
                    MAPE_test_day = MAPE(orig_y_eval_test_day, orig_y_hat_test_day)

                    soma_rmse += RMSE_test_day
                    soma_mape += MAPE_test_day

                print(f'RMSE médio: {soma_rmse/10}')
                print(f'MAPE médio: {soma_mape/10}')

------------------------------------------------------------
lag: 3, normalize: minmax, category: 1ofN
RMSE médio: 1.9115410439617615
MAPE médio: Output    47.953045
Name: 0, dtype: float64
Restoring model weights from the end of the best epoch.
Epoch 00441: early stopping
RMSE médio: 1.2687122720343549
MAPE médio: Output    39.761719
Name: 0, dtype: float64
Restoring model weights from the end of the best epoch.
Epoch 00374: early stopping
RMSE médio: 1.2643913743468296
MAPE médio: Output    47.615478
Name: 0, dtype: float64
Restoring model weights from the end of the best epoch.
Epoch 00394: early stopping
RMSE médio: 1.3092302379514962
MAPE médio: Output    46.530732
Name: 0, dtype: float64
Restoring model weights from the end of the best epoch.
Epoch 00266: early stopping
RMSE médio: 1.1514492840390702
MAPE médio: Output    27.561941
Name: 0, dtype: float64
Restoring model weights from the end of the best epoch.
Epoch 00376: early stopping
RMSE médio: 3.3368389223265202
MAPE médio:

In [None]:
from xgboost import XGBRegressor
for _lag in [3, 6, 12, 24]:
    for norm in ['minmax']:
        for cat in ['1ofN', 'binary', 'numeric']:
            print('----'*15)
            print(f'lag: {_lag}, normalize: {norm}, category: {cat}')
            X_train, Y_train,_maxtrain, _mintrain, _meantrain, _stdtrain = preparing_data(Janeiro_Train, 
                                                                                          lag = _lag, 
                                                                                          normalize = norm, 
                                                                                          category = cat, 
                                                                                          onlyHour= True)
            X_val, Y_val,_maxval, _minval, _meanval, _stdval = preparing_data(Janeiro_val, 
                                                                        lag = _lag, 
                                                                        normalize = norm, 
                                                                        category = cat, 
                                                                        onlyHour= True)
            
#             N_input = int(len(X_val)/_lag)
#             frames_input = [ X_val.iloc[i*_lag:(i+1)*_lag].copy().reset_index(drop=True) for i in range(N_input) ]

#             N_output = int(len(Y_val)/_lag)
#             frames_output = [ Y_val.iloc[i*_lag:(i+1)*_lag].copy().reset_index(drop=True) for i in range(N_output) ]

            xgbmodel = XGBRegressor(n_estimators=150, 
                     max_depth=8,
                     eta=0.4, subsample=0.6,
                     colsample_bytree=0.4,
                     learning_rate = 0.15)

            xgbmodel.fit(X_train,Y_train)

            soma_rmse = 0
            soma_mape = 0
            for i in range(10):
                testpredict_day = multistep_prediction(xgbmodel,X_val[:24].reset_index(drop=True), _lag)
                if cat == 'standard':
                    orig_y_eval_test_day = Y_val[:24].reset_index(drop=True)*(_stdval) + _meanval
                    orig_y_hat_test_day = testpredict_day*(_stdval) + _meanval
                else:
                    orig_y_eval_test_day = Y_val[:24].reset_index(drop=True)*(_maxval - _minval) + _minval
                    orig_y_hat_test_day = testpredict_day*(_maxval - _minval) + _minval
                    
                RMSE_test_day = (mean_squared_error(orig_y_eval_test_day, orig_y_hat_test_day))**0.5
                MAPE_test_day = MAPE(orig_y_eval_test_day, orig_y_hat_test_day)

                soma_rmse += RMSE_test_day
                soma_mape += MAPE_test_day

            print(f'RMSE médio: {soma_rmse/10}')
            print(f'MAPE médio: {soma_mape/10}')

In [23]:
from sklearn.svm import SVR
for _lag in [3,6,12,24]:
    for norm in ['minmax']:
        for cat in ['1ofN', 'binary', 'numeric']:
            print('----'*15)
            print(f'lag: {_lag}, normalize: {norm}, category: {cat}')
            X_train, Y_train,_maxtrain, _mintrain, _meantrain, _stdtrain = preparing_data(Janeiro_Train, 
                                                                                          lag = _lag, 
                                                                                          normalize = norm, 
                                                                                          category = cat, 
                                                                                          onlyHour= True)
            X_val, Y_val,_maxval, _minval, _meanval, _stdval = preparing_data(Janeiro_val, 
                                                                        lag = _lag, 
                                                                        normalize = norm, 
                                                                        category = cat, 
                                                                        onlyHour= True)
            
            X_test, Y_test,_maxtest, _mintest, _meantest, _stdtest = preparing_data(Janeiro_Test, 
                                                                                    lag = _lag, 
                                                                                    normalize = norm, 
                                                                                    category = cat, 
                                                                                    onlyHour= True)
#             N_input = int(len(X_val)/_lag)
#             frames_input = [ X_val.iloc[i*_lag:(i+1)*_lag].copy().reset_index(drop=True) for i in range(N_input) ]

#             N_output = int(len(Y_val)/_lag)
#             frames_output = [ Y_val.iloc[i*_lag:(i+1)*_lag].copy().reset_index(drop=True) for i in range(N_output) ]

            regressor = SVR(kernel='linear', C=0.5, epsilon=10)

            regressor.fit(X_train, Y_train)

            soma_rmse = 0
            soma_mape = 0
            for i in range(10):
                testpredict = multistep_prediction(regressor,X_val[-24:].reset_index(drop=True), _lag)
                if cat == 'standard':
                    orig_y_eval_test = Y_val[-24:].reset_index(drop=True)*(_stdval) + _meanval
                    orig_y_hat_test = testpredict_day*(_stdval) + _meanval
                else:
                    orig_y_eval_test = Y_val[-24:].reset_index(drop=True)*(_maxval - _minval) + _minval
                    orig_y_hat_test = testpredict_day*(_maxval - _minval) + _minval

                RMSE_test_day = (mean_squared_error(orig_y_eval_test, orig_y_hat_test))**0.5
                MAPE_test_day = MAPE(orig_y_eval_test, orig_y_hat_test)

                soma_rmse += RMSE_test_day
                soma_mape += MAPE_test_day

            print(f'RMSE médio: {soma_rmse/10}')
            print(f'MAPE médio: {soma_mape/10}')

------------------------------------------------------------
lag: 3, normalize: minmax, category: 1ofN


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 3, normalize: minmax, category: binary


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 3, normalize: minmax, category: numeric


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 6, normalize: minmax, category: 1ofN


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 6, normalize: minmax, category: binary


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 6, normalize: minmax, category: numeric


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 12, normalize: minmax, category: 1ofN


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 12, normalize: minmax, category: binary


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 12, normalize: minmax, category: numeric


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 24, normalize: minmax, category: 1ofN


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 24, normalize: minmax, category: binary


  return f(*args, **kwargs)


RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64
------------------------------------------------------------
lag: 24, normalize: minmax, category: numeric
RMSE médio: 2.371796084545775
MAPE médio: Output    113.842589
Name: 0, dtype: float64


  return f(*args, **kwargs)


In [None]:
X_test, Y_test,_maxtest, _mintest, _meantest, _stdtest = preparing_data(Janeiro_Test, 
                                                                        lag = , 
                                                                        normalize = , 
                                                                        category = , 
                                                                        onlyHour= True)
X_test_day = X_test[-24:].reset_index(drop=True)
Y_test_day = Y_test[-24:].reset_index(drop=True)

testpredict_day_rn = multistep_prediction(model,X_test_day, 24)



orig_y_eval_test_day_rn = Y_test_day*(_maxtest - _mintest) + _mintest
orig_y_hat_test_day_rn = testpredict_day_rn*(_maxtest - _mintest) + _mintest

RMSE_test_day_rn = (mean_squared_error(orig_y_eval_test_day_rn, orig_y_hat_test_day_rn))**0.5
MAPE_test_day_rn = MAPE(orig_y_eval_test_day_rn, orig_y_hat_test_day_rn)

print(f'RMSE : {RMSE_test_day_rn}')
print(f'MAPE : {MAPE_test_day_rn}')

testpredict_day_rn = multistep_prediction(model,X_test_day, 24)



orig_y_eval_test_day_rn = Y_test_day*(_maxtest - _mintest) + _mintest
orig_y_hat_test_day_rn = testpredict_day_rn*(_maxtest - _mintest) + _mintest

RMSE_test_day_rn = (mean_squared_error(orig_y_eval_test_day_rn, orig_y_hat_test_day_rn))**0.5
MAPE_test_day_rn = MAPE(orig_y_eval_test_day_rn, orig_y_hat_test_day_rn)

print(f'RMSE : {RMSE_test_day_rn}')
print(f'MAPE : {MAPE_test_day_rn}')

testpredict_day_xgb = multistep_prediction(xgbmodel,X_test_day, 24)

orig_y_eval_test_day_xgb = Y_test_day*(_maxtest - _mintest) + _mintest
orig_y_hat_test_day_xgb = testpredict_day_xgb*(_maxtest - _mintest) + _mintest

RMSE_test_day_xgb = (mean_squared_error(orig_y_eval_test_day_xgb, orig_y_hat_test_day_xgb))**0.5
MAPE_test_day_xgb = MAPE(orig_y_eval_test_day_xgb, orig_y_hat_test_day_xgb)

print(f'RMSE : {RMSE_test_day_xgb}')
print(f'MAPE : {MAPE_test_day_xgb}')

testpredict_day_svr = multistep_prediction(regressor,X_test_day, 24)

orig_y_eval_test_day_svr = Y_test_day*(_maxtest - _mintest) + _mintest
orig_y_hat_test_day_svr = testpredict_day_svr*(_maxtest - _mintest) + _mintest

RMSE_test_day_svr = (mean_squared_error(orig_y_eval_test_day_svr, orig_y_hat_test_day_svr))**0.5
MAPE_test_day_svr = MAPE(orig_y_eval_test_day_svr, orig_y_hat_test_day_svr)

print(f'RMSE : {RMSE_test_day_svr}')
print(f'MAPE : {MAPE_test_day_svr}')