In [3]:
from pandas import read_csv
import pandas as pd
from pandas.plotting import lag_plot
import seaborn as sn
import matplotlib.pyplot as plt
import numpy as np
import os
import zipfile
import io
from sklearn.preprocessing import MinMaxScaler
import matplotlib.cm as cm
from bokeh.layouts import gridplot
from bokeh.plotting import figure, output_file, show
import plotly.express as px
import plotly
from IPython.display import Image
import glob
import cv2

# Imports
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit
import sklearn.metrics as sm
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn import svm
from sklearn.linear_model import LinearRegression 
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import math
# Para guardar los modelos entrenados
import joblib as joblib
# Para crossvalidación
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate

In [4]:
df_predictor_lag3_escalado = read_csv('../Datos_preprocesados/predictor_lag3_escalado.csv', encoding='latin-1', sep = ',', na_values = ['NaN', 'NaT'])
df_predictor_lag3_escalado = df_predictor_lag3_escalado.set_index('Fecha')
df_predictor_lag3_escalado.head(3)

df_predictor_lag5_escalado = read_csv('../Datos_preprocesados/predictor_lag5_escalado.csv', encoding='latin-1', sep = ',', na_values = ['NaN', 'NaT'])
df_predictor_lag5_escalado = df_predictor_lag5_escalado.set_index('Fecha')
df_predictor_lag5_escalado.head(3)


X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(df_predictor_lag3_escalado.drop(['Incidentes'], axis=1),
                                        df_predictor_lag3_escalado['Incidentes'], train_size = 0.8, test_size = 0.2, 
                                        random_state = 42, shuffle = False)

X_train_5, X_test_5, y_train_5, y_test_5 = train_test_split(df_predictor_lag5_escalado.drop(['Incidentes'], axis=1),
                                        df_predictor_lag5_escalado['Incidentes'], train_size = 0.8, test_size = 0.2, 
                                        random_state = 42, shuffle = False)

In [7]:
cv = TimeSeriesSplit(n_splits = 10)

def search_grid(estimador, grid, X_train, y_train):
    # Buscamos los mejores hiperparametros
    clf = GridSearchCV(estimator=estimador, 
                        param_grid=grid,
                        cv=cv,
                        #refit=True,
                        #error_score=0,
                        # POR DEFECTO -> R^2
                        #scoring = miscorer,#'neg_root_mean_squared_error',
                        scoring = 'neg_root_mean_squared_error',
                        #scoring = miscorer,
                        n_jobs=-1,
                        return_train_score=True)

    clf.fit(X_train, y_train)
    
    return clf

In [5]:
rf = RandomForestRegressor(random_state = 42)

grid = {
    'bootstrap': [True],
    'max_depth': [5, 7, 10, 15, 20],
    'max_features': [2, 3, 5],
    'min_samples_leaf': [1, 2, 4, 5],
    'min_samples_split': [4, 8, 10, 14],
    'n_estimators': [100, 300, 500, 1000, 1500]
}

In [8]:
clf_rf = search_grid(rf, grid, X_train_3, y_train_3)

In [9]:
def MAE(y_pred, y_test):
    mae = round(sm.mean_absolute_error(y_test, y_pred), 2)
    print("Mean absolute error =", mae)
    return mae

def RMSE(y_pred, y_test):
    rmse = round(math.sqrt(sm.mean_squared_error(y_test, y_pred)), 2)
    print("RMSE =", rmse)
    return rmse

def grafica(p, t, i):
    
    plt.style.use('seaborn-darkgrid')  
    
    pd.Series(p).plot(color = 'forestgreen', linewidth = 1.8, alpha = 0.9, label = 'Incidentes predecidos')
    t.plot(color = 'dodgerblue', linewidth = 1.8, alpha = 0.9, label = 'Incidentes Reales') 
    
    titulo = 'Indicentes t+'+str(i+1)
    if i+1 == 0:
        titulo = 'Incidentes t'  
        
    plt.title(titulo, loc ='center', fontsize = 12, fontweight = 0, color = 'black')
    
    plt.legend(loc ='best')

    plt.xticks(rotation = 90)
    plt.show()
    

def calcular_predicciones_lag3(modelo, y_pred, X_test):
predicciones = []
X_test_modelo = X_test.copy()
for i in range(1,8):
    # Modificamos el conjunto de test
    X_test_modelo['Incidentes t-3'] = X_test_modelo['Incidentes t-2']
    X_test_modelo['Incidentes t-2'] = X_test_modelo['Incidentes t-1']
    X_test_modelo['Incidentes t-1'] = y_pred
    # Predecimos
    y_pred = modelo.predict(X_test_modelo)
    # Ajustamos al tamaño
    pred = y_pred[i:]
    predicciones.append(pred)
return predicciones

def visualizar_predicciones(y_test, predicciones):
    
    # TODO : CREO NO NECESARIO
    #test = y_test.copy()

    #palette = plt.get_cmap('prism')
    plt.style.use('seaborn-darkgrid')

    for i in range(1,len(predicciones)+1):
        # Tomo la predicción en t+i
        s = pd.Series(predicciones[i-1])
        # Para mostrarla, la desplazo i valores a la derecha para colocarla en el dia correspondiente de prediccion
        s = s.shift(+i)
        s.dropna(axis = 0, inplace = True)

        s.plot(linewidth = 1.6, alpha = 0.9, label = 'Incidentes predecidos t+'+str(i))
        #s.plot(linewidth = 1.6, alpha = 0.9, label = 'Incidentes predecidos t+'+str(i))
        
        # TODO : CREO NO NECESARIO
        #test = test.drop([test.index[0]])

    y_test.plot(linewidth = 1.6, alpha = 0.9, label = 'Incidentes Reales') 
    #y_test.plot(linewidth = 1.6, alpha = 0.9, label = 'Incidentes Reales') 

    plt.title('Evolución incidentes', loc ='center', fontsize = 12, fontweight = 0, color = 'black')

    #plt.legend(loc='best')
    plt.legend(loc = 'center left', bbox_to_anchor = (1, 0.5))

    plt.xticks(rotation=90)
    plt.show()

IndentationError: expected an indented block (<ipython-input-9-049b88a30ed0>, line 31)

In [None]:
optimised_rf = clf_rf.best_estimator_

clf_rf.best_params_

joblib.dump(optimised_rf, 'rf_lag3_prueba.pkl')

In [None]:
optimised_rf_lag3 = joblib.load('rf_lag3_prueba.pkl')

y_pred = optimised_rf_lag3.predict(X_test_3)

In [None]:
RMSEs_test_lag3_rf = []
MAEs_test_lag3_rf = []
CCs_test_lag3_rf = []

RMSEs_test_lag3_rf.append(RMSE(y_pred, y_test_3))
MAEs_test_lag3_rf.append(MAE(y_pred, y_test_3))

In [None]:
grafica(y_pred, y_test_3, -1)

In [None]:
predicciones_rf_lag3 = calcular_predicciones_lag3(optimised_rf_lag3, y_pred, X_test_3)

visualizar_predicciones(y_test_3, predicciones_rf_lag3)