# Modelo SVR

## Importación de las paqueterías

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import itertools

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate

from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

## Importacion de los Datasets

In [2]:
# Training dataset, read from a CSV path relative to the working directory.
df_Nizanda = pd.read_csv('./Datasets/Nizanda_T1.csv')
# Bare expression so the notebook renders the DataFrame as the cell output.
df_Nizanda

Unnamed: 0,Day,Month,Year,Hour,Barometer,Temp C,High Temp C,Low Temp C,Hum %,Dew Point C,...,Wind Run,High Wind Speed m/s,High Wind Direction,Wind Chill C,Heat Index C,THW Index C,Rain mm,Rain Rate mm/h,ET mm,Date
0,1,1,2017,0,754.7,25.2,25.7,24.9,84.0,22.3,...,3.1,11.2,NW,25.2,27.3,27.3,0.0,0.0,,2017-01-01 00:00:00
1,1,1,2017,1,754.5,24.6,24.9,24.4,86.0,22.1,...,3.1,11.2,WNW,24.6,26.5,26.5,0.0,0.0,,2017-01-01 01:00:00
2,1,1,2017,2,754.2,24.5,24.6,24.4,81.0,21.0,...,2.7,10.7,WNW,24.5,26.2,26.2,0.0,0.0,,2017-01-01 02:00:00
3,1,1,2017,3,753.9,24.4,24.6,24.2,84.0,21.5,...,3.1,10.7,WNW,24.4,26.1,26.1,0.0,0.0,,2017-01-01 03:00:00
4,1,1,2017,4,753.7,24.1,24.3,24.1,86.0,21.6,...,3.1,12.5,WNW,24.1,25.9,25.9,0.0,0.0,,2017-01-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17406,31,12,2018,19,754.6,26.6,26.8,26.4,73.0,21.4,...,2.2,9.4,N,26.6,28.4,28.4,0.0,0.0,,2018-12-31 19:00:00
17407,31,12,2018,20,754.8,26.2,26.4,26.0,75.0,21.4,...,2.2,9.8,WNW,26.2,27.9,27.9,0.0,0.0,,2018-12-31 20:00:00
17408,31,12,2018,21,755.0,25.9,26.0,25.8,74.0,20.9,...,2.2,8.5,N,25.9,27.4,27.4,0.0,0.0,,2018-12-31 21:00:00
17409,31,12,2018,22,755.4,25.7,25.8,25.5,75.0,20.9,...,1.8,7.1,NE,25.7,27.2,27.2,0.0,0.0,,2018-12-31 22:00:00


In [3]:
# Empty results container: one list per reported field. The training cells
# later append one row per experiment to the DataFrame built from it.
resSvr = {
    column: []
    for column in ('Modelo', 'Horizon', 'R2', 'RSME', 'MAE', 'Comb', 'Params')
}

# Convert the (still empty) dictionary into a DataFrame and display it.
df_SVR = pd.DataFrame(resSvr)
df_SVR

Unnamed: 0,Modelo,Horizon,R2,RSME,MAE,Comb,Params


## Funciones de procesado y graficacion de datos 

In [4]:
def data_preprocessing(df, scaler):
    """Scale ``df`` with ``scaler`` and report how many columns it has.

    The scaler is only used through ``scaler.transform``; it is not fitted
    here, so the caller must have fitted it beforehand.

    Parameters
    ----------
    df : object exposing ``columns`` (a DataFrame in this notebook).
    scaler : object exposing ``transform(df)`` that returns a 2-D array.

    Returns
    -------
    tuple
        ``(values, n_cols)`` where ``values`` is the transformed data and
        ``n_cols`` is ``len(df.columns)``. When ``df`` has exactly one
        column, only column 0 of the transformed array is returned, i.e. a
        1-D array.
    """
    scaled = scaler.transform(df)
    n_cols = len(df.columns)

    # A single feature is squeezed to 1-D so the windowing step produces
    # 2-D (sample, step) arrays instead of 3-D ones.
    if n_cols == 1:
        return scaled[:, 0], n_cols
    return scaled, n_cols

In [5]:
def get_labeled_window(data, window_size, horizon, n_cols, target_col=3):
    """Split stacked windows into model inputs and forecast labels.

    Parameters
    ----------
    data : array whose axis 0 indexes windows and axis 1 indexes the position
        inside each window; multi-column data carries a third axis for the
        feature columns.
    window_size : number of leading positions used as inputs.
    horizon : number of trailing positions used as labels.
    n_cols : number of feature columns in the source data.
    target_col : index (on the last axis) of the column to predict when
        ``n_cols`` is not 1. Defaults to 3, the value that was previously
        hard-coded.

    Returns
    -------
    tuple
        ``(windows, labels)``. ``windows`` keeps every feature column;
        ``labels`` contains only the target values.
    """
    inputs = data[:, :window_size]
    if n_cols == 1:
        return inputs, data[:, -horizon:]
    # Multi-feature case: labels are taken from the target column only.
    return inputs, data[:, -horizon:, target_col]


def make_windows(data, input_size, horizon, n_cols, shift=0, target_col=3):
    """Build sliding windows over ``data`` and split them into inputs/labels.

    Every window spans ``input_size + horizon + shift`` consecutive rows and
    consecutive windows start one row apart. The first ``input_size`` rows of
    a window are the inputs and the last ``horizon`` rows are the labels, so
    ``shift`` rows are skipped between them.

    Parameters
    ----------
    data : array indexed by time step on axis 0 (1-D for a single feature,
        2-D ``(step, column)`` otherwise).
    input_size : number of input steps per window.
    horizon : number of label steps per window.
    n_cols : number of feature columns in ``data``.
    shift : gap, in steps, between the inputs and the labels.
    target_col : column used for the labels when ``n_cols`` is not 1;
        forwarded to ``get_labeled_window``.

    Returns
    -------
    tuple
        ``(windows, labels)`` as produced by ``get_labeled_window``.
    """
    span = input_size + horizon + shift
    n_windows = len(data) - (span - 1)

    # Row vector of in-window offsets plus column vector of window starts
    # broadcast into a (n_windows, span) matrix of row indices.
    offsets = np.arange(span)[np.newaxis, :]
    starts = np.arange(n_windows)[:, np.newaxis]
    windowed_array = data[starts + offsets]

    return get_labeled_window(
        windowed_array,
        window_size=input_size,
        horizon=horizon,
        n_cols=n_cols,
        target_col=target_col,
    )

In [6]:
class WindowGenerator():
    """Describe the index layout of an input/label window.

    The constructor stores the training frame and derives which positions of
    a window of length ``input_width + shift`` are inputs and which are
    labels. No data is sliced here; only index bookkeeping is done.

    Note: the default for ``train_df`` is the module-level ``df_Nizanda`` as
    it exists when this class statement runs.
    """

    def __init__(self, input_width, label_width, shift,
                 train_df = df_Nizanda,
                 label_columns=None):
        # Keep a reference to the raw data.
        self.train_df = train_df

        # Map column names to positions. ``label_columns_indices`` is only
        # created when label columns were actually given.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = dict(
                (name, position) for position, name in enumerate(label_columns)
            )
        self.column_indices = dict(
            (name, position) for position, name in enumerate(train_df.columns)
        )

        # Window geometry.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.total_window_size = input_width + shift

        # Inputs occupy the first ``input_width`` positions of the window.
        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels occupy the last ``label_width`` positions of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a multi-line summary of the window layout."""
        summary = [
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}',
        ]
        return '\n'.join(summary)

In [7]:
def run_experiment(data, window, horizon, shift, scaler, cv, model):
    """Cross-validate ``model`` on windows built from ``data`` and print scores.

    The data is scaled with ``data_preprocessing``, windowed with
    ``make_windows``, flattened to 2-D if needed, and then scored by
    ``scores_cv`` (which previously was duplicated line for line here).

    Parameters
    ----------
    data : DataFrame with the feature columns to use.
    window : number of input steps per sample.
    horizon : number of steps to predict per sample.
    shift : gap, in steps, between inputs and labels.
    scaler : scaler passed to ``data_preprocessing``; it is not fitted here.
    cv : cross-validation splitter or fold count forwarded to
        ``cross_validate``.
    model : estimator to evaluate.

    Returns
    -------
    dict
        Mean ``'r2'``, ``'rmse'`` and ``'mae'`` over the folds.
    """
    w, n_cols = data_preprocessing(data, scaler)
    x_, y_ = make_windows(w, input_size=window, horizon=horizon, shift=shift, n_cols=n_cols)

    if x_.ndim > 2:
        # Flatten (sample, step, feature) into (sample, step * feature).
        x_ = x_.reshape((x_.shape[0], -1))

    # verbose=True keeps the per-metric report this function always printed.
    return scores_cv(model, x_, y_, cv, verbose=True)

In [8]:
def scores_cv(model, x_, y_, cv, verbose = False):
    """Cross-validate ``model`` and return the mean R2, RMSE and MAE.

    Parameters
    ----------
    model : estimator forwarded to ``cross_validate``.
    x_ : feature matrix.
    y_ : targets.
    cv : cross-validation splitter or fold count forwarded to
        ``cross_validate``.
    verbose : when truthy, print mean +/- standard deviation of each metric
        across folds.

    Returns
    -------
    dict
        ``{'r2': ..., 'rmse': ..., 'mae': ...}`` with the fold means.
    """
    cv_results = cross_validate(
        model,
        x_,
        y_,
        cv=cv,
        scoring=["neg_mean_absolute_error", "neg_root_mean_squared_error", "r2"],
    )
    # The error scorers are negated ("higher is better"); flip them back.
    mae = -cv_results["test_neg_mean_absolute_error"]
    rmse = -cv_results["test_neg_root_mean_squared_error"]
    r2 = cv_results["test_r2"]

    # Truthiness test instead of ``== True`` so any truthy flag enables output.
    if verbose:
        print(
            f"Coefficient of determination: {r2.mean():.3f} +/- {r2.std():.3f}\n"
            f"Root Mean Squared Error: {rmse.mean():.3f} +/- {rmse.std():.3f}\n"
            f"Mean Absolute Error:     {mae.mean():.3f} +/- {mae.std():.3f}\n"
        )
    return {'r2': r2.mean(), 'rmse': rmse.mean(), 'mae': mae.mean()}

In [9]:
def run_test(data, window, horizon, shift, scaler, target, model):
    """Fit ``model`` on windows built from ``data`` and score it on them.

    NOTE(review): the model is fitted and evaluated on the same windows, so
    the returned metrics are in-sample scores, not a hold-out test.

    Parameters
    ----------
    data : DataFrame with the feature columns to use.
    window : number of input steps per sample.
    horizon : number of steps to predict per sample.
    shift : gap, in steps, between inputs and labels.
    scaler : scaler passed to ``data_preprocessing``; it is not fitted here.
    target : not used by this function; kept so existing calls still work.
    model : estimator exposing ``fit`` and ``predict``; it is fitted in place.

    Returns
    -------
    tuple
        ``(metrics, y_pred)`` where ``metrics`` is a dict with ``'r2'``,
        ``'rmse'`` and ``'mae'`` and ``y_pred`` holds the predictions for
        every window.
    """
    w, n_cols = data_preprocessing(data, scaler)
    x_, y_ = make_windows(w, input_size=window, horizon=horizon, shift=shift, n_cols=n_cols)

    if x_.ndim > 2:
        # Flatten (sample, step, feature) into (sample, step * feature).
        x_ = x_.reshape((x_.shape[0], -1))

    model.fit(x_, y_)
    y_pred = model.predict(x_)

    r2 = r2_score(y_, y_pred)
    mae = mean_absolute_error(y_, y_pred)
    # ``squared=False`` is deprecated and no longer accepted by recent
    # scikit-learn releases. Taking the square root of the per-output MSE and
    # then averaging gives the same value (RMSE per output, uniformly averaged).
    per_output_mse = mean_squared_error(y_, y_pred, multioutput='raw_values')
    rmse = float(np.mean(np.sqrt(per_output_mse)))
    return {'r2': r2, 'rmse': rmse, 'mae': mae}, y_pred

In [10]:
# Plot the observed series together with the model predictions
def plot_series(Y_true, Y_pred):
    """Draw ``Y_true`` (light grey) and ``Y_pred`` (blue) on one 16x8 figure.

    Both arguments are passed straight to ``Axes.plot``; nothing is returned.
    """
    figure, axes = plt.subplots(figsize=(16, 8))
    for series, colour in ((Y_true, '0.75'), (Y_pred, 'blue')):
        axes.plot(series, color=colour)
    axes.set_title('Serie de Tiempo de la velocidad del viento')

## Metodos de prediccion

In [11]:
def recursive_forecast(modelo, Ventana, Iters = 1, Rolling = False):
    """Forecast ``Iters`` steps by feeding each prediction back as input.

    At step ``k`` the model receives ``Ventana[k:]`` reshaped to a single
    row; the prediction is appended to ``Ventana`` so that the next step sees
    it. The slice start moves forward by one each step, so the model always
    gets as many values as the initial window had (given one value per
    prediction).

    Parameters
    ----------
    modelo : object exposing ``predict`` on a ``(1, n)`` array.
    Ventana : 1-D array with the initial input window.
    Iters : number of recursive predictions to make.
    Rolling : currently ignored; the window is always advanced.

    Returns
    -------
    tuple
        ``(Ventana, Preds)``: the initial window extended with every
        prediction, and the list of raw ``predict`` outputs in order.
    """
    Preds = []  # raw outputs of each predict call, in order
    for start in range(Iters):
        latest = Ventana[start:]
        step = modelo.predict(latest.reshape(1, -1))
        Preds.append(step)
        # np.append flattens, so the window stays 1-D as it grows.
        Ventana = np.append(Ventana, step)
    return Ventana, Preds

## Experimentación

In [12]:
# Experiment grid: feature set x input-window length x forecast horizon.
list1 = [['Month', 'Day', 'Hour', 'High Wind Speed m/s'], ['High Wind Speed m/s']]
list2 = [24, 48, 72, 96]
list3 = [1, 24]
listcomb = [list1, list2, list3]
# Build every (features, window, horizon) combination.
combinations = list(itertools.product(*listcomb))

# Drop the SIMO combinations. The list is rebuilt with a filter instead of
# calling pop() while enumerating it, which would skip the element that
# follows every removal.
# NOTE(review): comb[0] is a list of column names, so comparing it with the
# string 'High Wind Speed m/s' is never true and nothing is removed today.
# The predicate is left as written because a later cell selects
# list_mimo[4], which only exists while the single-feature, horizon-24
# combinations are kept. Decide which behaviour is intended before changing it.
combinations = [
    comb for comb in combinations
    if not (comb[2] == 24 and comb[0] == 'High Wind Speed m/s')
]

# Horizon 1 feeds the recursive strategy; every other horizon feeds MIMO.
list_rect = [comb for comb in combinations if comb[2] == 1]
list_mimo = [comb for comb in combinations if comb[2] != 1]

### Entrenamiento Recursivo

In [13]:
%%time
#SVR
model = SVR()
# Parametros del modelo
params = {'C': [1, 10, 100, 1000], 'epsilon': [0.001, 0.01, 0.1, 1], 'gamma': [0.001, 0.01, 0.1, 1],'kernel': ['rbf']}
keys = ['epsilon', 'C', 'gamma']

# Time Series Cross Validatino
tscv = TimeSeriesSplit(n_splits = 4)
# Normalizacion
minmax_scl = MinMaxScaler()

for i, comb in enumerate(list_rect):
    df_train = df_Nizanda.copy()
    drop_cols = [col for col in df_Nizanda.columns if col not in comb[0]]
    df_train.drop(drop_cols, axis = 1, inplace = True)
    minmax_scl.fit(df_train)

    window = comb[1]
    horizon = comb[2]
    shift = 0
    w, n_cols = data_preprocessing(df_train, minmax_scl)
    x_, y_ = make_windows(w, input_size = window, horizon = horizon, shift = shift, n_cols = n_cols)
    
    if len(x_.shape) > 2:
        # Convertir a arreglo de 2 dimensiones
        rows, nx, ny = x_.shape
        x_ = x_.reshape((rows, nx*ny))
    
    print('---------------Inicio de experimento: '+ str(i) +' -----------------')
    
    bestparams = {'C': [1], 'epsilon': [0.1], 'gamma': [0.1],'kernel': ['rbf']}
    # Probando el SVR y GridSearchCV
    for i in range(3):
        print('--Etapa ' + str(i + 1) + '--')
        print('GS: '+ str(keys[i]))
        bestparams.update({keys[i]: params[keys[i]]})
        print(bestparams)
        reg = GridSearchCV(estimator = model, param_grid = bestparams, cv = tscv, scoring = 'r2', n_jobs = -1, verbose = True)
        reg.fit(x_, y_.ravel())
        bestparams.update({keys[i]: [reg.best_params_[keys[i]]]})
    
    print('Parametros del experimento ' + str(i + 1) + ': ' + str(bestparams))
    metrics = scores_cv(SVR(C = bestparams['C'][0], epsilon = bestparams['epsilon'][0], gamma = bestparams['gamma'][0]), x_, y_.ravel(), tscv, verbose = False)
    df_SVR.loc[len(df_SVR.index)] = ['SVR', comb[2], metrics['r2'], metrics['rmse'], metrics['mae'], [comb[0], comb[1], comb[2]], bestparams]
    
    print('---------------Fin de experimento: '+ str(i) +' -----------------')
print('---------------Lote de experimentos terminados -----------------')

---------------Inicio de experimento: 0 -----------------
--Etapa 1--
GS: epsilon
{'C': [1], 'epsilon': [0.001, 0.01, 0.1, 1], 'gamma': [0.1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
--Etapa 2--
GS: C
{'C': [1, 10, 100, 1000], 'epsilon': [0.01], 'gamma': [0.1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
--Etapa 3--
GS: gamma
{'C': [1], 'epsilon': [0.01], 'gamma': [0.001, 0.01, 0.1, 1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
Parametros del experimento 3: {'C': [1], 'epsilon': [0.01], 'gamma': [0.01], 'kernel': ['rbf']}


  element = np.asarray(element)


---------------Fin de experimento: 2 -----------------
---------------Inicio de experimento: 1 -----------------
--Etapa 1--
GS: epsilon
{'C': [1], 'epsilon': [0.001, 0.01, 0.1, 1], 'gamma': [0.1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
--Etapa 2--
GS: C
{'C': [1, 10, 100, 1000], 'epsilon': [0.01], 'gamma': [0.1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
--Etapa 3--
GS: gamma
{'C': [1], 'epsilon': [0.01], 'gamma': [0.001, 0.01, 0.1, 1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
Parametros del experimento 3: {'C': [1], 'epsilon': [0.01], 'gamma': [0.01], 'kernel': ['rbf']}
---------------Fin de experimento: 2 -----------------
---------------Inicio de experimento: 2 -----------------
--Etapa 1--
GS: epsilon
{'C': [1], 'epsilon': [0.001, 0.01, 0.1, 1], 'gamma': [0.1], 'kernel': ['rbf']}
Fitting 4 folds for each of 4 candidates, totalling 16 fits
--Etapa 2--
GS: C
{'C': [1, 1

In [17]:
# Save the results table of the recursive experiments as CSV.
# NOTE(review): this is an absolute, user-specific Windows path; it only
# resolves on a machine that has this directory. Consider a path relative to
# the notebook.
df_SVR.to_csv(r'C:\Users\Angel\Documents\Tesis\Notebooks\Resultados\SVR_Rec.csv')

### Entrenamiento MIMO

In [None]:
### USE FOR THE MIMO METHOD
# NOTE(review): this cell mirrors the recursive training loop. It iterates
# list_rect (not list_mimo), fits on y_.ravel(), and relies on model, params,
# keys, tscv and minmax_scl being defined by another cell. The In [None]
# marker suggests it was never executed; presumably it is a template still to
# be adapted for MIMO. Confirm before running.
for i, comb in enumerate(list_rect):
    # Keep only the columns named in this combination.
    df_train = df_Nizanda.copy()
    drop_cols = [col for col in df_Nizanda.columns if col not in comb[0]]
    df_train.drop(drop_cols, axis = 1, inplace = True)
    minmax_scl.fit(df_train)

    window = comb[1]
    horizon = comb[2]
    shift = 0
    w, n_cols = data_preprocessing(df_train, minmax_scl)
    x_, y_ = make_windows(w, input_size = window, horizon = horizon, shift = shift, n_cols = n_cols)
    
    if len(x_.shape) > 2:
        # Flatten to a 2-D array
        rows, nx, ny = x_.shape
        x_ = x_.reshape((rows, nx*ny))
    
    print('---------------Inicio de experimento: '+ str(i) +' -----------------')
    
    bestparams = {'C': [1], 'epsilon': [0.1], 'gamma': [0.1],'kernel': ['rbf']}
    # Trying SVR with GridSearchCV: one hyper-parameter is searched per stage
    # while the others stay fixed, then it is frozen at its best value.
    for j in range(3):
        print('--Etapa ' + str(j + 1) + '--')
        print('GS: '+ str(keys[j]))
        bestparams.update({keys[j]: params[keys[j]]})
        print(bestparams)
        reg = GridSearchCV(estimator = model, param_grid = bestparams, cv = tscv, scoring = 'r2', n_jobs = -1, verbose = True)
        reg.fit(x_, y_.ravel())
        bestparams.update({keys[j]: [reg.best_params_[keys[j]]]})
    
    # NOTE(review): j is 2 once the loop ends, so this always prints 3 rather
    # than the experiment index i.
    print('Parametros del experimento ' + str(j + 1) + ': ' + str(bestparams))
    metrics = scores_cv(SVR(C = bestparams['C'][0], epsilon = bestparams['epsilon'][0], gamma = bestparams['gamma'][0]), x_, y_.ravel(), tscv, verbose = False)
    df_SVR.loc[len(df_SVR.index)] = ['SVR', comb[2], metrics['r2'], metrics['rmse'], metrics['mae'], [comb[0], comb[1], comb[2]], bestparams]
    
    print('---------------Fin de experimento: '+ str(i) +' -----------------')
print('---------------Lote de experimentos terminados -----------------')

In [18]:
#### CAMBIAR A MSVR
%%time
#SVR
svr = SVR()
model = MultiOutputRegressor(SVR())
# Parametros del modelo
params = {'C': [1, 10, 100, 1000], 'epsilon': [0.001, 0.01, 0.1, 1], 'gamma': [0.001, 0.01, 0.1, 1],'kernel': ['rbf']}
keys = ['epsilon', 'C', 'gamma']

# Time Series Cross Validatino
tscv = TimeSeriesSplit(n_splits = 4)
# Normalizacion
minmax_scl = MinMaxScaler()
bestparams = {'C': [1], 'epsilon': [0.1], 'gamma': [0.1],'kernel': ['rbf']}

comb = list_mimo[4]
df_train = df_Nizanda.copy()
drop_cols = [col for col in df_Nizanda.columns if col not in comb[0]]
df_train.drop(drop_cols, axis = 1, inplace = True)
minmax_scl.fit(df_train)

window = comb[1]
horizon = comb[2]
shift = 0
w, n_cols = data_preprocessing(df_train, minmax_scl)
x_, y_ = make_windows(w, input_size = window, horizon = horizon, shift = shift, n_cols = n_cols)
    
if len(x_.shape) > 2:
        # Convertir a arreglo de 2 dimensiones
        rows, nx, ny = x_.shape
        x_ = x_.reshape((rows, nx*ny))

for j in range(3):
        print('--Etapa ' + str(j + 1) + '--')
        print('GS: '+ str(keys[j]))
        bestparams.update({keys[j]: params[keys[j]]})
        print(bestparams)
        reg = GridSearchCV(estimator = model, param_grid = bestparams, cv = tscv, scoring = 'r2', n_jobs = -1, verbose = True)
        reg.fit(x_, y_.ravel())
        bestparams.update({keys[j]: [reg.best_params_[keys[j]]]})

--Etapa 1--
GS: epsilon
{'C': [1], 'epsilon': [0.001, 0.01, 0.1, 1], 'gamma': [0.1], 'kernel': ['rbf']}


ValueError: Found input variables with inconsistent numbers of samples: [17364, 416736]

In [23]:
# Debug check: show the shape of the label array built by the previous cell.
print(y_.shape)

(17364, 24)


### Prueba REC

### Prueba MIMO