<a href="https://colab.research.google.com/github/ChepChaf/DH-DL/blob/master/Model_Search.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Install the talos hyperparameter-search library into the notebook runtime.
!pip3 install talos

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.layers import Dense, Dropout, BatchNormalization
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import talos as ta
from talos.utils import hidden_layers, early_stopper

import time

%matplotlib inline

In [0]:
# Mount Google Drive at /content/drive so files stored there are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
# Load the data: features and target are read from separate pickle files.
X = pd.read_pickle('X.pkl')
y = pd.read_pickle('y.pkl')

# Preview the first rows of the features.
X.head()

In [0]:
# Preview the first entries of the target.
y.head()

In [0]:
# Split into train/test sets, then standardise the feature values.

# Fixed seed so the hold-out split (and every metric computed on it) is the same on each run.
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=SPLIT_SEED
)

# The scaler is fitted on the training rows only; the test rows are transformed
# with those same fitted statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [0]:
# Callbacks:
# EarlyStopping: stop training once validation has stopped improving (by at least
# min_delta for `patience` epochs) and restore the best weights seen.
es = EarlyStopping(min_delta=0.001, patience=5, restore_best_weights=True)
# ModelCheckpoint: keep the best trained model on disk. With save_best_only=False the
# file was rewritten after every epoch, so it held the most recent epoch, not the best.
mchkp = ModelCheckpoint('initial-model.h5', save_best_only=True)
# ReduceLROnPlateau: lower the learning rate as training approaches the minimum.
# NOTE(review): left at library defaults; if its default patience is longer than the
# EarlyStopping patience of 5 it will rarely get a chance to fire - confirm.
rlr = ReduceLROnPlateau()

In [0]:
# Baseline model with hand-picked hyperparameters.
n_features = X_train.shape[1]
model = Sequential([
    Dense(128, input_shape=(n_features,), activation='relu'),
    Dropout(0.25),
    Dense(128, activation='relu'),
    Dense(1),
])

model.compile(optimizer='adam', loss='mean_squared_logarithmic_error', metrics=['mae'])
history = model.fit(
    X_train, y_train,
    epochs=250,
    batch_size=2,
    validation_split=0.2,
    callbacks=[es, mchkp, rlr],
)

In [0]:
# Training vs. validation loss per epoch.
plt.figure(figsize=(8, 8))
plt.plot(np.arange(len(history.history['loss'])), history.history['loss'], label='loss')
plt.plot(np.arange(len(history.history['val_loss'])), history.history['val_loss'], label='val_loss')
plt.xlabel('epoch')
plt.ylabel('loss')
# The label= arguments above are only drawn once a legend is requested.
plt.legend()

plt.show()

In [0]:
# Evaluate the trained model on the test split.
model.evaluate(X_test, y_test)

In [0]:
# Model predictions for the test features.
predictions = model.predict(X_test)

In [0]:
# Predictions (red, dashed) overlaid on the true test targets, by sample position.
plt.figure(figsize=(16, 8))
plt.plot(np.arange(len(predictions)), predictions, color='r', linestyle='--', linewidth=2.0, label='predictions')
plt.plot(np.arange(len(y_test)), y_test, label='y_test')
plt.xlabel('test sample')
# Without a legend the two series cannot be told apart by a reader.
plt.legend()

# Ends the cell without echoing the Line2D repr.
plt.show()

In [0]:
# Side-by-side summary statistics of the predictions and the true test targets.
comparison_columns = {
    'predictions': predictions.flatten(),  # model output flattened to 1-D
    'y_test': y_test.values,
}
df = pd.DataFrame(comparison_columns)
df.describe()

In [0]:
# Model-building function used by the talos hyperparameter search.

def ted_model(X_train, y_train, X_val, y_val, params):
  """Build, compile and train one candidate network for a talos scan.

  Args:
    X_train: training features; ``X_train.shape[1]`` sets the input width.
    y_train: training targets.
    X_val: validation features.
    y_val: validation targets.
    params: mapping holding one hyperparameter combination. This function reads
      ``first_neuron``, ``dropout``, ``optimizer``, ``loss`` and ``batch_size``;
      the whole mapping is also handed to ``hidden_layers``.

  Returns:
    A ``(out, network)`` pair: the value returned by ``network.fit`` and the
    fitted model.
  """
  rlr = ReduceLROnPlateau()

  network = Sequential()

  # Input layer: width comes from the scan; its activation is fixed to relu
  # (params['activation'] is not read in this function).
  network.add(Dense(params['first_neuron'], input_shape=(X_train.shape[1],), activation='relu'))
  network.add(Dropout(params['dropout']))

  # talos helper that appends the hidden layers described by params.
  hidden_layers(network, params, 1)

  network.add(BatchNormalization())
  network.add(Dense(1))

  network.compile(optimizer=params['optimizer'], loss=params['loss'], metrics=['mae', 'mape'])

  out = network.fit(
      X_train, y_train,
      epochs=50,
      batch_size=params['batch_size'],
      # Passed as a tuple, the form Keras documents for validation_data; a list
      # can be interpreted as a multi-input x by some Keras versions.
      validation_data=(X_val, y_val),
      callbacks=[rlr]
  )

  return out, network

In [0]:
# Hyperparameter grid for talos to try.
# The candidate values are adjusted between runs according to what talos shows working best.
params = dict(
    first_neuron=[512, 256],
    activation=['relu'],
    dropout=[0, 0.25],
    hidden_layers=[3, 4],
    shapes=['brick'],
    optimizer=['adam'],
    batch_size=[64, 128],
    loss=['mean_squared_error'],
)

In [0]:
# Run the talos scan over the parameter grid using ted_model as the model builder.
# Targets are reshaped to a single-column 2-D array before being passed in.
scan_targets = y_train.values.reshape(-1, 1)
scan_options = {
    'params': params,
    'model': ted_model,
    'experiment_name': '7.1',
    'val_split': .2,
    'reduction_metric': 'val_loss',
    'minimize_loss': True,
    'clear_session': True,
}
scan = ta.Scan(X_train, scan_targets, **scan_options)

In [0]:
# Package the results of the talos search into a zip archive.
# asc=True because val_loss is a metric to minimise: with the default (asc=False)
# talos ranks models in descending order and would package the highest-loss one.
ta.Deploy(scan, '7.1', metric='val_loss', asc=True)

In [0]:
# Copy the archive and the experiment folder to Google Drive so they are not lost if Colab crashes.
!cp /content/7.1.zip "/content/drive/My Drive/talos_experiments/7.1.zip"
!cp -r /content/7.1 "/content/drive/My Drive/talos_experiments/7.1"

In [0]:
# The best model of this experiment, i.e. the one with the lowest val_loss.
# asc=True is needed for a loss metric; the default (asc=False) selects the highest value.
scan.best_model(metric='val_loss', asc=True).summary()

In [0]:
# Scan results table sorted by val_loss.
scan.data.sort_values('val_loss')