In [6]:
from pathlib import Path
import tensorflow as tf
from keras.layers import LSTM, Conv2D, BatchNormalization
from keras.optimizers import Adam, SGD, RMSprop
from keras.models import Sequential
from keras.layers import Dense
from sklearn.linear_model import LinearRegression
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('dark_background')
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Using TensorFlow backend.


In [7]:
from src.data.task_1_food_security.dataset import Dataset

# The repository root is taken to be two directory levels above the current working directory.
root_folder = Path.cwd().parent.parent
data_folder = root_folder / 'data' / 'external'

# NOTE(review): prepare_dataset_array() presumably populates the X_train /
# y_train_perc attributes read in later cells -- confirm in Dataset.
ds = Dataset(root_folder=root_folder)
ds.prepare_dataset_array()

## Multidimensional prediction
- predict % of population in phase {1, 2, ..., 5}
- compare results obtained from different methods

Functions to get model

In [22]:

def create_model(optimizer="adam", lr=None, batch_norm=False):
    """Build and compile the LSTM network used for the 5-output prediction.

    Architecture: ``LSTM(126)`` over inputs of shape ``(36, 11)``, an optional
    ``BatchNormalization`` layer, then a 5-unit softmax ``Dense`` output.
    The model is compiled with mean squared error as both loss and metric.

    Parameters
    ----------
    optimizer : str
        Name of the optimizer: ``"adam"``, ``"sgd"`` or ``"rmsprop"``.
    lr : float or None
        Learning rate handed to the optimizer. ``None`` keeps the optimizer's
        own default.
    batch_norm : bool
        When true, insert ``BatchNormalization`` between the LSTM and the
        output layer.

    Returns
    -------
    The compiled ``Sequential`` model.

    Raises
    ------
    ValueError
        If ``optimizer`` is not one of the supported names.
    """
    supported_optimizers = ("adam", "sgd", "rmsprop")
    # Validate first: an unknown name used to fall through every branch and
    # only surfaced later as an UnboundLocalError on `opt` at compile time.
    if optimizer not in supported_optimizers:
        raise ValueError(
            "Unsupported optimizer {!r}; expected one of {}".format(
                optimizer, ", ".join(supported_optimizers)
            )
        )

    optimizer_cls = {"adam": Adam, "sgd": SGD, "rmsprop": RMSprop}[optimizer]
    # Only pass `lr` when the caller supplied one, so defaults stay untouched.
    opt = optimizer_cls() if lr is None else optimizer_cls(lr=lr)

    model = Sequential()
    model.add(LSTM(126, input_shape=(36, 11)))
    if batch_norm:
        model.add(BatchNormalization())
    model.add(Dense(5, activation='softmax'))

    model.compile(loss=tf.keras.losses.MeanSquaredError(),
                  optimizer=opt,
                  metrics=[tf.keras.losses.MeanSquaredError()])
    return model

In [28]:
# Training inputs and the percentage targets taken from the prepared dataset.
x, y = ds.X_train, ds.y_train_perc

# Seed NumPy's global RNG for reproducibility.
seed = 7
np.random.seed(seed)

# Candidate hyper-parameter values.
optimizers = ['sgd', 'adam', 'rmsprop']
lr = [0.01, 0.005, 0.001]
epochs = [100]
batches = [100]
batch_norm = [True, False]

# Only lr, epochs and batch_norm are searched at the moment; `optimizers` and
# `batches` are kept for a wider grid (optimizer / batch_size) but are unused.
param_grid = {'lr': lr, 'epochs': epochs, 'batch_norm': batch_norm}
model = KerasRegressor(build_fn=create_model, verbose=0)
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    verbose=2,
)


In [16]:
# Names used by the fit / predict cells below.
x_train, y_train = ds.X_train, ds.y_train_perc

In [23]:
# Build one model with the default settings (adam, default learning rate, no batch norm).
# Note: this rebinds `model`, which previously held the KerasRegressor wrapper.
model = create_model()

In [24]:
# Print the layer-by-layer summary to sanity-check output shapes and parameter counts.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 126)               69552     
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 635       
Total params: 70,187
Trainable params: 70,187
Non-trainable params: 0
_________________________________________________________________


In [25]:
# Smoke test: fit on the first three training samples with default fit settings.
model.fit(x_train[:3], y_train[:3])

Epoch 1/1


<keras.callbacks.callbacks.History at 0x7f90a47d22d0>

In [26]:
# Smoke test: predict for the next three samples (indices 3-5), which were not used in the fit above.
model.predict(x_train[3:6])


array([[0.21051913, 0.22573857, 0.15358059, 0.19496626, 0.21519539],
       [0.21759059, 0.23899806, 0.14339954, 0.18212107, 0.21789066],
       [0.19965842, 0.23925616, 0.15335236, 0.19404565, 0.21368742]],
      dtype=float32)

In [29]:
# Run the cross-validated grid search over `param_grid` on the full training set.
# Every candidate trains for 100 epochs per fold, so this cell is slow.
grid.fit(x_train, y_train)


Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV] batch_norm=True, epochs=100, lr=0.01 ............................
[CV] ............. batch_norm=True, epochs=100, lr=0.01, total=  10.3s
[CV] batch_norm=True, epochs=100, lr=0.01 ............................


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:   10.3s remaining:    0.0s


KeyboardInterrupt: 

In [None]:
grid.