In [None]:
import time
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping

from src.comb import incremental_levels
from src.dataset import process_dataset

# Search space and training budget shared by the cells below.
NUM_LAYERS = 3           # forwarded as `num_levels` to incremental_levels
NUM_UNITS = [8, 16, 32]  # forwarded as `parameters` to incremental_levels
EPOCHS = 1000            # `epochs` given to model.fit; early stopping may end training sooner

# Seaborn appearance applied to every figure drawn in this notebook.
sns.set_context(context='notebook')
sns.set_style(style='whitegrid')

In [None]:
# Read the dataset from disk and show the resulting frame as the cell output.
dataset_path = '../res/dataset.csv'
df = pd.read_csv(dataset_path)
df

In [None]:
# Split the dataset into training and validation pairs.
# NOTE(review): the third returned element is discarded here; the val_split=None call
# further down suggests it holds the fitted scalers -- confirm against process_dataset.
dataset_splits = process_dataset(df, val_split=0.2)
(xtr, ytr), (xvl, yvl), _ = dataset_splits

# Report the shapes of both splits as a quick sanity check.
print(f'x train: {xtr.shape}, x val: {xvl.shape}')
print(f'y train: {ytr.shape}, y val: {yvl.shape}')

In [None]:
# Grid search over hidden-layer configurations: every candidate network is trained on
# the training split and scored (MSE and R2) on both the training and validation splits.
# `scores` maps a fixed-length configuration tuple to its dictionary of metrics.
scores = {}
configurations = incremental_levels(num_levels=NUM_LAYERS, parameters=NUM_UNITS)

# Loop invariants of the progress message, computed once instead of on every iteration.
num_configs = len(configurations)
counter_width = len(str(num_configs))

for idx, config in enumerate(configurations):
    print(f'Model {idx + 1:0{counter_width}}/{num_configs}', end='')
    # perf_counter() is a monotonic clock, so the reported duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
    # One ReLU hidden layer per entry of the configuration, followed by a 2-unit linear output.
    model = Sequential([Dense(hu, activation='relu') for hu in config] + [Dense(2)])
    model.compile(optimizer='adam', loss='mse')
    # validation_split makes Keras hold out part of (xtr, ytr) to compute the val_loss that
    # early stopping monitors; (xvl, yvl) is only used for the scores below.
    model.fit(xtr, ytr, validation_split=0.2, epochs=EPOCHS, callbacks=[early_stopping], verbose=False)
    # verbose=0 keeps Keras progress bars from breaking up the one-line-per-model log.
    ptr, pvl = model.predict(xtr, verbose=0), model.predict(xvl, verbose=0)
    # Zero-pad the configuration to exactly NUM_LAYERS entries so that all keys have the
    # same length; a separate name avoids rebinding the loop variable.
    config_key = (config + (0,) * NUM_LAYERS)[:NUM_LAYERS]
    scores[config_key] = {
        'train_mse': mean_squared_error(ytr, ptr),
        'train_r2': r2_score(ytr, ptr),
        'val_mse': mean_squared_error(yvl, pvl),
        'val_r2': r2_score(yvl, pvl)
    }
    print(f' -- elapsed time: {time.perf_counter() - start_time:.4}s')

In [None]:
# One row per evaluated configuration, best validation R2 first.
results = pd.DataFrame.from_dict(scores, orient='index')
results = results.sort_values(by='val_r2', ascending=False)
results

In [None]:
# Retrain the top-ranked configuration on the dataset processed without a validation split.
best_config = results.index[0]
(x, y), (x_scaler, y_scaler) = process_dataset(df, val_split=None)

# Entries equal to 0 are the padding added during the search, so they are not turned into layers.
hidden_layers = [Dense(units, activation='relu') for units in best_config if units != 0]
model = Sequential(hidden_layers + [Dense(2)])
model.compile(optimizer='adam', loss='mse')

# Early stopping monitors the loss on the part of (x, y) that Keras holds out via validation_split.
early_stopping = EarlyStopping(monitor='val_loss', patience=100, restore_best_weights=True)
history = model.fit(
    x,
    y,
    validation_split=0.2,
    epochs=EPOCHS,
    callbacks=[early_stopping],
    verbose=True,
)

In [None]:
# Training and validation loss per epoch for the retrained model.
plt.figure(figsize=(18, 4))
epochs_run = history.epoch
loss_axes = sns.lineplot(x=epochs_run, y=history.history['loss'], label='train loss')
loss_axes.set(title='Training History')
sns.lineplot(x=epochs_run, y=history.history['val_loss'], label='validation loss')
plt.show()

In [None]:
# Scatter predictions against targets for each of the two outputs, after mapping both
# through y_scaler.inverse_transform.
p = y_scaler.inverse_transform(model.predict(x))
# The transformed targets get their own name: rebinding `y` would make this cell
# non-idempotent, because a second run would apply the inverse transform twice.
y_true = y_scaler.inverse_transform(y)

fig, axes = plt.subplots(1, 2, figsize=(18, 6), tight_layout=True)
for idx, title in enumerate(['Hospitalized Peak', 'Cumulative Deaths']):
    pp, yy = p[:, idx], y_true[:, idx]
    sns.scatterplot(x=pp, y=yy, ax=axes[idx]).set(xlabel='prediction', ylabel='target', title=title)
    # Identity line (prediction == target) spanning the full range of both series,
    # not just the range of the predictions.
    lo, hi = min(pp.min(), yy.min()), max(pp.max(), yy.max())
    axes[idx].plot([lo, hi], [lo, hi], 'r--')
plt.show()