In [1]:
# Switch the working directory to ../script; presumably this is what lets the
# `from utils import ...` line and the '../'-relative paths below resolve (confirm).
%cd ../script

/Users/andrejerkelens/Desktop/GPCE/gpce-covid/v0/script


In [None]:
# Disabled helper kept for manual use: presumably restarts the kernel when
# uncommented (NOTE(review): confirm, or delete this cell if no longer needed).
# import IPython
# IPython.Application.instance().kernel.do_shutdown(True)

In [2]:
import tensorflow as tf
import os, gc, json
import pandas as pd
from pandas import to_datetime
from utils import train_validation_test_split
import argparse
from utils import scale_back, calculate_result, sumCases

# For plotting
import matplotlib.pyplot as plt
import seaborn as sns
# Apply the default seaborn theme with a larger font scale in one call.
# sns.set is an alias of sns.set_theme, so calling both applied the theme twice.
sns.set_theme(font_scale=1.5)

import sys
sys.path.append( '..' )
from Class.Trainer import Trainer
from Class.ParameterManager import ParameterManager
from Class.DataProcessor import DataProcessor
from Class.Plotter import PlotResults, PlotWeights

In [3]:
# Directory handed to tf.train.CheckpointManager further down in this notebook.
checkpoint_folder = '../output/checkpoints'

In [4]:
# Directory passed to PlotResults for the figures produced by this notebook.
figure_folder = os.path.join('../output','figures')

In [5]:
# Create both output folders up front. exist_ok=True already makes makedirs a
# no-op for an existing directory, so a separate os.path.exists() check is
# redundant (and racy); one loop replaces the two copy-pasted branches.
for output_folder in (checkpoint_folder, figure_folder):
    os.makedirs(output_folder, exist_ok=True)

In [6]:
# Keep the config location in one place so the log line and open() cannot drift apart.
config_path = '../config_2022_May.json'
print(f'Loading config.json from {config_path}')

# The with-statement closes the file on exit, so no explicit close() is needed.
with open(config_path) as inputfile:
    config = json.load(inputfile)

Loading config.json from ../config_2022_May.json


In [7]:
# Report how many GPU devices TensorFlow can see on this machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  1


In [8]:
# Relative path of the input feature CSV that the next cell loads with pandas.
dataPath = '../2022_May/Population_cut.csv'

In [9]:
# Read the feature table from disk and report its (rows, columns) shape.
print(f'Loading input data from {dataPath}')
df = pd.read_csv(dataPath)
feature_shape = df.shape
print(f'Input feature file shape {feature_shape}')

Loading input data from ../2022_May/Population_cut.csv
Input feature file shape (405000, 21)


In [10]:
# Normalise the key columns: FIPS identifiers become strings, Date becomes datetime.
# NOTE(review): if read_csv parsed FIPS as integers, leading zeros are already
# lost before this cast; confirm whether downstream code needs zero-padded codes.
df['FIPS'] = df['FIPS'].astype(str)
df['Date'] = to_datetime(df['Date'])

In [11]:
# Wrap the loaded JSON config in a ParameterManager; later cells read column
# mappings, sequence lengths, optimizer settings and target columns from it.
parameterManager = ParameterManager(config)

In [12]:
# Display the column labels of the loaded feature table.
df.columns

Index(['FIPS', 'AgeDist', 'AirPollution', 'HealthDisp', 'Name', 'Date',
       'DiseaseSpread', 'Transmission', 'VaccinationFull', 'SocialDist',
       'Cases', 'Deaths', 'TimeFromStart', 'LinearSpace', 'Constant',
       'LinearTime', 'P2Time', 'P3Time', 'P4Time', 'CosWeekly', 'SinWeekly'],
      dtype='object')

In [13]:
# Show which dataframe columns the ParameterManager assigns to each input group.
print(f'Column mappings: {parameterManager.col_mappings}\n')

# Train / validation / test split with scaling enabled; the helper also returns
# the scaler that later cells pass to scale_back.
train_data, validation_data, test_data, target_scaler = train_validation_test_split(
    df, parameterManager, scale=True
)
n_train, n_validation, n_test = (
    train_data.shape[0], validation_data.shape[0], test_data.shape[0]
)
print(f'Number train data is {n_train}, validation {n_validation}, test {n_test}')

# Last expression of the cell: runs a collection pass and displays its return value.
gc.collect()

Column mappings: {'Static': ['AgeDist', 'AirPollution', 'HealthDisp'], 'ID': ['FIPS'], 'Time': ['TimeFromStart'], 'Target': ['Cases', 'Deaths'], 'Future': ['LinearSpace', 'Constant', 'LinearTime', 'P2Time', 'P3Time', 'P4Time', 'CosWeekly', 'SinWeekly'], 'Known Regular': ['AgeDist', 'AirPollution', 'HealthDisp', 'DiseaseSpread', 'Transmission', 'VaccinationFull', 'SocialDist']}

Number train data is 320000, validation 41000, test 44000


0

In [14]:
# Build the trainer and let it construct the model from the managed parameters.
trainer = Trainer(parameterManager)
model = trainer.create_model()

# Adam optimizer configured from the two optimizer settings read from the config.
optimizer_params = parameterManager.optimizer_params
adam_settings = {
    setting: optimizer_params[setting] for setting in ('learning_rate', 'clipnorm')
}
optimizer = tf.keras.optimizers.Adam(**adam_settings)

Metal device set to: Apple M1 Pro


2022-07-23 16:46:11.998101: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-07-23 16:46:11.998324: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [15]:
# Echo the checkpoint directory that the CheckpointManager in the next cell uses.
checkpoint_folder

'../output/checkpoints'

In [16]:
# Bundle model and optimizer so both are restored from the same checkpoint.
checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
checkpointManager = tf.train.CheckpointManager(checkpoint, checkpoint_folder, max_to_keep=1)

latest_checkpoint = checkpointManager.latest_checkpoint
model = trainer.load_from_checkpoint(checkpoint, latest_checkpoint)
if model is None:
    # sys.exit(-1) in a notebook only surfaces as a bare SystemExit with no
    # explanation; raise a descriptive error so the failing cell says what to fix.
    raise RuntimeError(
        f'Could not restore a model from checkpoint {latest_checkpoint!r} '
        f'in {checkpoint_folder!r}; train the model first or check the folder.'
    )

Checkpoint restored from ../output/checkpoints/ckpt-8


In [17]:
# Batching helper built from the sequence length, column mappings and data parameters.
dataProcessor = DataProcessor(
    parameterManager.total_sequence_length,
    parameterManager.col_mappings,
    parameterManager.data_params,
)

# ### Train
train_batch = dataProcessor.prepare_batch(train_data)
train_preds, train_actuals, train_attn_weights = trainer.predict(model, train_batch)

# Pass actuals and predictions through scale_back with the fitted target scaler
# (presumably returning them to the original units) before computing metrics.
target_horizon = parameterManager.target_sequence_length
train_actuals = scale_back(train_actuals, target_scaler, target_horizon)
train_preds = scale_back(train_preds, target_scaler, target_horizon)

# split=True yields one metric value per target (see the printed lists).
train_mae, train_rmse, train_smape = calculate_result(train_actuals, train_preds, split=True)
print(f'Train MAE {train_mae}, RMSE {train_rmse}, SMAPE {train_smape}')
gc.collect()

2022-07-23 16:46:35.515457: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-07-23 16:46:35.517389: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-23 16:46:36.898631: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-23 16:46:36.958506: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-23 16:51:14.432825: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-23 16:51:16.429148: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.
2022-07-23 16:51:16.578142: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113

Train MAE [728.6282, 6.9243], RMSE [1015.3225, 13.0028], SMAPE [1.5761, 1.6715]


0

In [None]:
"""### Validation"""
# Predict on the validation split; the third return value is discarded.
validation_batch = dataProcessor.prepare_batch(validation_data)
validation_preds, validation_actuals, _ = trainer.predict(model, validation_batch)

# Apply scale_back with the fitted target scaler to both arrays before scoring.
target_horizon = parameterManager.target_sequence_length
validation_preds = scale_back(validation_preds, target_scaler, target_horizon)
validation_actuals = scale_back(validation_actuals, target_scaler, target_horizon)

# Per-target metrics (split=True), reported in the same format as the train cell.
validation_mae, validation_rmse, validation_smape = calculate_result(
    validation_actuals, validation_preds, split=True
)
print(f'Validation MAE {validation_mae}, RMSE {validation_rmse}, SMAPE {validation_smape}')

In [None]:
"""### Test"""

# Predict on the test split; the third return value is discarded.
test_batch = dataProcessor.prepare_batch(test_data)
test_preds, test_actuals, _ = trainer.predict(model, test_batch)

# Apply scale_back with the fitted target scaler to both arrays before scoring.
target_horizon = parameterManager.target_sequence_length
test_actuals = scale_back(test_actuals, target_scaler, target_horizon)
test_preds = scale_back(test_preds, target_scaler, target_horizon)

# Per-target metrics (split=True), reported in the same format as the other splits.
test_mae, test_rmse, test_smape = calculate_result(test_actuals, test_preds, split=True)
print(f'Test MAE {test_mae}, RMSE {test_rmse}, SMAPE {test_smape}')

# The model is intentionally kept alive here (the `del model` stays disabled).
#del model
gc.collect()

In [None]:
# col_mappings['ID'] is a list of column names; as before, its first entry is
# used as the location identifier.
id_column = parameterManager.col_mappings['ID'][0]

# Slicing the first N rows only gives the location list when those rows happen
# to hold every location exactly once. unique() returns each id once, in order
# of first appearance, so it matches the old result whenever that assumption
# held and stays correct when the frame is ordered differently.
locs = df[id_column].unique()
number_of_locations = len(locs)
print(f'Number of locations {number_of_locations}')

In [None]:
# Display the configured target columns as plain strings.
list(map(str, parameterManager.target_column))

In [None]:
"""
Train prediction
"""
# The data summed here are the TRAIN actuals/predictions, anchored at
# train_start, so the titles and figure names must report the train metrics.
# The previous labels quoted the validation metrics over train data.
targets, predictions = sumCases(train_actuals, train_preds, number_of_locations)

PlotC = PlotResults(
    targets, predictions, parameterManager.train_start, locs, figure_folder,
    parameterManager.target_column,
)

# One title and one figure name per target column; the metrics are per-target lists.
plot_titles = [
    f'Summed plot (Train) MAE {mae:0.3f}, RMSE {rmse:0.3f}, SMAPE {smape:0.3f}'
    for mae, rmse, smape in zip(train_mae, train_rmse, train_smape)
]
figure_names = ['Summed train: ' + str(target) for target in parameterManager.target_column]

In [None]:
# Draw the summed plot(s) with the titles and figure names built in the previous cell.
PlotC.makeSummedPlot(plot_titles, figure_names=figure_names)

In [None]:
import numpy as np

In [None]:
# np.timedelta does not exist (AttributeError); NumPy's duration type is
# np.timedelta64 and needs an explicit count and unit. Shows the day after the
# training start, the same arithmetic used for the date list further down.
parameterManager.train_start + np.timedelta64(1, 'D')

In [None]:
# Inspect the configured training start value.
parameterManager.train_start

In [None]:
import numpy as np

In [None]:
# List 627 consecutive daily timestamps beginning at the training start.
# NOTE(review): 627 is a hard-coded length; confirm what it is meant to match.
train_origin = parameterManager.train_start
[train_origin + np.timedelta64(day_offset, 'D') for day_offset in range(627)]

In [None]:
# Inspect the shape of the `targets` array returned by sumCases.
targets.shape

In [None]:
# Sum the targets over their first axis, arrange the result with one column per
# target, and display the resulting shape.
n_targets = len(parameterManager.target_column)
summed_targets = np.sum(targets, axis=0)
summed_targets.reshape(-1, n_targets).shape

In [None]:
# Summed plot for the training split.
targets, predictions = sumCases(train_actuals, train_preds, number_of_locations)

# Constructed the same way as the per-target plotter earlier in this notebook
# (target columns passed as the last argument).
resultPlotter = PlotResults(
    targets, predictions, parameterManager.train_start, locs, figure_folder,
    parameterManager.target_column,
)

# train_mae / train_rmse are per-target lists (calculate_result(..., split=True)),
# so formatting them directly with ':0.3f' raises TypeError. Build one title and
# one figure name per target instead.
train_plot_titles = [
    f'Summed plot (train) MAE {mae:0.3f}, RMSE {rmse:0.3f}'
    for mae, rmse in zip(train_mae, train_rmse)
]
train_figure_names = [
    f'Summed plot - train: {target}' for target in parameterManager.target_column
]

# NOTE(review): the old call also passed figsize=(24, 8); it is dropped because
# the other makeSummedPlot call in this notebook does not pass it. Re-add it if
# the plotter still accepts that keyword.
resultPlotter.makeSummedPlot(train_plot_titles, figure_names=train_figure_names)

In [None]:
# Total cases per date. Selecting the column before aggregating avoids summing
# every other column first, including the string-typed FIPS and Name columns.
df.groupby('Date')['Cases'].sum()