In [1]:
# Import the dependencies

import numpy as np
import pandas as pd
import re

from matplotlib import pyplot as plt
from sklearn.metrics import r2_score

from datetime import timedelta,datetime

import sys
from path import Path

# import plotly.graph_objects as go

In [2]:
# Import the user-defined ml class

class_path = Path("../classes/pmmfs_ml")
sys.path.append(class_path)
from ml import ml

from ml_util import *

In [3]:
cleaned_df = pd.read_csv("Resources/cleaned_df.csv")

In [4]:
# Create four dataframes, one for each feature set to be explored for predicting future total cases and total deaths

df = cleaned_df[['date','iso_code','population','population_density','median_age','C6_Stay at home requirements','C7_Restrictions on internal movement','C8_International travel controls','E1_Income support','total_cases','total_deaths']]


In [5]:
# Earlier EDA has shown that, out of the researched neural network configurations, the following neural 
# network configuration produces the most accurate results

layers = { 
            "number_input_features": 0,   # number of input features will vary, value set elsewhere
            "n_layers": 4, 
            "l0":{"number_hidden_nodes":18,"activation_function":"relu"},
            "l1":{"number_hidden_nodes":9,"activation_function":"relu"},
            "l2":{"number_hidden_nodes":4,"activation_function":"relu"},
            "l3":{"number_hidden_nodes":1,"activation_function":"linear"}
        }

In [6]:
days_out = '60'

In [7]:
get_day_df(df,days_out)

Unnamed: 0,date,iso_code,date_60,date_60.1,total_cases_60,total_deaths_60
0,2020-03-13,ABW,2020-05-12,2020-05-12,101.0,3.0
1,2020-03-20,ABW,2020-05-19,2020-05-19,101.0,3.0
2,2020-03-24,ABW,2020-05-23,2020-05-23,101.0,3.0
3,2020-03-25,ABW,2020-05-24,2020-05-24,101.0,3.0
4,2020-03-26,ABW,2020-05-25,2020-05-25,101.0,3.0
...,...,...,...,...,...,...
31800,2020-08-27,ZWE,2020-10-26,,,
31801,2020-08-28,ZWE,2020-10-27,,,
31802,2020-08-29,ZWE,2020-10-28,,,
31803,2020-08-30,ZWE,2020-10-29,,,


In [8]:
# Regression Analysis

# Initialize the list of models
models = []

# Get the features
features = df.columns

# Get the list of future days dataframes
day_df = get_day_df(df,days_out)

# For each of the targets, namely total_cases and total_deaths
for target in ["total_cases","total_deaths"]:

    print(f"{target} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>\n\n")

    # Merge the feature dataframe with the day dataframe
    df_n = df.merge(day_df,on=["date","iso_code"],how="left")
    print(df_n.columns)

    # Remove the target columns from merged dataframe
    features = [f for f in features if re.search(target_regex,f) == None]
    features = features[2:]  # Remove date and iso_code

    # Set the number of input features for the neural networks based on the current feature set
    layers["number_input_features"] = len(features)

    # Create an instance of the ml class to start machine learning
    md = ml(df=df_n,
            feature_set=features,
            test_size=0.05,
            title=f"{target} - {days_out} days out",
            target=target,
            period=days_out,
            **layers
           )

    # Train and test the machine learning mode over 100 epochs
    md.train_test(epochs=100)

    # Append the instance of the ml class into the models list
    models.append(md)


total_cases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>


Index(['date', 'iso_code', 'population', 'population_density', 'median_age',
       'C6_Stay at home requirements', 'C7_Restrictions on internal movement',
       'C8_International travel controls', 'E1_Income support', 'total_cases',
       'total_deaths', 'date_60', 'date_60', 'total_cases_60',
       'total_deaths_60'],
      dtype='object')


 Training and testing - 60 days ahead


target_n = total_cases_60

features = ['population', 'population_density', 'median_age', 'C6_Stay at home requirements', 'C7_Restrictions on internal movement', 'C8_International travel controls', 'E1_Income support', 'total_cases', 'total_deaths']
population

population_density

median_age

C6_Stay at home requirements

C7_Restrictions on internal movement

C8_International travel controls

E1_Income support

total_cases

total_deaths

Model: "sequential"
_________________________________________________________________
L

Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9841354728240007
Testing  r2_score = 0.9763675493767534
total_deaths %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>


Index(['date', 'iso_code', 'population', 'population_density', 'median_age',
       'C6_Stay at home requirements', 'C7_Restrictions on internal movement',
       'C8_International travel controls', 'E1_Income support', 'total_cases',
       'total_deaths', 'date_60', 'date_60', 'total_cases_60',
       'total_deaths_60'],
      dtype='object')


 Training and testing - 60 days ahead


target_n = total_de

Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9780166916134524
Testing  r2_score = 0.9631563918650661


In [9]:
for md in models:
    df = md.get_df()
    print(df.columns)

Index(['date', 'iso_code', 'population', 'population_density', 'median_age',
       'C6_Stay at home requirements', 'C7_Restrictions on internal movement',
       'C8_International travel controls', 'E1_Income support', 'total_cases',
       'total_deaths', 'total_cases_60'],
      dtype='object')
Index(['date', 'iso_code', 'population', 'population_density', 'median_age',
       'C6_Stay at home requirements', 'C7_Restrictions on internal movement',
       'C8_International travel controls', 'E1_Income support', 'total_cases',
       'total_deaths', 'total_deaths_60'],
      dtype='object')


In [10]:
for md in models:
    df = md.get_df()
    model = md.get_model()
    features = md.get_features()
    X_scaler = md.get_X_scaler()
    y_scaler = md.get_y_scaler()
    
    target = md.get_target() + "_" + md.get_period()
    
    iso_codes = df["iso_code"].unique()
    
    for code in iso_codes:
        X = df[df["iso_code"] == code][features]
        
        X_scaled = X_scaler.transform(X)
        y_pred_scaled = model.predict(X_scaled)
        y_pred = y_scaler.inverse_transform(y_pred_scaled)
        
        df_ic = df[df["iso_code"] == code]
        
        time = [str(t) for t in df_ic["date"]]

        x_vals = df_ic["date"].values.tolist()
        y_test = df_ic[target].values.tolist()  
        y_pred_scaled = model.predict(X_scaled).reshape(-1,1).tolist()
        y_pred = [y_scaler.inverse_transform(y) for y in y_pred_scaled]
       
        
        df_c = pd.DataFrame({"Time":x_vals,"Actual":y_test, "Predicted": y_pred})

        plt.figure(figsize=[25,15])
        plt.plot(df_c["Time"], df_c["Actual"], c="Red")
        plt.plot(df_c["Time"], df_c["Predicted"], c="Green")
        
        plt.xlabel("Time")
        plt.ylabel("Actual/Predicted")
        
        plt.title(md.get_title() + "; Country: " + code)
        plt.xticks(rotation=90)
        plt.legend(["Actual","Predicted"])
        
        feature_str = ""
        for feature in features:
            if re.search(gr_regex,feature) != None:
                feature_str = feature_str + "_" + feature[:2]
                
        plt.savefig(f"Resources/graphs/AP_target-{md.get_target()}-features-{feature_str}-period-{md.get_period()}-country-{code}.png")
        plt.close()
