In [1]:
# Import the dependencies

import numpy as np
import pandas as pd
import re

from matplotlib import pyplot as plt
from sklearn.metrics import r2_score

from datetime import timedelta,datetime

import sys
from path import Path

# imports for database connection
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, func

# import these to view table column headers & rows
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select

In [2]:
# Import the user-defined ml class

class_path = Path("../classes/pmmfs_ml")
sys.path.append(class_path)
from ml import ml

In [3]:
iso_codes = ["CAN","GBR","USA"]

# reflect an existing database into a new model (creates base class for automap schema)
Base = automap_base()

engine = create_engine("sqlite:///Resources/covid_db.db")

# reflect the tables
Base.prepare(engine, reflect=True)

Base.classes.keys()

# save references to each table
covid_stats = Base.classes.covid_stats
# government_regulation = Base.classes.government_regulation

# Create session (link) from Python to the database
session = sessionmaker(bind=engine)()

# View table column headers & rows - covid_stats
covid_stats_table = select('*').select_from(covid_stats)
covid_stats_result = session.execute(covid_stats_table).fetchall()

In [None]:
clean_df = government_regulation_df.merge(cover)

In [None]:
# View table column headers & rows - government_regulation
government_regulation_table = select('*').select_from(government_regulation)
government_regulation_result = session.execute(government_regulation_table).fetchall()

In [None]:
# Merge covid_stats_table with government regulation_table
joined = session.query(covid_stats.primary_key_reaction,\
                        covid_stats.date_updated,\
                        covid_stats.iso_code,\
                        covid_stats.continent,\
                        covid_stats.location,\
                        covid_stats.population,\
                        covid_stats.population_density,\
                        covid_stats.median_age,\
                        covid_stats.new_cases,\
                        covid_stats.percentage_of_Pop_New_Cases,\
                        covid_stats.new_deaths,\
                        covid_stats.percentage_of_Pop_New_Deaths,\
                        covid_stats.new_tests,\
                        covid_stats.percentage_of_Pop_New_Tests,\
                        covid_stats.total_cases_updated,\
                        covid_stats.percentage_of_Pop_Total_Cases,\
                        covid_stats.total_deaths_updated,\
                        covid_stats.percentage_of_Pop_Total_Deaths,\
                        covid_stats.total_tests_updated,\
                        covid_stats.percentage_of_Pop_Total_Tests,\
                        government_regulation.primary_key_response,\
                        government_regulation.Date_updated,\
                        government_regulation.CountryName,\
                        government_regulation.CountryCode,\
                        government_regulation.C1_School_closing,\
                        government_regulation.C1_Flag,\
                        government_regulation.C2_Workplace_closing,\
                        government_regulation.C2_Flag,\
                        government_regulation.C3_Cancel_public_events,\
                        government_regulation.C3_Flag,\
                        government_regulation.C4_Restrictions_on_gatherings,\
                        government_regulation.C4_Flag,\
                        government_regulation.C5_Close_public_transport,\
                        government_regulation.C5_Flag,\
                        government_regulation.C6_Stay_at_home_requirements,\
                        government_regulation.C6_Flag,\
                        government_regulation.C7_Restrictions_on_internal_movement,\
                        government_regulation.C7_Flag,\
                        government_regulation.C8_International_travel_controls,\
                        government_regulation.E1_Income_support,\
                        government_regulation.E1_Flag,\
                        government_regulation.H1_Public_information_campaigns,\
                        government_regulation.H1_Flag,\
                        government_regulation.StringencyIndex_updated,\
                        government_regulation.percentage_change_stringency,\
                        government_regulation.StringencyLegacyIndex_updated,\
                        government_regulation.GovernmentResponseIndex_updated,\
                        government_regulation.percentage_change_GovernmentResponse,\
                        government_regulation.ContainmentHealthIndex_updated,\
                        government_regulation.percentage_change_ContainmentHealth,\
                        government_regulation.EconomicSupportIndex_updated,\
                        government_regulation.percentage_change_EconomicSupport)\
        .join(government_regulation,government_regulation.primary_key_response == covid_stats.primary_key_reaction)

# save the query results as a Pandas DataFrame and set the index
cleaned_df = pd.DataFrame(joined, columns=[ 'primary_key_reaction',
                                    'date_updated',
                                    'iso_code',
                                    'continent',
                                    'location',
                                    'population',
                                    'population_density',
                                    'median_age',
                                    'new_cases',
                                    'percentage_of_Pop_New_Cases',
                                    'new_deaths',
                                    'percentage_of_Pop_New_Deaths',
                                    'new_tests',
                                    'percentage_of_Pop_New_Tests',
                                    'total_cases_updated',
                                    'percentage_of_Pop_Total_Cases',
                                    'total_deaths_updated',
                                    'percentage_of_Pop_Total_Deaths',
                                    'total_tests_updated',
                                    'percentage_of_Pop_Total_Tests',
                                    'primary_key_response',
                                    'Date_updated',
                                    'CountryName',
                                    'CountryCode',
                                    'C1_School_closing',
                                    'C1_Flag',
                                    'C2_Workplace_closing',
                                    'C2_Flag',
                                    'C3_Cancel_public_events',
                                    'C3_Flag',
                                    'C4_Restrictions_on_gatherings',
                                    'C4_Flag',
                                    'C5_Close_public_transport',
                                    'C5_Flag',
                                    'C6_Stay_at_home_requirements',
                                    'C6_Flag',
                                    'C7_Restrictions_on_internal_movement',
                                    'C7_Flag',
                                    'C8_International_travel_controls',
                                    'E1_Income_support',
                                    'E1_Flag',
                                    'H1_Public_information_campaigns',
                                    'H1_Flag',
                                    'StringencyIndex_updated',
                                    'percentage_change_stringency',
                                    'StringencyLegacyIndex_updated',
                                    'GovernmentResponseIndex_updated',
                                    'percentage_change_GovernmentResponse',
                                    'ContainmentHealthIndex_updated',
                                    'percentage_change_ContainmentHealth',
                                    'EconomicSupportIndex_updated',
                                    'percentage_change_EconomicSupport'
                                ])

In [27]:
cleaned_df = pd.read_csv("Resources/merged_covid_FINAL.csv")

In [28]:
cleaned_df.columns

Index(['primary_key_reaction', 'date_updated', 'iso_code', 'continent',
       'location', 'population', 'population_density', 'median_age',
       'new_cases', 'percentage_of_Pop_New_Cases', 'new_deaths',
       'percentage_of_Pop_New_Deaths', 'new_tests',
       'percentage_of_Pop_New_Tests', 'total_cases_updated',
       'percentage_of_Pop_Total_Cases', 'total_deaths_updated',
       'percentage_of_Pop_Total_Deaths', 'total_tests_updated',
       'percentage_of_Pop_Total_Tests', 'primary_key_response',
       'C1_School_closing', 'C1_Flag', 'C2_Workplace_closing', 'C2_Flag',
       'C3_Cancel_public_events', 'C3_Flag', 'C4_Restrictions_on_gatherings',
       'C4_Flag', 'C5_Close_public_transport', 'C5_Flag',
       'C6_Stay_at_home_requirements', 'C6_Flag',
       'C7_Restrictions_on_internal_movement', 'C7_Flag',
       'C8_International_travel_controls', 'E1_Income_support', 'E1_Flag',
       'H1_Public_information_campaigns', 'H1_Flag', 'StringencyIndex_updated',
       'percenta

In [31]:
cleaned_df.isnull().sum()

primary_key_reaction                    0
date_updated                            0
iso_code                                0
continent                               0
location                                0
population                              0
population_density                      0
median_age                              0
new_cases                               0
percentage_of_Pop_New_Cases             0
new_deaths                              0
percentage_of_Pop_New_Deaths            0
new_tests                               0
percentage_of_Pop_New_Tests             0
total_cases_updated                     0
percentage_of_Pop_Total_Cases           0
total_deaths_updated                    0
percentage_of_Pop_Total_Deaths          0
total_tests_updated                     0
percentage_of_Pop_Total_Tests           0
primary_key_response                    0
C1_School_closing                       0
C1_Flag                                 0
C2_Workplace_closing              

In [13]:
# This function converts a date string of the format YYYYMMDD to YYYY-MM-DD

def formatDate(dt):
    d_str = str(dt)
    year = d_str[0:4]
    month = d_str[4:6]
    day = d_str[6:8]
    dt_str = year + "-" + month + "-" + day
     
    return dt_str

In [32]:
# Create four dataframes, one for each feature set to be explored for predicting future total cases and total deaths

df1 = cleaned_df[['date_updated','iso_code','population','population_density','median_age','C6_Stay_at_home_requirements','C7_Restrictions_on_internal_movement','C8_International_travel_controls','total_cases_updated','total_deaths_updated']]
df2 = cleaned_df[['date_updated','iso_code','population','population_density','median_age','C1_School_closing','C2_Workplace_closing','C3_Cancel_public_events','C6_Stay_at_home_requirements','C7_Restrictions_on_internal_movement','C8_International_travel_controls','total_cases_updated','total_deaths_updated']]
df3 = cleaned_df[['date_updated','iso_code','population','population_density','median_age','C1_School_closing','C2_Workplace_closing','C3_Cancel_public_events','C4_Restrictions_on_gatherings','C5_Close_public_transport','C6_Stay_at_home_requirements','C7_Restrictions_on_internal_movement','C8_International_travel_controls','total_cases_updated','total_deaths_updated']]
df4 = cleaned_df[['date_updated','iso_code','population','population_density','median_age','C6_Stay_at_home_requirements','C7_Restrictions_on_internal_movement','C8_International_travel_controls','E1_Income_support','total_cases_updated','total_deaths_updated']]
# Put the dataframe in a list
dfs = [df1, df2, df3, df4]


In [33]:
# The following function creates a dataframe for each individual iso_code (country code), which contains the 
# following information:
# - date, iso_code, future date

# It returns a dictionary of the country codes dataframes and a list of the iso_codes used as dictionary key values

# Input: 
# df - A list of dataframes

# Returns:
# - A countries dictionary, each element of which contains future date dictionaries, each of which has 
# three columns: data, iso_code, and future date
# - A iso_codes list contain a unique list of all the country codes

def get_iso_dicts(df):
    
    # Get a lost of the iso_codes
    #  iso_codes = df["iso_code"].unique()
    # iso_codes = ["CAN","GBR","USA"]
    
    # Initialize the country codes dictionary
    iso_dicts = {}
    
    # Iterate through the iso_cides
    for code in iso_codes:
        
        # Create a dictionary for the current iso_code
        iso_dicts[code] = {}
        
        # For each value of day in the future list (30,45,60, and 75)
        for day in future:
            
            # Create the future date column name
            date_col = "_".join(["date_updated",day])
            
            # Copy the date and iso_code columns to a new dataframe, df_iso
            df_iso = cleaned_df[cleaned_df["iso_code"]==code][["date_updated","iso_code"]]
            
            # Set the future date column to the current date + the current value of day
            df_iso[date_col] = pd.to_datetime(df_iso["date_updated"]) + timedelta(days=int(day))
            
            # Set the type of the future date column to str so that it can be used in merge operations
            df_iso[date_col] = df_iso[date_col].astype(str)

            # Add the dataframe to the iso_dicts country dictionaries 
            iso_dicts[code][day] = df_iso
                    
            if code == "CAN":
                can_df = df_iso.copy()
                
    # Return the country dataframes and the country codes        
    return iso_dicts, iso_codes

In [34]:
# This function groups all the country-day dataframe by the number of days into the future (30,45,60,75) into
# a list of four dataframes
#
# Input:
# iso_codes - country codes list
# iso_dicts - country-future-dates dictionaries
#
# Returns:
# A list of dataframes, one for each of the future days being considered
def get_cum_days(*iso_codes, **iso_dicts):
    
    # Initialize the days dataframe list
    days = []

    # For each day value in futures ('30','45','60','75')
    for day in future:
        
        # Create a new cumulative dataframe
        cum_df = pd.DataFrame()
        
        # For each iso code (country code)
        for code in iso_codes:
            
            # Append the dataframe corresponding to the country and the future days to the current cumulative dataframe
            cum_df = cum_df.append(iso_dicts[code][day])

        # Add the current cumulative dataframe to the days dataframe list
        days.append(cum_df.reset_index(drop=True))
    
    return days

In [35]:
# This function creates the future total cases and future total deaths columns by left joining the days dataframes
# with the original dataframe on the future date, the current date, and the iso_code columns.

# Input:
# days - list of future date dataframe

# Return:
# m_days - list of dataframes containing future total deaths and future total cases

def get_amended_days(*days):
        
    # Initialize the amended days dataframe list
    m_days = []
    
    # Iterate through the values in the future list ['30','45','60','75']
    for i,day in enumerate(future):
        
        # Merge the current days dataframe with the original dataframe and append it to m_days
        m_days.append(days[i].merge(cleaned_df,left_on=["date_updated_" + day,"iso_code"],right_on=["date_updated","iso_code"],how="left",suffixes=["","_" + day]))
        
        # Drop the index of the merged dataframe
        m_days[i] = m_days[i].reset_index(drop=True)
        
        # Choose the useful columns from the merged dataframe
        m_days[i] = m_days[i][["date_updated","iso_code","date_updated_" + day,"total_cases_updated","total_deaths_updated"]]
        
        # Rename the total_cases and total_deaths case to identify to which future period they belong
        m_days[i].rename(columns={"total_cases_updated":"total_cases_" + day,
                           "total_deaths_updated":"total_deaths_" + day},inplace=True)
        
    return m_days

In [36]:
# This function creates the list of day dataframes, one for each of the future periods under consideration. 
# The dataframes contain the following columns: date, iso_code, future_date, future total cases, future total deaths 

def get_days(df):
    
    # Get the country-days dicts and the country iso codes
    iso_dicts, iso_codes = get_iso_dicts(df)
    
    # Get the list of days dataframes for each of the future period lengths
    days = get_cum_days(*iso_codes, **iso_dicts)
        
    # Append to the list the future total deaths and the future total cases
    days = get_amended_days(*days)

    return days

In [37]:
# Earlier EDA has shown that, out of the researched neural network configurations, the following neural 
# network configuration produces the most accurate results

layers = { 
            "number_input_features": 0,   # number of input features will vary, value set elsewhere
            "n_layers": 4, 
            "l0":{"number_hidden_nodes":18,"activation_function":"relu"},
            "l1":{"number_hidden_nodes":9,"activation_function":"relu"},
            "l2":{"number_hidden_nodes":4,"activation_function":"relu"},
            "l3":{"number_hidden_nodes":1,"activation_function":"linear"}
        }

In [38]:
# Regression Analysis

can_df = None

# Initialize the future days list 
future = ["30"]

# Initialize the list of models
models = []

# For each of the feature lists under consideration
for n,df in enumerate(dfs):

#     # Get the features
#     features = df.columns
    
    # Get the list of future days dataframes
    days = get_days(df)
        
    # For each of the targets, namely total_cases and total_deaths
    for target in ["total_cases","total_deaths"]:
        
        print(f"{target} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>\n\n")
        
        # For each day in future ['30','45','60','75']
        for i,day in enumerate(future):
            
            print(f"\n\n>>>>>>>> {day} >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ")
            
            # Get the features
            features = df.columns
            
            # Merge the feature dataframe with the day dataframe
            df_n = df.merge(days[i],on=["date_updated","iso_code"],how="left")
            df_n.to_csv(f"Resources/Model-{n}-Days-{future[i]}.csv")
            
            # Remove the target columns from merged dataframe
            print(f"features3 = {features}")
            features = [f for f in features if re.search("_\d{2}$",f) == None]
            print(f"features4 = {features}")
            #print(f"features3: {features}")
            features = features[2:]  # Remove date and iso_code
            #print(f"features4: {features}")
            # Set the number of input features for the neural networks based on the current feature set
            layers["number_input_features"] = len(features)

            # Create an instance of the ml class to start machine learning
            md = ml(df=df_n,
                    feature_set=features,
                    test_size=0.05,
                    title=f"{target} - {day} days out",
                    target=target,
                    period=day,
                    **layers
                   )
            
            # Train and test the machine learning mode over 100 epochs
            print("train_test")
            md.train_test(epochs=100)

            # Append the instance of the ml class into the models list
            models.append(md)


total_cases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C6_Stay_at_home_requirements',
       'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
features4 = ['date_updated', 'iso_code', 'population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'total_cases_updated', 'total_deaths_updated']
train_test


 Training and testing - 30 days ahead


target_n = total_cases_30

features = ['population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'total_cases_updated', 'total_deaths_updated']
populatio

Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9994420968563372
Testing  r2_score = 0.9972932191908938
total_deaths %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C6_Stay_at_home_requirements',
       'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
features4 = ['dat

Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.998449700873543

Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9968220376498736
Testing  r2_score = 0.9971436216834759
total_deaths %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C1_School_closing', 'C2_Workplace_closing',
       'C3_Cancel_public_events', 'C6_Stay_at_home_requirements',
       'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
features4 = ['date_updated', 'iso_code', 'population', 'population_density', 'median_age', 'C1_School_closing', 'C2_Workplace_closing', 'C3_Cancel_public_events', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_

Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.8888891110072191
Testing  r2_score = 0.9623881130608187
total_cases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
    

Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoc

Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.999423365065873
Testing  r2_score = 0.9994354142678347
total_deaths %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C1_School_closing', 'C2_Workplace_closing',
       'C3_Cancel_public_events', 'C4_Restrictions_on_gatherings',
       'C5_Close_public_transport', 'C6_Stay_at_home_requirements',
       'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
features4 = ['date_updated', 'iso_code', 'population', 'population_density', 'median_age', 'C1_School_closing', 'C2_Workplace_closing', 'C3_Can

Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9991163105010938
Testing  r2_score = 0.9989805712249856
total_cases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['da

Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch

Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9993459446569048
Testing  r2_score = 0.9972148825944549
total_deaths %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C6_Stay_at_home_requirements',
       'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'E1_Income_support',
       'total_cases_updated', 'total_deaths_updated'],
      dtype='object')
features4 = ['date_updated', 'iso_code', 'population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'E1_Income_support', 'total_cases_updated', 'total_deaths_updated']
train_test


 T

Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9972414475479667
Testing  r2_score = 0.9945919215570187


for md in models:
    df = md.get_df()
    print(df.columns)

In [39]:
day_30 = get_days(df)[0]
day_30

Unnamed: 0,date_updated,iso_code,date_updated_30,date_updated_30.1,total_cases_30,total_deaths_30
0,2020-01-01,CAN,2020-01-31,2020-01-31,3.0,0.0
1,2020-01-02,CAN,2020-02-01,2020-02-01,4.0,0.0
2,2020-01-03,CAN,2020-02-02,2020-02-02,4.0,0.0
3,2020-01-04,CAN,2020-02-03,2020-02-03,4.0,0.0
4,2020-01-05,CAN,2020-02-04,2020-02-04,4.0,0.0
...,...,...,...,...,...,...
727,2020-08-27,USA,2020-09-26,,,
728,2020-08-28,USA,2020-09-27,,,
729,2020-08-29,USA,2020-09-28,,,
730,2020-08-30,USA,2020-09-29,,,


In [47]:
day_30[day_30["iso_code"] == "CAN"]

Unnamed: 0,date_updated,iso_code,date_updated_30,date_updated_30.1,total_cases_30,total_deaths_30
0,2020-01-01,CAN,2020-01-31,2020-01-31,3.0,0.0
1,2020-01-02,CAN,2020-02-01,2020-02-01,4.0,0.0
2,2020-01-03,CAN,2020-02-02,2020-02-02,4.0,0.0
3,2020-01-04,CAN,2020-02-03,2020-02-03,4.0,0.0
4,2020-01-05,CAN,2020-02-04,2020-02-04,4.0,0.0
...,...,...,...,...,...,...
239,2020-08-27,CAN,2020-09-26,,,
240,2020-08-28,CAN,2020-09-27,,,
241,2020-08-29,CAN,2020-09-28,,,
242,2020-08-30,CAN,2020-09-29,,,


In [56]:
for i,md in enumerate(models):
    print(f"model #: {i}")
    # df = md.get_df()
    df = cleaned_df
    model = md.get_model()
    features = md.get_feature_set().copy()
    print(f"features1 = {features}")
    X_scaler = md.get_X_scaler()
    y_scaler = md.get_y_scaler()
    
    target = md.get_target() + "_" + md.get_period()
    print(f"target ====> {target}")
    
    print(f"features2 = {features}")
    
    #iso_codes = df["iso_code"].unique()
    
    for code in iso_codes:
        X = df[df["iso_code"] == code][md.get_feature_set()]
        print(f"X.columns = {X.columns}")
        X_scaled = X_scaler.transform(X)
        y_pred_scaled = model.predict(X_scaled)
        y_pred = y_scaler.inverse_transform(y_pred_scaled)
        
        df_ic = df[df["iso_code"] == code].copy()
        
        time = [str(t) for t in df_ic["date_updated"]]

        x_vals = df_ic["date_updated"].values.tolist()
        
        try:
            y_test = df_ic[target].values.tolist()
        except:
            # Just keep going
            pass

            
        feature_str = ""
        for feature in features:
            if re.search('^[CEH\d]',feature) == None:
                feature_str = feature_str + "_" + feature
            else:
                feature_str = feature_str + "_" + feature[:2]
                
        # df_c = pd.DataFrame({"Time":x_vals,"Actual":y_test, "Predicted": y_pred})
        df_ic[target + "_pred"] = y_pred
        
        # df_target = pd.Dataframe(y_test, index = df_ic.index)
        
        # print(f"target = {day_30[day_30['iso_code']==code][target].fillna(0)}")
        
        
        df_ic = df_ic.merge(day_30,on=["date_updated","iso_code"],how="left")
        
        df_ic.to_csv(f"Resources/predictions/target-{md.get_target()}-features-{feature_str}-period-{md.get_period()}-country-{code}.csv")
        
#         plt.figure(figsize=[25,15])
#         plt.plot(df_c["Time"], df_c["Actual"], c="Red")
#         plt.plot(df_c["Time"], df_c["Predicted"], c="Green")
        
#         plt.xlabel("Time")
#         plt.ylabel("Actual/Predicted")
        
#         plt.title(md.get_title() + "; Country: " + code)
#         plt.xticks(rotation=90)
#         plt.legend(["Actual","Predicted"])        
        
#         plt.savefig(f"Resources/graphs/target-{md.get_target()}-features-{feature_str}-period-{md.get_period()}-country-{code}.png")
#         plt.close()


model #: 0
features1 = ['population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'total_cases_updated', 'total_deaths_updated']
target ====> total_cases_30
features2 = ['population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'total_cases_updated', 'total_deaths_updated']
X.columns = Index(['population', 'population_density', 'median_age',
       'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
X.columns = Index(['population', 'population_density', 'median_age',
       'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype

X.columns = Index(['population', 'population_density', 'median_age', 'C1_School_closing',
       'C2_Workplace_closing', 'C3_Cancel_public_events',
       'C4_Restrictions_on_gatherings', 'C5_Close_public_transport',
       'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
model #: 6
features1 = ['population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'E1_Income_support', 'total_cases_updated', 'total_deaths_updated']
target ====> total_cases_30
features2 = ['population', 'population_density', 'median_age', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'E1_Income_support', 'total_cases_updated', 'total_deaths_updated']
X.columns = Index(['population', 'population_density', 'median_age'

for ml in models:
    print(ml.get_feature_set())
    print(ml.get_target())
    print(ml.get_title())
    # print(ml.get_df())
    iso_df = final_df[ml.get_feature_set()]
    print(iso_df.columns)

In [42]:
df_target = pd.DataFrame(y_test, index = df_ic.index)
df_target

NameError: name 'y_test' is not defined