In [22]:
# Import the dependencies

import numpy as np
import pandas as pd
import re

from matplotlib import pyplot as plt
from sklearn.metrics import r2_score

from datetime import timedelta,datetime

import sys
from path import Path

# imports for database connection
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy import create_engine, func

# import these to view table column headers & rows
from sqlalchemy.orm import sessionmaker
from sqlalchemy import select

In [23]:
# Import the user-defined ml class

class_path = Path("../classes/pmmfs_ml")
sys.path.append(class_path)
from ml import ml

In [24]:
# reflect an existing database into a new model (creates base class for automap schema)
Base = automap_base()

In [25]:
engine = create_engine("sqlite:///Resources/covid_db.db")

In [26]:
# reflect the tables
Base.prepare(engine, reflect=True)

In [27]:
Base.classes.keys()

['covid_stats', 'government_regulation']

In [28]:
# save references to each table
covid_stats = Base.classes.covid_stats
government_regulation = Base.classes.government_regulation

In [29]:
# Create session (link) from Python to the database
session = sessionmaker(bind=engine)()

In [30]:
# View table column headers & rows - covid_stats
covid_stats_table = select('*').select_from(covid_stats)
covid_stats_result = session.execute(covid_stats_table).fetchall()

In [31]:
# View table column headers & rows - government_regulation
government_regulation_table = select('*').select_from(government_regulation)
government_regulation_result = session.execute(government_regulation_table).fetchall()

In [32]:
# Merge covid_stats_table with government regulation_table
joined = session.query(covid_stats.primary_key_reaction,\
                        covid_stats.date_updated,\
                        covid_stats.iso_code,\
                        covid_stats.continent,\
                        covid_stats.location,\
                        covid_stats.population,\
                        covid_stats.population_density,\
                        covid_stats.median_age,\
                        covid_stats.new_cases,\
                        covid_stats.percentage_of_Pop_New_Cases,\
                        covid_stats.new_deaths,\
                        covid_stats.percentage_of_Pop_New_Deaths,\
                        covid_stats.new_tests,\
                        covid_stats.percentage_of_Pop_New_Tests,\
                        covid_stats.total_cases_updated,\
                        covid_stats.percentage_of_Pop_Total_Cases,\
                        covid_stats.total_deaths_updated,\
                        covid_stats.percentage_of_Pop_Total_Deaths,\
                        covid_stats.total_tests_updated,\
                        covid_stats.percentage_of_Pop_Total_Tests,\
                        government_regulation.primary_key_response,\
                        government_regulation.Date_updated,\
                        government_regulation.CountryName,\
                        government_regulation.CountryCode,\
                        government_regulation.C1_School_closing,\
                        government_regulation.C1_Flag,\
                        government_regulation.C2_Workplace_closing,\
                        government_regulation.C2_Flag,\
                        government_regulation.C3_Cancel_public_events,\
                        government_regulation.C3_Flag,\
                        government_regulation.C4_Restrictions_on_gatherings,\
                        government_regulation.C4_Flag,\
                        government_regulation.C5_Close_public_transport,\
                        government_regulation.C5_Flag,\
                        government_regulation.C6_Stay_at_home_requirements,\
                        government_regulation.C6_Flag,\
                        government_regulation.C7_Restrictions_on_internal_movement,\
                        government_regulation.C7_Flag,\
                        government_regulation.C8_International_travel_controls,\
                        government_regulation.E1_Income_support,\
                        government_regulation.E1_Flag,\
                        government_regulation.H1_Public_information_campaigns,\
                        government_regulation.H1_Flag,\
                        government_regulation.StringencyIndex_updated,\
                        government_regulation.percentage_change_stringency,\
                        government_regulation.StringencyLegacyIndex_updated,\
                        government_regulation.GovernmentResponseIndex_updated,\
                        government_regulation.percentage_change_GovernmentResponse,\
                        government_regulation.ContainmentHealthIndex_updated,\
                        government_regulation.percentage_change_ContainmentHealth,\
                        government_regulation.EconomicSupportIndex_updated,\
                        government_regulation.percentage_change_EconomicSupport)\
        .join(government_regulation,government_regulation.primary_key_response == covid_stats.primary_key_reaction)

# save the query results as a Pandas DataFrame and set the index
cleaned_df = pd.DataFrame(joined, columns=[ 'primary_key_reaction',
                                    'date_updated',
                                    'iso_code',
                                    'continent',
                                    'location',
                                    'population',
                                    'population_density',
                                    'median_age',
                                    'new_cases',
                                    'percentage_of_Pop_New_Cases',
                                    'new_deaths',
                                    'percentage_of_Pop_New_Deaths',
                                    'new_tests',
                                    'percentage_of_Pop_New_Tests',
                                    'total_cases_updated',
                                    'percentage_of_Pop_Total_Cases',
                                    'total_deaths_updated',
                                    'percentage_of_Pop_Total_Deaths',
                                    'total_tests_updated',
                                    'percentage_of_Pop_Total_Tests',
                                    'primary_key_response',
                                    'Date_updated',
                                    'CountryName',
                                    'CountryCode',
                                    'C1_School_closing',
                                    'C1_Flag',
                                    'C2_Workplace_closing',
                                    'C2_Flag',
                                    'C3_Cancel_public_events',
                                    'C3_Flag',
                                    'C4_Restrictions_on_gatherings',
                                    'C4_Flag',
                                    'C5_Close_public_transport',
                                    'C5_Flag',
                                    'C6_Stay_at_home_requirements',
                                    'C6_Flag',
                                    'C7_Restrictions_on_internal_movement',
                                    'C7_Flag',
                                    'C8_International_travel_controls',
                                    'E1_Income_support',
                                    'E1_Flag',
                                    'H1_Public_information_campaigns',
                                    'H1_Flag',
                                    'StringencyIndex_updated',
                                    'percentage_change_stringency',
                                    'StringencyLegacyIndex_updated',
                                    'GovernmentResponseIndex_updated',
                                    'percentage_change_GovernmentResponse',
                                    'ContainmentHealthIndex_updated',
                                    'percentage_change_ContainmentHealth',
                                    'EconomicSupportIndex_updated',
                                    'percentage_change_EconomicSupport'
                                ])

In [33]:
# Get the country codes 

iso_codes = cleaned_df["iso_code"].unique()

In [34]:
# This function converts a date string of the format YYYYMMDD to YYYY-MM-DD

def formatDate(dt):
    d_str = str(dt)
    year = d_str[0:4]
    month = d_str[4:6]
    day = d_str[6:8]
    dt_str = year + "-" + month + "-" + day
     
    return dt_str

In [35]:
# Create four dataframes, one for each feature set to be explored for predicting future total cases and total deaths

df = cleaned_df[['date_updated','iso_code','population','population_density','median_age','C1_School_closing','C2_Workplace_closing','C3_Cancel_public_events','C4_Restrictions_on_gatherings','C5_Close_public_transport','C6_Stay_at_home_requirements','C7_Restrictions_on_internal_movement','C8_International_travel_controls','total_cases_updated','total_deaths_updated']]


In [36]:
# The following function creates a dataframe for each individual iso_code (country code), which contains the 
# following information:
# - date, iso_code, future date

# It returns a dictionary of the country codes dataframes and a list of the iso_codes used as dictionary key values

# Input: 
# df - A list of dataframes

# Returns:
# - A countries dictionary, each element of which contains future date dictionaries, each of which has 
# three columns: data, iso_code, and future date
# - A iso_codes list contain a unique list of all the country codes

def get_iso_dicts(df):
    
    # Get a lost of the iso_codes
    #  iso_codes = df["iso_code"].unique()
    # iso_codes = ["CAN","GBR","USA"]
    
    # Initialize the country codes dictionary
    iso_dicts = {}
    
    # Iterate through the iso_cides
    for code in iso_codes:
        
        # Create a dictionary for the current iso_code
        iso_dicts[code] = {}
        
        # For each value of day in the future list (30,45,60, and 75)
        for day in future:
            
            # Create the future date column name
            date_col = "_".join(["date_updated",day])
            
            # Copy the date and iso_code columns to a new dataframe, df_iso
            df_iso = cleaned_df[cleaned_df["iso_code"]==code][["date_updated","iso_code"]]
            
            # Set the future date column to the current date + the current value of day
            df_iso[date_col] = pd.to_datetime(df_iso["date_updated"]) + timedelta(days=int(day))
            
            # Set the type of the future date column to str so that it can be used in merge operations
            df_iso[date_col] = df_iso[date_col].astype(str)

            # Add the dataframe to the iso_dicts country dictionaries 
            iso_dicts[code][day] = df_iso
                    
            if code == "CAN":
                can_df = df_iso.copy()
                
    # Return the country dataframes and the country codes        
    return iso_dicts, iso_codes

In [37]:
# This function groups all the country-day dataframe by the number of days into the future (30,45,60,75) into
# a list of four dataframes
#
# Input:
# iso_codes - country codes list
# iso_dicts - country-future-dates dictionaries
#
# Returns:
# A list of dataframes, one for each of the future days being considered
def get_cum_days(*iso_codes, **iso_dicts):
    
    # Initialize the days dataframe list
    days = []

    # For each day value in futures ('30','45','60','75')
    for day in future:
        
        # Create a new cumulative dataframe
        cum_df = pd.DataFrame()
        
        # For each iso code (country code)
        for code in iso_codes:
            
            # Append the dataframe corresponding to the country and the future days to the current cumulative dataframe
            cum_df = cum_df.append(iso_dicts[code][day])

        # Add the current cumulative dataframe to the days dataframe list
        days.append(cum_df.reset_index(drop=True))
    
    return days

In [38]:
# This function creates the future total cases and future total deaths columns by left joining the days dataframes
# with the original dataframe on the future date, the current date, and the iso_code columns.

# Input:
# days - list of future date dataframe

# Return:
# m_days - list of dataframes containing future total deaths and future total cases

def get_amended_days(*days):
        
    # Initialize the amended days dataframe list
    m_days = []
    
    # Iterate through the values in the future list ['30','45','60','75']
    for i,day in enumerate(future):
        
        # Merge the current days dataframe with the original dataframe and append it to m_days
        m_days.append(days[i].merge(cleaned_df,left_on=["date_updated_" + day,"iso_code"],right_on=["date_updated","iso_code"],how="left",suffixes=["","_" + day]))
        
        # Drop the index of the merged dataframe
        m_days[i] = m_days[i].reset_index(drop=True)
        
        # Choose the useful columns from the merged dataframe
        m_days[i] = m_days[i][["date_updated","iso_code","date_updated_" + day,"total_cases_updated","total_deaths_updated"]]
        
        # Rename the total_cases and total_deaths case to identify to which future period they belong
        m_days[i].rename(columns={"total_cases_updated":"total_cases_" + day,
                           "total_deaths_updated":"total_deaths_" + day},inplace=True)
        
    return m_days

In [39]:
# This function creates the list of day dataframes, one for each of the future periods under consideration. 
# The dataframes contain the following columns: date, iso_code, future_date, future total cases, future total deaths 

def get_days(df):
    
    # Get the country-days dicts and the country iso codes
    iso_dicts, iso_codes = get_iso_dicts(df)
    
    # Get the list of days dataframes for each of the future period lengths
    days = get_cum_days(*iso_codes, **iso_dicts)
        
    # Append to the list the future total deaths and the future total cases
    days = get_amended_days(*days)

    return days

In [40]:
# Earlier EDA has shown that, out of the researched neural network configurations, the following neural 
# network configuration produces the most accurate results

layers = { 
            "number_input_features": 0,   # number of input features will vary, value set elsewhere
            "n_layers": 4, 
            "l0":{"number_hidden_nodes":18,"activation_function":"relu"},
            "l1":{"number_hidden_nodes":9,"activation_function":"relu"},
            "l2":{"number_hidden_nodes":4,"activation_function":"relu"},
            "l3":{"number_hidden_nodes":1,"activation_function":"linear"}
        }

In [41]:
# Regression Analysis

can_df = None

# Initialize the future days list 
future = ["30"]

# Initialize the list of models
models = []

# For each of the feature lists under consideration
# Get the list of future days dataframes
days = get_days(df)

# For each of the targets, namely total_cases and total_deaths
for target in ["total_cases","total_deaths"]:

    print(f"{target} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>\n\n")

    # For each day in future ['30','45','60','75']
    for i,day in enumerate(future):

        print(f"\n\n>>>>>>>> {day} >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> ")

        # Get the features
        features = df.columns

        # Merge the feature dataframe with the day dataframe
        df_n = df.merge(days[i],on=["date_updated","iso_code"],how="left")
        # df_n.to_csv(f"Resources/Model-3-Days-{future[i]}.csv")

        # Remove the target columns from merged dataframe
        print(f"features3 = {features}")
        features = [f for f in features if re.search("_\d{2}$",f) == None]
        print(f"features4 = {features}")
        #print(f"features3: {features}")
        features = features[2:]  # Remove date and iso_code
        #print(f"features4: {features}")
        # Set the number of input features for the neural networks based on the current feature set
        layers["number_input_features"] = len(features)

        # Create an instance of the ml class to start machine learning
        md = ml(df=df_n,
                feature_set=features,
                test_size=0.05,
                title=f"{target} - {day} days out",
                target=target,
                period=day,
                **layers
               )

        # Train and test the machine learning mode over 100 epochs
        print("train_test")
        md.train_test(epochs=100)

        # Append the instance of the ml class into the models list
        models.append(md)


total_cases %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C1_School_closing', 'C2_Workplace_closing',
       'C3_Cancel_public_events', 'C4_Restrictions_on_gatherings',
       'C5_Close_public_transport', 'C6_Stay_at_home_requirements',
       'C7_Restrictions_on_internal_movement',
       'C8_International_travel_controls', 'total_cases_updated',
       'total_deaths_updated'],
      dtype='object')
features4 = ['date_updated', 'iso_code', 'population', 'population_density', 'median_age', 'C1_School_closing', 'C2_Workplace_closing', 'C3_Cancel_public_events', 'C4_Restrictions_on_gatherings', 'C5_Close_public_transport', 'C6_Stay_at_home_requirements', 'C7_Restrictions_on_internal_movement', 'C8_International_travel_controls', 'total_cases_updated', 'total_deaths_updated']
train_test


 Training a

Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
Training r2_score = 0.9983505140660962
Testing  r2_score = 0.9981686071588125
total_deaths %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%>




>>>>>>>> 30 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 
features3 = Index(['date_updated', 'iso_code', 'population', 'population_density',
       'median_age', 'C1_School_closing', 'C2_Workplace_closing',
       'C3_Cancel_public_events', 'C4_Restrictions_on_gatherings',
       'C5_Cl

Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100

Epoch 99/100
Epoch 100/100
Training r2_score = 0.9929204498178265
Testing  r2_score = 0.9902985531091705


In [43]:
day_30 = get_days(df)[0]
d_temp = day_30[["date_updated","iso_code","total_cases_30","total_deaths_30"]]
du_30 = pd.DataFrame({"date_updated":day_30["date_updated"],"date_updated_30":day_30["date_updated_30"].iloc[:,1]})
day_30 = d_temp.merge(du_30,on=["date_updated"])
day_30

Unnamed: 0,date_updated,iso_code,total_cases_30,total_deaths_30,date_updated_30
0,2020-03-13,ABW,92.0,0.0,2020-04-12
1,2020-03-13,ABW,92.0,0.0,2020-04-12
2,2020-03-13,ABW,92.0,0.0,2020-04-12
3,2020-03-13,ABW,92.0,0.0,2020-04-12
4,2020-03-13,ABW,92.0,0.0,2020-04-12
...,...,...,...,...,...
4738918,2020-03-18,ZAF,2605.0,48.0,2020-04-17
4738919,2020-03-18,ZAF,2605.0,48.0,2020-04-17
4738920,2020-03-18,ZAF,2605.0,48.0,2020-04-17
4738921,2020-03-18,ZAF,2605.0,48.0,2020-04-17


In [44]:
# Dataframe to hold Iso_code, CountryName, Actual Deaths, Predicted Deaths, Actual Cases, Predicted Cases

acc_cases = pd.DataFrame()
acc_deaths = pd.DataFrame()

In [46]:
# Iterate through the country dataframes and retrieve the row for which date_updated_30 is 2020-08-31. The row 
# also contains total_(cases or deaths)_30, which is the actual number of cases (or deaths), and 
# total_(cases or deaths)_30_pred, the predicted number of cases (or deaths). 

for i,md in enumerate(models):
    
    # get the cleaned dataframe
    df = cleaned_df
    
    # get the model from md, the ml instance
    model = md.get_model()
    
    # get the feature set from the model
    features = md.get_feature_set().copy()
    
    # Get the StandardScaler instances for the features, X_scaler, and the target, y_scaler, from the model
    X_scaler = md.get_X_scaler()
    y_scaler = md.get_y_scaler()
    
    # Get the target name by concatenating the target (cases or deaths) with the model period (30 days)
    target = md.get_target() + "_" + md.get_period()
    
    # For each country code
    for code in iso_codes:
        
        # Get the feature dataframe
        X = df[df["iso_code"] == code][md.get_feature_set()]
        
        # Scale the feature dataframe
        X_scaled = X_scaler.transform(X)
        
        # Get the predicted target values (scaled)
        y_pred_scaled = model.predict(X_scaled)
        
        # Get the predicted target values (unscaled)
        y_pred = y_scaler.inverse_transform(y_pred_scaled)
        
        # Get the dataframe for the current country code
        df_ic = df[df["iso_code"] == code].copy()
        
        # Create the time list by converting the date_updated column to string
        time = [str(t) for t in df_ic["date_updated"]]
        
        # Get the date values list
        x_vals = df_ic["date_updated"].values.tolist()
        
        # Get the target values list (ignore the errors)
        try:
            y_test = df_ic[target].values.tolist()
        except:
            # Just keep going
            pass

        # Create the feature string to be used in the filename when saving the data to disk    
        feature_str = ""
        for feature in features:
            if re.search('^[CEH\d]',feature) == None:
                feature_str = feature_str + "_" + feature
            else:
                feature_str = feature_str + "_" + feature[:2]
        
        # Add the predicted-values column to the dataframe
        df_ic[target + "_pred"] = y_pred
        
        # Merge the dataframe with day_30, the dataframe which contains the future case and death values
        df_ic = df_ic.merge(day_30,on=["date_updated","iso_code"],how="left")
        
        # Grab the row in which date_updated_30 is 2020-08-31
        print(" ============================================ >>>>>>>>>>>>>>>>>>>>>>")
        to_append = df_ic[df_ic["date_updated_30"] == "2020-08-31"][["iso_code","location","date_updated_30",target,target+"_pred"]]
        to_append = to_append.iloc[0]
        
        # Show the values to append and the target column being generated
        print(to_append)
        print(f"target = {target}")
        
        # Depending on the target being generated, add the new row to acc_deaths or acc_cases
        if "deaths" in target:
            acc_deaths = acc_deaths.append(to_append)
        else:
            acc_cases = acc_cases.append(to_append)
         
        # Output the predictions to a csv file in the predictions folder
        df_ic.to_csv(f"Resources/predictions/target-{target}-country-{code}.csv")
        
        # Plot the graphs comparing predicted values to actual values
        plt.figure(figsize=[25,15])
        plt.plot(df_ic["date_updated"], df_ic[target], c="Red")
        plt.plot(df_ic["date_updated"], df_ic[target + "_pred"], c="Blue")
        
        plt.xlabel("Time")
        plt.ylabel("Actual/Predicted")
        
        plt.title(md.get_title() + "; Country: " + code)
        plt.xticks(rotation=90)
        plt.legend(["Actual","Predicted"])        
        
        plt.savefig(f"Resources/graphs/target-{md.get_target()}-country-{code}.png")
        plt.close()


iso_code                      ABW
location                    Aruba
date_updated_30        2020-08-31
total_cases_30               1997
total_cases_30_pred       6577.97
Name: 21869, dtype: object
target = total_cases_30
iso_code                       AFG
location               Afghanistan
date_updated_30         2020-08-31
total_cases_30               38162
total_cases_30_pred        37563.9
Name: 27214, dtype: object
target = total_cases_30
iso_code                      AGO
location                   Angola
date_updated_30        2020-08-31
total_cases_30               2624
total_cases_30_pred       5659.27
Name: 21346, dtype: object
target = total_cases_30
iso_code                      ALB
location                  Albania
date_updated_30        2020-08-31
total_cases_30               9380
total_cases_30_pred       3928.81
Name: 22837, dtype: object
target = total_cases_30
iso_code                                ARE
location               United Arab Emirates
date_updated_30        

iso_code                      CHL
location                    Chile
date_updated_30        2020-08-31
total_cases_30             409974
total_cases_30_pred        495396
Name: 23153, dtype: object
target = total_cases_30
iso_code                      CHN
location                    China
date_updated_30        2020-08-31
total_cases_30              89895
total_cases_30_pred       98779.2
Name: 27214, dtype: object
target = total_cases_30
iso_code                         CIV
location               Cote d'Ivoire
date_updated_30           2020-08-31
total_cases_30                 17948
total_cases_30_pred          22481.9
Name: 22361, dtype: object
target = total_cases_30
iso_code                      CMR
location                 Cameroon
date_updated_30        2020-08-31
total_cases_30              19142
total_cases_30_pred         20137
Name: 22639, dtype: object
target = total_cases_30
iso_code                                        COD
location               Democratic Republic of Con

iso_code                      GEO
location                  Georgia
date_updated_30        2020-08-31
total_cases_30               1487
total_cases_30_pred       5047.37
Name: 27214, dtype: object
target = total_cases_30
iso_code                      GHA
location                    Ghana
date_updated_30        2020-08-31
total_cases_30              44205
total_cases_30_pred       52830.8
Name: 22363, dtype: object
target = total_cases_30
iso_code                      GIN
location                   Guinea
date_updated_30        2020-08-31
total_cases_30               9371
total_cases_30_pred       14505.3
Name: 22244, dtype: object
target = total_cases_30
iso_code                      GMB
location                   Gambia
date_updated_30        2020-08-31
total_cases_30               2963
total_cases_30_pred      -9476.56
Name: 21895, dtype: object
target = total_cases_30
iso_code                      GRC
location                   Greece
date_updated_30        2020-08-31
total_cases_30

iso_code                      KWT
location                   Kuwait
date_updated_30        2020-08-31
total_cases_30              84636
total_cases_30_pred       75602.3
Name: 27214, dtype: object
target = total_cases_30
iso_code                      LAO
location                     Laos
date_updated_30        2020-08-31
total_cases_30                 22
total_cases_30_pred      -2318.26
Name: 20894, dtype: object
target = total_cases_30
iso_code                      LBN
location                  Lebanon
date_updated_30        2020-08-31
total_cases_30              16870
total_cases_30_pred       11533.3
Name: 27214, dtype: object
target = total_cases_30
iso_code                      LBR
location                  Liberia
date_updated_30        2020-08-31
total_cases_30               1304
total_cases_30_pred       6239.57
Name: 22022, dtype: object
target = total_cases_30
iso_code                      LBY
location                    Libya
date_updated_30        2020-08-31
total_cases_30

iso_code                       NZL
location               New Zealand
date_updated_30         2020-08-31
total_cases_30                1387
total_cases_30_pred         489.88
Name: 27214, dtype: object
target = total_cases_30
iso_code                      OMN
location                     Oman
date_updated_30        2020-08-31
total_cases_30              85544
total_cases_30_pred       86129.9
Name: 27214, dtype: object
target = total_cases_30
iso_code                      PAK
location                 Pakistan
date_updated_30        2020-08-31
total_cases_30             295849
total_cases_30_pred        331917
Name: 27214, dtype: object
target = total_cases_30
iso_code                      PAN
location                   Panama
date_updated_30        2020-08-31
total_cases_30              92065
total_cases_30_pred       77783.6
Name: 22837, dtype: object
target = total_cases_30
iso_code                      PER
location                     Peru
date_updated_30        2020-08-31
total_cas

iso_code                      SWZ
location                Swaziland
date_updated_30        2020-08-31
total_cases_30               4561
total_cases_30_pred       5711.25
Name: 22260, dtype: object
target = total_cases_30
iso_code                      SYC
location               Seychelles
date_updated_30        2020-08-31
total_cases_30                136
total_cases_30_pred      -2451.23
Name: 22260, dtype: object
target = total_cases_30
iso_code                      TCD
location                     Chad
date_updated_30        2020-08-31
total_cases_30               1012
total_cases_30_pred       6005.44
Name: 21630, dtype: object
target = total_cases_30
iso_code                      TGO
location                     Togo
date_updated_30        2020-08-31
total_cases_30               1396
total_cases_30_pred       3834.82
Name: 23237, dtype: object
target = total_cases_30
iso_code                      THA
location                 Thailand
date_updated_30        2020-08-31
total_cases_30

iso_code                       AUS
location                 Australia
date_updated_30         2020-08-31
total_deaths_30                611
total_deaths_30_pred       922.723
Name: 27214, dtype: object
target = total_deaths_30
iso_code                       AUT
location                   Austria
date_updated_30         2020-08-31
total_deaths_30                733
total_deaths_30_pred        1007.4
Name: 27214, dtype: object
target = total_deaths_30
iso_code                       AZE
location                Azerbaijan
date_updated_30         2020-08-31
total_deaths_30                531
total_deaths_30_pred       815.034
Name: 27214, dtype: object
target = total_deaths_30
iso_code                       BDI
location                   Burundi
date_updated_30         2020-08-31
total_deaths_30                  1
total_deaths_30_pred       136.964
Name: 19805, dtype: object
target = total_deaths_30
iso_code                       BEL
location                   Belgium
date_updated_30       

iso_code                       COG
location                     Congo
date_updated_30         2020-08-31
total_deaths_30                 78
total_deaths_30_pred       796.058
Name: 22143, dtype: object
target = total_deaths_30
iso_code                       COL
location                  Colombia
date_updated_30         2020-08-31
total_deaths_30              19364
total_deaths_30_pred       19655.6
Name: 23161, dtype: object
target = total_deaths_30
iso_code                       CPV
location                Cape Verde
date_updated_30         2020-08-31
total_deaths_30                 40
total_deaths_30_pred        221.83
Name: 21491, dtype: object
target = total_deaths_30
iso_code                       CRI
location                Costa Rica
date_updated_30         2020-08-31
total_deaths_30                418
total_deaths_30_pred       196.806
Name: 22920, dtype: object
target = total_deaths_30
iso_code                       CUB
location                      Cuba
date_updated_30       

iso_code                       GTM
location                 Guatemala
date_updated_30         2020-08-31
total_deaths_30               2740
total_deaths_30_pred       3557.55
Name: 22260, dtype: object
target = total_deaths_30
iso_code                       GUM
location                      Guam
date_updated_30         2020-08-31
total_deaths_30                 10
total_deaths_30_pred      -349.573
Name: 21766, dtype: object
target = total_deaths_30
iso_code                       GUY
location                    Guyana
date_updated_30         2020-08-31
total_deaths_30                 37
total_deaths_30_pred      -446.069
Name: 22246, dtype: object
target = total_deaths_30
iso_code                       HKG
location                 Hong Kong
date_updated_30         2020-08-31
total_deaths_30                NaN
total_deaths_30_pred      -88.0804
Name: 943, dtype: object
target = total_deaths_30
iso_code                       HND
location                  Honduras
date_updated_30         

iso_code                       LBY
location                     Libya
date_updated_30         2020-08-31
total_deaths_30                232
total_deaths_30_pred       398.682
Name: 20894, dtype: object
target = total_deaths_30
iso_code                       LKA
location                 Sri Lanka
date_updated_30         2020-08-31
total_deaths_30                 12
total_deaths_30_pred       76.0244
Name: 27214, dtype: object
target = total_deaths_30
iso_code                       LSO
location                   Lesotho
date_updated_30         2020-08-31
total_deaths_30                 31
total_deaths_30_pred       283.388
Name: 12717, dtype: object
target = total_deaths_30
iso_code                       LTU
location                 Lithuania
date_updated_30         2020-08-31
total_deaths_30                 86
total_deaths_30_pred       275.923
Name: 27214, dtype: object
target = total_deaths_30
iso_code                       LUX
location                Luxembourg
date_updated_30       

iso_code                       PAN
location                    Panama
date_updated_30         2020-08-31
total_deaths_30               1995
total_deaths_30_pred       2127.62
Name: 22837, dtype: object
target = total_deaths_30
iso_code                       PER
location                      Peru
date_updated_30         2020-08-31
total_deaths_30              28788
total_deaths_30_pred       25939.5
Name: 23585, dtype: object
target = total_deaths_30
iso_code                        PHL
location                Philippines
date_updated_30          2020-08-31
total_deaths_30                3520
total_deaths_30_pred         3524.5
Name: 27214, dtype: object
target = total_deaths_30
iso_code                             PNG
location                Papua New Guinea
date_updated_30               2020-08-31
total_deaths_30                        5
total_deaths_30_pred            -185.645
Name: 21491, dtype: object
target = total_deaths_30
iso_code                       POL
location              

iso_code                       TGO
location                      Togo
date_updated_30         2020-08-31
total_deaths_30                 27
total_deaths_30_pred       1.83539
Name: 23237, dtype: object
target = total_deaths_30
iso_code                       THA
location                  Thailand
date_updated_30         2020-08-31
total_deaths_30                 58
total_deaths_30_pred      -3.00128
Name: 27214, dtype: object
target = total_deaths_30
iso_code                       TJK
location                Tajikistan
date_updated_30         2020-08-31
total_deaths_30                 68
total_deaths_30_pred       -292.94
Name: 14985, dtype: object
target = total_deaths_30
iso_code                       TLS
location                     Timor
date_updated_30         2020-08-31
total_deaths_30                  0
total_deaths_30_pred       335.304
Name: 21346, dtype: object
target = total_deaths_30
iso_code                                TTO
location                Trinidad and Tobago
date

In [48]:
# Save the acc_cases and acc_deaths dataframes to csv
acc_cases.to_csv("Cases_ActualvsPredicted.csv")
acc_deaths.to_csv("Deaths_ActualvsPredicted.csv")

In [49]:
acc_cases.shape

(165, 5)

In [50]:
acc_deaths.shape

(164, 5)