# Example Predictor: Linear Rollout Predictor

This example contains basic functionality for training and evaluating a linear predictor that rolls out predictions day-by-day.

First, a training data set is created from historical case and npi data.

Second, a linear model is trained to predict future cases from prior case data along with prior and future npi data.
The model is an off-the-shelf sklearn Lasso model, that uses a positive weight constraint to enforce the assumption that increased npis has a negative correlation with future cases.

Third, a sample evaluation set is created, and the predictor is applied to this evaluation set to produce prediction results in the correct format.

## Training

In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

### Copy the data locally

In [2]:
# Main source for the training data
DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
# Local file
DATA_FILE = 'data/OxCGRT_latest.csv'

In [3]:
import os
import urllib.request
if not os.path.exists('data'):
    os.mkdir('data')
urllib.request.urlretrieve(DATA_URL, DATA_FILE)

('data/OxCGRT_latest.csv', <http.client.HTTPMessage at 0x7f1f0ceb1b70>)

In [4]:
# Load historical data from local file
df = pd.read_csv(DATA_FILE, 
                 parse_dates=['Date'],
                 encoding="ISO-8859-1",
                 dtype={"RegionName": str,
                        "RegionCode": str},
                 error_bad_lines=False)

In [5]:
df.columns

Index(['CountryName', 'CountryCode', 'RegionName', 'RegionCode',
       'Jurisdiction', 'Date', 'C1_School closing', 'C1_Flag',
       'C2_Workplace closing', 'C2_Flag', 'C3_Cancel public events', 'C3_Flag',
       'C4_Restrictions on gatherings', 'C4_Flag', 'C5_Close public transport',
       'C5_Flag', 'C6_Stay at home requirements', 'C6_Flag',
       'C7_Restrictions on internal movement', 'C7_Flag',
       'C8_International travel controls', 'E1_Income support', 'E1_Flag',
       'E2_Debt/contract relief', 'E3_Fiscal measures',
       'E4_International support', 'H1_Public information campaigns',
       'H1_Flag', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H6_Flag', 'M1_Wildcard', 'ConfirmedCases',
       'ConfirmedDeaths', 'StringencyIndex', 'StringencyIndexForDisplay',
       'StringencyLegacyIndex', 'StringencyLegacyIndexForDisplay',
       'GovernmentResponseIndex', 'Gove

In [6]:
# For testing, restrict training data to that before a hypothetical predictor submission date
HYPOTHETICAL_SUBMISSION_DATE = np.datetime64("2020-07-31")
df = df[df.Date <= HYPOTHETICAL_SUBMISSION_DATE]

In [7]:
# Add RegionID column that combines CountryName and RegionName for easier manipulation of data
df['GeoID'] = df['CountryName'] + '__' + df['RegionName'].astype(str)

In [8]:
# Add new cases column
df['NewCases'] = df.groupby('GeoID').ConfirmedCases.diff().fillna(0)

In [9]:
# Keep only columns of interest
id_cols = ['CountryName',
           'RegionName',
           'GeoID',
           'Date']
cases_col = ['NewCases']
npi_cols = ['C1_School closing',
            'C2_Workplace closing',
            'C3_Cancel public events',
            'C4_Restrictions on gatherings',
            'C5_Close public transport',
            'C6_Stay at home requirements',
            'C7_Restrictions on internal movement',
            'C8_International travel controls',
            'H1_Public information campaigns',
            'H2_Testing policy',
            'H3_Contact tracing',
            'H6_Facial Coverings']
df = df[id_cols + cases_col + npi_cols]

In [10]:
# Fill any missing case values by interpolation and setting NaNs to 0
df.update(df.groupby('GeoID').NewCases.apply(
    lambda group: group.interpolate()).fillna(0))

In [11]:
# Fill any missing NPIs by assuming they are the same as previous day
for npi_col in npi_cols:
    df.update(df.groupby('GeoID')[npi_col].ffill().fillna(0))

In [12]:
df

Unnamed: 0,CountryName,RegionName,GeoID,Date,NewCases,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Aruba,,Aruba__nan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aruba,,Aruba__nan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Aruba,,Aruba__nan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aruba,,Aruba__nan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aruba,,Aruba__nan,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87064,Zimbabwe,,Zimbabwe__nan,2020-07-27,78.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87065,Zimbabwe,,Zimbabwe__nan,2020-07-28,192.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87066,Zimbabwe,,Zimbabwe__nan,2020-07-29,113.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87067,Zimbabwe,,Zimbabwe__nan,2020-07-30,62.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0


In [13]:
df['CountryName'].unique()

array(['Aruba', 'Afghanistan', 'Angola', 'Albania', 'Andorra',
       'United Arab Emirates', 'Argentina', 'Australia', 'Austria',
       'Azerbaijan', 'Burundi', 'Belgium', 'Benin', 'Burkina Faso',
       'Bangladesh', 'Bulgaria', 'Bahrain', 'Bahamas',
       'Bosnia and Herzegovina', 'Belarus', 'Belize', 'Bermuda',
       'Bolivia', 'Brazil', 'Barbados', 'Brunei', 'Bhutan', 'Botswana',
       'Central African Republic', 'Canada', 'Switzerland', 'Chile',
       'China', "Cote d'Ivoire", 'Cameroon',
       'Democratic Republic of Congo', 'Congo', 'Colombia', 'Comoros',
       'Cape Verde', 'Costa Rica', 'Cuba', 'Cyprus', 'Czech Republic',
       'Germany', 'Djibouti', 'Dominica', 'Denmark', 'Dominican Republic',
       'Algeria', 'Ecuador', 'Egypt', 'Eritrea', 'Spain', 'Estonia',
       'Ethiopia', 'Finland', 'Fiji', 'France', 'Faeroe Islands', 'Gabon',
       'United Kingdom', 'Georgia', 'Ghana', 'Guinea', 'Gambia', 'Greece',
       'Greenland', 'Guatemala', 'Guam', 'Guyana', 'Hong Ko

In [14]:
# Set number of past days to use to make predictions
nb_lookback_days = 30

# Create training data across all countries for predicting one day ahead
X_cols = cases_col + npi_cols
y_col = cases_col
X_samples = []
y_samples = []
geo_ids = df.GeoID.unique()
for g in geo_ids:
    gdf = df[df.GeoID == g]
    all_case_data = np.array(gdf[cases_col])
    all_npi_data = np.array(gdf[npi_cols])

    # Create one sample for each day where we have enough data
    # Each sample consists of cases and npis for previous nb_lookback_days
    nb_total_days = len(gdf)
    for d in range(nb_lookback_days, nb_total_days - 1):
        X_cases = all_case_data[d-nb_lookback_days:d]

        # Take negative of npis to support positive
        # weight constraint in Lasso.
        X_npis = -all_npi_data[d - nb_lookback_days:d]

        # Flatten all input data so it fits Lasso input format.
        X_sample = np.concatenate([X_cases.flatten(),
                                   X_npis.flatten()])
        y_sample = all_case_data[d + 1]
        X_samples.append(X_sample)
        y_samples.append(y_sample)

X_samples = np.array(X_samples)
y_samples = np.array(y_samples).flatten()

In [15]:
# Helpful function to compute mae
def mae(pred, true):
    return np.mean(np.abs(pred - true))

In [16]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_samples,
                                                    y_samples,
                                                    test_size=0.2,
                                                    random_state=301)

In [17]:
# # Create and train Lasso model.
# # Set positive=True to enforce assumption that cases are positively correlated
# # with future cases and npis are negatively correlated.
# model = Lasso(alpha=0.1,
#               precompute=True,
#               max_iter=10000,
#               positive=True,
#               selection='random')
# # Fit model
# model.fit(X_train, y_train)

In [18]:
# from sklearn.linear_model import Ridge
# model = Ridge(alpha = 0.5)
# # Fit model
# model.fit(X_train, y_train)

In [19]:
# from sklearn.experimental import enable_hist_gradient_boosting
# from sklearn.ensemble import HistGradientBoostingRegressor
# model = HistGradientBoostingRegressor(loss='least_squares',  learning_rate=0.1, max_iter=1000, max_leaf_nodes=31, max_depth=5, min_samples_leaf=20, l2_regularization=0.5, max_bins=255, monotonic_cst=None, warm_start=False, early_stopping='auto', scoring='loss', validation_fraction=0.1, n_iter_no_change=10, tol=1e-07, verbose=0, random_state=None)
# model.fit(X_train, y_train)

In [20]:
# from sklearn.ensemble import GradientBoostingRegressor
# model = GradientBoostingRegressor()
# model.fit(X_train, y_train)

In [21]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
model = AdaBoostRegressor(base_estimator = DecisionTreeRegressor(max_depth=10), n_estimators = 500, loss = 'square')
model.fit(X_train, y_train)

AdaBoostRegressor(base_estimator=DecisionTreeRegressor(max_depth=10),
                  loss='square', n_estimators=500)

In [22]:
# Evaluate model
train_preds = model.predict(X_train)
train_preds = np.maximum(train_preds, 0) # Don't predict negative cases
print('Train MAE:', mae(train_preds, y_train))

test_preds = model.predict(X_test)
test_preds = np.maximum(test_preds, 0) # Don't predict negative cases
print('Test MAE:', mae(test_preds, y_test))

Train MAE: 231.2096294135742
Test MAE: 283.4694259694012


In [23]:
# # Inspect the learned feature coefficients for the model
# # to see what features it's paying attention to.

# # Give names to the features
# x_col_names = []
# for d in range(-nb_lookback_days, 0):
#     x_col_names.append('Day ' + str(d) + ' ' + cases_col[0])
# for d in range(-nb_lookback_days, 1):
#     for col_name in npi_cols:
#         x_col_names.append('Day ' + str(d) + ' ' + col_name)

# # View non-zero coefficients
# for (col, coeff) in zip(x_col_names, list(model.coef_)):
#     if coeff != 0.:
#         print(col, coeff)
# print('Intercept', model.intercept_)

In [24]:
# Save model to file
if not os.path.exists('models'):
    os.mkdir('models')
with open('models/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## Evaluation

Now that the predictor has been trained and saved, this section contains the functionality for evaluating it on sample evaluation data.

In [25]:
# Reload the module to get the latest changes
import predict
from importlib import reload
reload(predict)
from predict import predict_df

In [26]:
list_countries = sorted(list(set(df.CountryName)))
hist_ips_df = pd.read_csv("data/2020-09-30_historical_ip.csv",
                              parse_dates=['Date'],
                              encoding="ISO-8859-1",
                              dtype={"RegionName": str},
                              error_bad_lines=True)
hist_ips_df = hist_ips_df[hist_ips_df.CountryName.isin(list_countries)]
hist_ips_df.to_csv("data/2020-09-30_historical_ip_new.csv" , index = False) 

In [27]:
%%time
preds_df = predict_df("2020-08-01", "2020-08-31", path_to_ips_file="data/2020-09-30_historical_ip_new.csv", verbose=True)


Predicting for Aruba__nan
2020-08-01: 265.9029850746269
2020-08-02: 293.048309178744
2020-08-03: 298.4912099276112
2020-08-04: 314.4155844155844
2020-08-05: 367.6505248899424
2020-08-06: 401.5450669914738
2020-08-07: 457.35806074766356
2020-08-08: 507.6719160104987
2020-08-09: 594.1411764705882
2020-08-10: 645.2005649717514
2020-08-11: 723.9504504504505
2020-08-12: 778.104
2020-08-13: 794.875
2020-08-14: 827.6923076923077
2020-08-15: 905.552
2020-08-16: 1047.929411764706
2020-08-17: 1143.2547444007523
2020-08-18: 1230.332110091743
2020-08-19: 1366.3333333333333
2020-08-20: 1165.3647058823528
2020-08-21: 1302.2609939220592
2020-08-22: 1319.2734067663257
2020-08-23: 1387.9222972972973
2020-08-24: 1402.2289669861555
2020-08-25: 1437.4609540636043
2020-08-26: 1425.7229437229437
2020-08-27: 1410.2761780104713
2020-08-28: 1416.5485786254048
2020-08-29: 1620.71758135017
2020-08-30: 1714.21875
2020-08-31: 1827.0

Predicting for Afghanistan__nan
2020-08-01: 403.5917602996255
2020-08-02: 437.03

2020-08-01: 2474.945054945055
2020-08-02: 2520.5945171849426
2020-08-03: 2235.1178144564597
2020-08-04: 2529.43358568479
2020-08-05: 2352.549465240642
2020-08-06: 2184.2649842271294
2020-08-07: 2123.1085582998276
2020-08-08: 2266.226804123711
2020-08-09: 2088.4631994427677
2020-08-10: 1978.2153008769278
2020-08-11: 1733.242673992674
2020-08-12: 1892.0616883116884
2020-08-13: 1849.471766848816
2020-08-14: 1678.5689655172414
2020-08-15: 1673.8011424491253
2020-08-16: 1678.791856348002
2020-08-17: 1561.3857142857144
2020-08-18: 1567.7771084337348
2020-08-19: 1561.3857142857144
2020-08-20: 1530.1283607979185
2020-08-21: 1486.7685950413222
2020-08-22: 1486.7685950413222
2020-08-23: 1523.221237268955
2020-08-24: 1478.9318181818182
2020-08-25: 1496.951048951049
2020-08-26: 1451.844589409056
2020-08-27: 1451.844589409056
2020-08-28: 1437.4609540636043
2020-08-29: 1437.0379746835442
2020-08-30: 1450.3586678052945
2020-08-31: 1435.0263157894738

Predicting for Burundi__nan
2020-08-01: 229.790130

2020-08-18: 1572.413208389112
2020-08-19: 1406.6343669250646
2020-08-20: 1273.9642857142858
2020-08-21: 1416.5485786254048
2020-08-22: 1486.7685950413222
2020-08-23: 1552.391304347826
2020-08-24: 1558.875
2020-08-25: 1590.6363636363637
2020-08-26: 1478.429411764706
2020-08-27: 1541.0182128628344
2020-08-28: 1435.0263157894738
2020-08-29: 1545.6255773672056
2020-08-30: 1570.011320754717
2020-08-31: 1558.875

Predicting for Belarus__nan
2020-08-01: 1437.4609540636043
2020-08-02: 1558.875
2020-08-03: 1570.011320754717
2020-08-04: 1611.5749324063345
2020-08-05: 1572.413208389112
2020-08-06: 1560.2747138397503
2020-08-07: 1561.3857142857144
2020-08-08: 1643.6465280502157
2020-08-09: 1612.8761904761905
2020-08-10: 1612.8761904761905
2020-08-11: 1572.413208389112
2020-08-12: 1612.8761904761905
2020-08-13: 1572.413208389112
2020-08-14: 1612.8761904761905
2020-08-15: 1570.011320754717
2020-08-16: 1572.413208389112
2020-08-17: 1574.2950495049504
2020-08-18: 1574.2950495049504
2020-08-19: 1590.63

2020-08-20: 1550.1631504922643
2020-08-21: 1724.7342792647532
2020-08-22: 1685.0296096904442
2020-08-23: 1617.0939759036144
2020-08-24: 1795.0804416403785
2020-08-25: 1620.71758135017
2020-08-26: 1617.0939759036144
2020-08-27: 1654.1842105263158
2020-08-28: 1599.9110225763611
2020-08-29: 1748.8464730290457
2020-08-30: 1766.8474576271187
2020-08-31: 1744.2295514511873

Predicting for Central African Republic__nan
2020-08-01: 243.8987341772152
2020-08-02: 243.8987341772152
2020-08-03: 213.1256038647343
2020-08-04: 213.58227848101265
2020-08-05: 260.62068965517244
2020-08-06: 333.82978723404256
2020-08-07: 399.0625
2020-08-08: 448.75
2020-08-09: 483.3636363636364
2020-08-10: 588.762979683973
2020-08-11: 748.5644796380091
2020-08-12: 804.7474747474747
2020-08-13: 887.1666666666666
2020-08-14: 871.9005235602094
2020-08-15: 1060.4
2020-08-16: 1148.043659043659
2020-08-17: 1293.25
2020-08-18: 1601.3225806451612
2020-08-19: 1626.875
2020-08-20: 1503.4333333333334
2020-08-21: 1468.016393442623


2020-08-15: 1448.8992137240887
2020-08-16: 1547.8177842565597
2020-08-17: 1781.7837837837837
2020-08-18: 1669.5294117647059
2020-08-19: 1805.6119402985075
2020-08-20: 1806.8677685950413
2020-08-21: 2019.8743994509266
2020-08-22: 1806.8677685950413
2020-08-23: 1939.278431372549
2020-08-24: 1877.8504847564536
2020-08-25: 1951.241134751773
2020-08-26: 1768.9919162470183
2020-08-27: 1878.5364238410596
2020-08-28: 1816.2610441767067
2020-08-29: 1854.7531914893616
2020-08-30: 1878.5364238410596
2020-08-31: 1865.581818181818

Predicting for Colombia__nan
2020-08-01: 7355.0
2020-08-02: 7433.987369985141
2020-08-03: 7419.8
2020-08-04: 7571.1
2020-08-05: 6922.034782608695
2020-08-06: 7169.133891213389
2020-08-07: 7145.104895104895
2020-08-08: 7424.5
2020-08-09: 7535.6
2020-08-10: 7334.231578947369
2020-08-11: 7597.181818181818
2020-08-12: 7540.15243902439
2020-08-13: 7617.2962962962965
2020-08-14: 7720.714285714285
2020-08-15: 7841.178571428572
2020-08-16: 7932.035714285715
2020-08-17: 7979.8509

2020-08-16: 1033.3720930232557
2020-08-17: 1085.2487562189056
2020-08-18: 1214.9326923076924
2020-08-19: 1366.3333333333333
2020-08-20: 1411.3333333333333
2020-08-21: 1316.5503875968993
2020-08-22: 1430.4125
2020-08-23: 1382.9
2020-08-24: 1472.626923076923
2020-08-25: 1483.5442092154422
2020-08-26: 1542.8234851336094
2020-08-27: 1435.0263157894738
2020-08-28: 1439.3921568627452
2020-08-29: 1598.1097393689986
2020-08-30: 1678.5689655172414
2020-08-31: 1820.778711484594

Predicting for Dominica__nan
2020-08-01: 172.7400468384075
2020-08-02: 200.12034383954153
2020-08-03: 197.12048192771084
2020-08-04: 203.0
2020-08-05: 216.09677419354838
2020-08-06: 295.22991689750694
2020-08-07: 309.9575518262586
2020-08-08: 363.01181525241674
2020-08-09: 467.35822147651004
2020-08-10: 491.84979480164156
2020-08-11: 758.0
2020-08-12: 932.1153846153846
2020-08-13: 1041.25
2020-08-14: 1213.9716599190283
2020-08-15: 1367.5338645418326
2020-08-16: 1501.4444444444443
2020-08-17: 1752.6923076923076
2020-08-18

2020-08-15: 659.6160714285714
2020-08-16: 679.1296821793417
2020-08-17: 671.1842696629213
2020-08-18: 676.9837631327603
2020-08-19: 684.7323232323232
2020-08-20: 706.728813559322
2020-08-21: 735.9987253027406
2020-08-22: 700.9027921406412
2020-08-23: 705.1904761904761
2020-08-24: 706.728813559322
2020-08-25: 735.9987253027406
2020-08-26: 801.5714285714286
2020-08-27: 735.9987253027406
2020-08-28: 823.1659959758551
2020-08-29: 822.9754601226994
2020-08-30: 863.9069767441861
2020-08-31: 877.578947368421

Predicting for Ethiopia__nan
2020-08-01: 497.9032738095238
2020-08-02: 527.6875
2020-08-03: 553.0981266726137
2020-08-04: 541.374
2020-08-05: 512.6508302583026
2020-08-06: 489.7547764793174
2020-08-07: 534.5409836065573
2020-08-08: 548.6197818728846
2020-08-09: 612.9067188519243
2020-08-10: 598.4484269215452
2020-08-11: 548.8563218390805
2020-08-12: 553.0981266726137
2020-08-13: 587.3164556962025
2020-08-14: 607.7042564953013
2020-08-15: 631.527027027027
2020-08-16: 633.1846826126955
202

2020-08-01: 849.1064516129032
2020-08-02: 837.5294117647059
2020-08-03: 633.8626974483597
2020-08-04: 568.4926215277778
2020-08-05: 664.6598329536827
2020-08-06: 822.9754601226994
2020-08-07: 917.1232227488151
2020-08-08: 761.1924198250729
2020-08-09: 699.9373297002725
2020-08-10: 634.8924870466321
2020-08-11: 669.0
2020-08-12: 706.728813559322
2020-08-13: 806.497512437811
2020-08-14: 822.9754601226994
2020-08-15: 831.1623376623377
2020-08-16: 895.7791017415216
2020-08-17: 851.8097902097902
2020-08-18: 928.5612582781457
2020-08-19: 962.0312213039485
2020-08-20: 1008.9129287598945
2020-08-21: 1008.9129287598945
2020-08-22: 1119.7820069204151
2020-08-23: 1152.9023779724655
2020-08-24: 1294.057410296412
2020-08-25: 1255.719298245614
2020-08-26: 1137.2384105960266
2020-08-27: 1064.334693877551
2020-08-28: 1033.3720930232557
2020-08-29: 1255.6702898550725
2020-08-30: 1372.5754716981132
2020-08-31: 1372.5754716981132

Predicting for Georgia__nan
2020-08-01: 3727.5947265625
2020-08-02: 3912.0

2020-08-26: 1268.4164244186047
2020-08-27: 1160.3471933471933
2020-08-28: 1206.823197219809
2020-08-29: 1257.559670781893
2020-08-30: 1257.559670781893
2020-08-31: 1246.8920863309352

Predicting for Guyana__nan
2020-08-01: 340.5849952516619
2020-08-02: 363.01181525241674
2020-08-03: 326.5700934579439
2020-08-04: 397.5595823095823
2020-08-05: 415.86634844868735
2020-08-06: 452.5079365079365
2020-08-07: 506.57142857142856
2020-08-08: 549.641975308642
2020-08-09: 633.8626974483597
2020-08-10: 590.4570013507429
2020-08-11: 661.1839940164548
2020-08-12: 661.1839940164548
2020-08-13: 748.5644796380091
2020-08-14: 798.1904761904761
2020-08-15: 849.390243902439
2020-08-16: 978.5
2020-08-17: 1024.0
2020-08-18: 1148.043659043659
2020-08-19: 1140.4768392370572
2020-08-20: 1135.9924242424242
2020-08-21: 1160.3471933471933
2020-08-22: 1257.559670781893
2020-08-23: 1152.9023779724655
2020-08-24: 1372.5754716981132
2020-08-25: 1307.7982708933719
2020-08-26: 1332.435593220339
2020-08-27: 1302.26099392

2020-08-30: 649.3005780346821
2020-08-31: 651.9248120300751

Predicting for Iran__nan
2020-08-01: 8424.666666666666
2020-08-02: 14522.1
2020-08-03: 15018.333333333334
2020-08-04: 12400.737903225807
2020-08-05: 12204.0
2020-08-06: 10481.0
2020-08-07: 9956.0
2020-08-08: 10573.0
2020-08-09: 11233.0
2020-08-10: 10816.82456140351
2020-08-11: 10481.0
2020-08-12: 10091.28813559322
2020-08-13: 9815.869918699187
2020-08-14: 10070.05
2020-08-15: 10481.0
2020-08-16: 9971.368421052632
2020-08-17: 9956.0
2020-08-18: 9834.0
2020-08-19: 9942.933333333332
2020-08-20: 10132.952702702703
2020-08-21: 10155.807017543859
2020-08-22: 9956.0
2020-08-23: 10051.944444444445
2020-08-24: 9588.608695652174
2020-08-25: 9586.75
2020-08-26: 10410.621621621622
2020-08-27: 9901.786516853932
2020-08-28: 9834.0
2020-08-29: 9569.642857142857
2020-08-30: 9537.34131736527
2020-08-31: 9427.176470588236

Predicting for Iraq__nan
2020-08-01: 1778.4469696969697
2020-08-02: 1744.2295514511873
2020-08-03: 1678.5689655172414
2020

2020-08-01: 966.0879590093937
2020-08-02: 1222.7422939068101
2020-08-03: 1191.9001148105626
2020-08-04: 1320.5124378109454
2020-08-05: 1059.5103578154426
2020-08-06: 853.6493812610489
2020-08-07: 1110.807471264368
2020-08-08: 1037.5532039976486
2020-08-09: 1320.5124378109454
2020-08-10: 1199.3333333333333
2020-08-11: 1294.057410296412
2020-08-12: 1176.0714285714287
2020-08-13: 1255.6702898550725
2020-08-14: 1384.9767441860465
2020-08-15: 1223.5974842767296
2020-08-16: 1163.6644844517184
2020-08-17: 1270.1734506784012
2020-08-18: 1468.016393442623
2020-08-19: 1388.8555555555556
2020-08-20: 1466.4910714285713
2020-08-21: 1466.4910714285713
2020-08-22: 1419.7449287872248
2020-08-23: 1450.4907407407406
2020-08-24: 1561.3519206939282
2020-08-25: 1623.984198645598
2020-08-26: 1574.2950495049504
2020-08-27: 1574.2950495049504
2020-08-28: 1430.4125
2020-08-29: 1497.016584402764
2020-08-30: 1552.391304347826
2020-08-31: 1560.2747138397503

Predicting for Kyrgyz Republic__nan
2020-08-01: 505.226

2020-08-22: 1204.0285035629454
2020-08-23: 1143.2547444007523
2020-08-24: 1236.1759656652362
2020-08-25: 1214.9326923076924
2020-08-26: 1108.0574229691877
2020-08-27: 1435.0263157894738
2020-08-28: 1332.435593220339
2020-08-29: 1406.6343669250646
2020-08-30: 1425.7229437229437
2020-08-31: 1542.8234851336094

Predicting for Sri Lanka__nan
2020-08-01: 483.4364864864865
2020-08-02: 503.2490366088632
2020-08-03: 493.76817737202475
2020-08-04: 521.0382099827883
2020-08-05: 523.3846153846154
2020-08-06: 498.10074441687345
2020-08-07: 507.0506329113924
2020-08-08: 529.791597567717
2020-08-09: 537.3739837398374
2020-08-10: 543.1793103448276
2020-08-11: 543.1793103448276
2020-08-12: 540.3497023809524
2020-08-13: 547.0515075376884
2020-08-14: 540.3497023809524
2020-08-15: 604.1969696969697
2020-08-16: 602.8948475289169
2020-08-17: 608.1868244981987
2020-08-18: 613.0646258503401
2020-08-19: 607.0
2020-08-20: 633.1846826126955
2020-08-21: 633.6
2020-08-22: 636.0392156862745
2020-08-23: 637.3982179

2020-08-13: 957.4897959183673
2020-08-14: 1149.6666666666667
2020-08-15: 1278.6233333333332
2020-08-16: 1448.8992137240887
2020-08-17: 1582.5147808358818
2020-08-18: 1678.1063829787233
2020-08-19: 1939.3
2020-08-20: 1927.2795906545666
2020-08-21: 1975.3822760750772
2020-08-22: 2125.6190476190477
2020-08-23: 2010.2162162162163
2020-08-24: 1996.4355368026254
2020-08-25: 1962.2867383512544
2020-08-26: 1894.1378944013597
2020-08-27: 1845.4679319371728
2020-08-28: 1927.0
2020-08-29: 1997.689578713969
2020-08-30: 1993.4755244755245
2020-08-31: 1878.5364238410596

Predicting for Mexico__nan
2020-08-01: 5566.693877551021
2020-08-02: 4820.260869565217
2020-08-03: 6056.882352941177
2020-08-04: 7955.732484076433
2020-08-05: 8100.228033472803
2020-08-06: 7021.658951965065
2020-08-07: 5566.6802919708025
2020-08-08: 4565.083050847457
2020-08-09: 5858.132352941177
2020-08-10: 7190.701421800948
2020-08-11: 7633.360655737705
2020-08-12: 6977.925
2020-08-13: 5237.272921108742
2020-08-14: 4574.0968660968

2020-08-09: 1402.2289669861555
2020-08-10: 1545.6255773672056
2020-08-11: 1570.011320754717
2020-08-12: 1491.4732259468872
2020-08-13: 1664.142857142857
2020-08-14: 1623.984198645598
2020-08-15: 1558.875
2020-08-16: 1611.5749324063345
2020-08-17: 1574.2950495049504
2020-08-18: 1570.011320754717
2020-08-19: 1552.391304347826
2020-08-20: 1620.71758135017
2020-08-21: 1572.413208389112
2020-08-22: 1560.2747138397503
2020-08-23: 1590.6363636363637
2020-08-24: 1613.7877094972066
2020-08-25: 1612.8761904761905
2020-08-26: 1572.413208389112
2020-08-27: 1572.413208389112
2020-08-28: 1561.3519206939282
2020-08-29: 1561.3857142857144
2020-08-30: 1558.875
2020-08-31: 1545.6255773672056

Predicting for Namibia__nan
2020-08-01: 340.661180248896
2020-08-02: 378.69611307420496
2020-08-03: 394.1087169441724
2020-08-04: 402.4277673545966
2020-08-05: 458.39130434782606
2020-08-06: 484.0769230769231
2020-08-07: 566.6705202312139
2020-08-08: 634.9404255319149
2020-08-09: 656.972850678733
2020-08-10: 676.10

2020-08-02: 419.44444444444446
2020-08-03: 492.6290739782721
2020-08-04: 486.79675810473816
2020-08-05: 697.725
2020-08-06: 654.7852834740652
2020-08-07: 521.0382099827883
2020-08-08: 469.28003875968994
2020-08-09: 541.77189888301
2020-08-10: 646.4534358047016
2020-08-11: 657.5333333333333
2020-08-12: 735.9987253027406
2020-08-13: 665.1485148514852
2020-08-14: 620.1287541713015
2020-08-15: 724.0931677018633
2020-08-16: 799.0831600831601
2020-08-17: 902.855823160673
2020-08-18: 879.0277777777778
2020-08-19: 860.5735767991407
2020-08-20: 880.4046953605366
2020-08-21: 831.1623376623377
2020-08-22: 864.0683185840708
2020-08-23: 978.5
2020-08-24: 948.1594202898551
2020-08-25: 902.855823160673
2020-08-26: 941.0882352941177
2020-08-27: 905.552
2020-08-28: 923.0938833570412
2020-08-29: 966.0879590093937
2020-08-30: 984.4076492537314
2020-08-31: 955.7647058823529

Predicting for Pakistan__nan
2020-08-01: 2383.777911164466
2020-08-02: 2475.8368601917164
2020-08-03: 2384.159174649963
2020-08-04: 

2020-08-02: 723.3585858585859
2020-08-03: 735.9987253027406
2020-08-04: 775.7361111111111
2020-08-05: 750.9502958579882
2020-08-06: 756.6129032258065
2020-08-07: 814.9021739130435
2020-08-08: 863.9069767441861
2020-08-09: 806.497512437811
2020-08-10: 727.9044585987261
2020-08-11: 905.552
2020-08-12: 868.8195706028076
2020-08-13: 864.0683185840708
2020-08-14: 887.3574007220217
2020-08-15: 849.7598425196851
2020-08-16: 914.8333333333334
2020-08-17: 923.0938833570412
2020-08-18: 938.1327608982826
2020-08-19: 935.0945626477542
2020-08-20: 914.8333333333334
2020-08-21: 900.9910714285714
2020-08-22: 955.7647058823529
2020-08-23: 1008.9129287598945
2020-08-24: 1024.0
2020-08-25: 1043.852144469526
2020-08-26: 1008.9129287598945
2020-08-27: 1024.0
2020-08-28: 1083.794849282997
2020-08-29: 1157.157768924303
2020-08-30: 1212.3143021914648
2020-08-31: 1208.4368600682594

Predicting for Palestine__nan
2020-08-01: 1451.844589409056
2020-08-02: 1466.4910714285713
2020-08-03: 1612.8761904761905
2020-0

2020-08-07: 471.2681451612903
2020-08-08: 597.5532008830022
2020-08-09: 684.7323232323232
2020-08-10: 778.104
2020-08-11: 765.8303715670436
2020-08-12: 792.6439790575917
2020-08-13: 836.0608695652174
2020-08-14: 1110.96331738437
2020-08-15: 1175.1475272422465
2020-08-16: 1285.324427480916
2020-08-17: 1331.8626373626373
2020-08-18: 1405.2380952380952
2020-08-19: 1472.626923076923
2020-08-20: 1437.4609540636043
2020-08-21: 1483.5442092154422
2020-08-22: 1637.063829787234
2020-08-23: 1626.8089519650655
2020-08-24: 1654.1842105263158
2020-08-25: 1626.8089519650655
2020-08-26: 1619.025641025641
2020-08-27: 1598.1097393689986
2020-08-28: 1697.4557291666667
2020-08-29: 1854.7531914893616
2020-08-30: 1816.2610441767067
2020-08-31: 1770.803487358326

Predicting for Singapore__nan
2020-08-01: 246.29378531073445
2020-08-02: 286.25
2020-08-03: 295.8918918918919
2020-08-04: 312.6
2020-08-05: 361.64912280701753
2020-08-06: 396.1915611814346
2020-08-07: 449.93394777265746
2020-08-08: 505.226015557476

2020-08-01: 238.05810147299508
2020-08-02: 213.1797385620915
2020-08-03: 248.8
2020-08-04: 295.7278481012658
2020-08-05: 320.9468085106383
2020-08-06: 343.4033613445378
2020-08-07: 404.05576739752144
2020-08-08: 458.39130434782606
2020-08-09: 473.8205128205128
2020-08-10: 523.6584967320262
2020-08-11: 659.6160714285714
2020-08-12: 761.975
2020-08-13: 804.7474747474747
2020-08-14: 820.1475719000472
2020-08-15: 862.3356643356643
2020-08-16: 1029.747663551402
2020-08-17: 1128.6125
2020-08-18: 1291.8278145695365
2020-08-19: 1278.6233333333332
2020-08-20: 1328.015781922525
2020-08-21: 1410.2761780104713
2020-08-22: 1371.503100088574
2020-08-23: 1382.9
2020-08-24: 1430.7983193277312
2020-08-25: 1437.0621827411167
2020-08-26: 1419.7449287872248
2020-08-27: 1486.7685950413222
2020-08-28: 1416.5485786254048
2020-08-29: 1586.1918367346939
2020-08-30: 1664.142857142857
2020-08-31: 1804.8984126984126

Predicting for Slovak Republic__nan
2020-08-01: 1378.4758777469058
2020-08-02: 1560.2747138397503

2020-08-25: 1425.7229437229437
2020-08-26: 1416.5485786254048
2020-08-27: 1425.7229437229437
2020-08-28: 1417.9252747252747
2020-08-29: 1620.71758135017
2020-08-30: 1733.242673992674
2020-08-31: 1795.0804416403785

Predicting for Thailand__nan
2020-08-01: 251.12084063047286
2020-08-02: 243.8987341772152
2020-08-03: 292.58125
2020-08-04: 298.719836400818
2020-08-05: 335.0
2020-08-06: 394.22222222222223
2020-08-07: 442.97227722772277
2020-08-08: 505.9808917197452
2020-08-09: 538.5153707052441
2020-08-10: 622.38125
2020-08-11: 727.358904109589
2020-08-12: 792.0926517571885
2020-08-13: 822.9754601226994
2020-08-14: 831.1623376623377
2020-08-15: 929.0193798449612
2020-08-16: 1052.3968968968968
2020-08-17: 1154.3666666666666
2020-08-18: 1287.2941176470588
2020-08-19: 1382.9
2020-08-20: 1180.5905172413793
2020-08-21: 1328.015781922525
2020-08-22: 1380.4496837262795
2020-08-23: 1371.503100088574
2020-08-24: 1430.4125
2020-08-25: 1483.5442092154422
2020-08-26: 1430.4125
2020-08-27: 1409.9934354

2020-08-19: 1872.8753846153845
2020-08-20: 1939.3
2020-08-21: 1967.7153846153847
2020-08-22: 2155.7586206896553
2020-08-23: 2196.608938547486
2020-08-24: 2208.517342452369
2020-08-25: 2089.31935483871
2020-08-26: 2053.9321333009
2020-08-27: 1934.3111467522053
2020-08-28: 2035.2375478927204
2020-08-29: 2190.4927216585797
2020-08-30: 2162.3533852323662
2020-08-31: 2136.8002594033724

Predicting for Uganda__nan
2020-08-01: 382.6100278551532
2020-08-02: 467.35822147651004
2020-08-03: 453.45714285714286
2020-08-04: 487.0517746662325
2020-08-05: 486.79675810473816
2020-08-06: 484.0769230769231
2020-08-07: 484.0769230769231
2020-08-08: 513.96
2020-08-09: 549.641975308642
2020-08-10: 536.4182692307693
2020-08-11: 529.791597567717
2020-08-12: 558.6232449297972
2020-08-13: 582.5156651258346
2020-08-14: 563.2553956834532
2020-08-15: 633.5649504015116
2020-08-16: 636.454128440367
2020-08-17: 610.5076923076923
2020-08-18: 659.6160714285714
2020-08-19: 663.8523002421307
2020-08-20: 637.6776859504132

2020-08-02: 4061.2
2020-08-03: 4275.71339563863
2020-08-04: 3276.537931034483
2020-08-05: 3156.0896551724136
2020-08-06: 2732.6301369863013
2020-08-07: 3548.8249486652976
2020-08-08: 3211.318791946309
2020-08-09: 3129.647623400366
2020-08-10: 2650.0
2020-08-11: 2633.6976744186045
2020-08-12: 2475.7219626168226
2020-08-13: 2537.620603015075
2020-08-14: 2323.5
2020-08-15: 2269.0
2020-08-16: 2110.0
2020-08-17: 1996.4967741935484
2020-08-18: 1927.2795906545666
2020-08-19: 1892.0616883116884
2020-08-20: 1715.7426865671641
2020-08-21: 1759.421052631579
2020-08-22: 1640.1190476190477
2020-08-23: 1670.7442326980943
2020-08-24: 1591.9686847599164
2020-08-25: 1804.3909090909092
2020-08-26: 1651.9433691756271
2020-08-27: 1485.2727272727273
2020-08-28: 1273.9642857142858
2020-08-29: 1474.8083750568958
2020-08-30: 1485.0957484816006
2020-08-31: 1478.429411764706

Predicting for United States__Connecticut
2020-08-01: 2207.5208333333335
2020-08-02: 1773.0
2020-08-03: 1431.8333333333333
2020-08-04: 10

2020-08-24: 1574.2950495049504
2020-08-25: 1613.7877094972066
2020-08-26: 1619.025641025641
2020-08-27: 1558.875
2020-08-28: 1301.4575163398692
2020-08-29: 1430.7983193277312
2020-08-30: 1485.0957484816006
2020-08-31: 1560.2747138397503

Predicting for United States__Illinois
2020-08-01: 11704.622222222222
2020-08-02: 11839.125
2020-08-03: 11084.333333333334
2020-08-04: 9197.943760984183
2020-08-05: 7721.129032258064
2020-08-06: 7355.0
2020-08-07: 9585.77731092437
2020-08-08: 9847.0
2020-08-09: 10481.0
2020-08-10: 9338.5
2020-08-11: 8726.0
2020-08-12: 8504.722222222223
2020-08-13: 8554.253731343284
2020-08-14: 9135.639344262296
2020-08-15: 9499.147982062781
2020-08-16: 9569.642857142857
2020-08-17: 8984.0
2020-08-18: 8641.555555555555
2020-08-19: 8685.5
2020-08-20: 8440.985074626866
2020-08-21: 9209.117647058823
2020-08-22: 9055.0
2020-08-23: 9015.49090909091
2020-08-24: 8790.666666666666
2020-08-25: 8801.96
2020-08-26: 8615.564102564103
2020-08-27: 8672.0
2020-08-28: 8812.0
2020-08-29

2020-08-18: 4601.42205323194
2020-08-19: 5040.430555555556
2020-08-20: 5082.01253298153
2020-08-21: 4441.0
2020-08-22: 3927.487870619946
2020-08-23: 3499.104278074866
2020-08-24: 3525.2
2020-08-25: 3520.0666666666666
2020-08-26: 3405.325
2020-08-27: 3328.292650918635
2020-08-28: 3101.676646706587
2020-08-29: 2957.293103448276
2020-08-30: 2936.655109489051
2020-08-31: 2769.0038167938933

Predicting for United States__Minnesota
2020-08-01: 5800.772241992882
2020-08-02: 6260.823529411765
2020-08-03: 7056.897435897436
2020-08-04: 6400.28125
2020-08-05: 5768.0109375
2020-08-06: 4307.225806451613
2020-08-07: 4596.422480620155
2020-08-08: 5766.169421487603
2020-08-09: 5858.476190476191
2020-08-10: 5661.225609756098
2020-08-11: 4568.58275862069
2020-08-12: 3927.487870619946
2020-08-13: 3826.4655172413795
2020-08-14: 3865.8348017621147
2020-08-15: 3679.4054266614235
2020-08-16: 3555.8947368421054
2020-08-17: 3430.976389946687
2020-08-18: 3230.7475421348313
2020-08-19: 2900.2608695652175
2020-08

2020-08-02: 3381.3270735524256
2020-08-03: 3745.25641025641
2020-08-04: 3111.0
2020-08-05: 2858.1608174770968
2020-08-06: 2556.0416666666665
2020-08-07: 2900.2608695652175
2020-08-08: 2647.0
2020-08-09: 2392.1824587706146
2020-08-10: 2379.96875
2020-08-11: 2096.0942028985505
2020-08-12: 1993.4755244755245
2020-08-13: 1930.0982800982802
2020-08-14: 1942.1821184995738
2020-08-15: 1766.8474576271187
2020-08-16: 1729.2483108108108
2020-08-17: 1670.7442326980943
2020-08-18: 1773.0
2020-08-19: 1678.5689655172414
2020-08-20: 1611.5749324063345
2020-08-21: 1670.7442326980943
2020-08-22: 1611.5749324063345
2020-08-23: 1612.8761904761905
2020-08-24: 1561.3857142857144
2020-08-25: 1727.1478743068392
2020-08-26: 1620.71758135017
2020-08-27: 1486.7685950413222
2020-08-28: 1268.4164244186047
2020-08-29: 1478.9318181818182
2020-08-30: 1478.429411764706
2020-08-31: 1485.0957484816006

Predicting for United States__New Mexico
2020-08-01: 2088.4631994427677
2020-08-02: 1877.8504847564536
2020-08-03: 200

2020-08-24: 1545.6255773672056
2020-08-25: 1417.9252747252747
2020-08-26: 1381.421768707483
2020-08-27: 1612.8761904761905
2020-08-28: 1402.2289669861555
2020-08-29: 1570.011320754717
2020-08-30: 1636.8257575757575
2020-08-31: 1558.875

Predicting for United States__South Carolina
2020-08-01: 1611.5749324063345
2020-08-02: 1466.4910714285713
2020-08-03: 1623.984198645598
2020-08-04: 1466.0714285714287
2020-08-05: 1356.5699481865286
2020-08-06: 1161.4784688995214
2020-08-07: 1623.984198645598
2020-08-08: 1515.8462039752362
2020-08-09: 1486.7685950413222
2020-08-10: 1636.8257575757575
2020-08-11: 1474.8083750568958
2020-08-12: 1383.9178082191781
2020-08-13: 1435.0263157894738
2020-08-14: 1481.571814030718
2020-08-15: 1486.7685950413222
2020-08-16: 1486.7685950413222
2020-08-17: 1481.571814030718
2020-08-18: 1612.8761904761905
2020-08-19: 1451.844589409056
2020-08-20: 1561.3519206939282
2020-08-21: 1619.025641025641
2020-08-22: 1523.221237268955
2020-08-23: 1486.7685950413222
2020-08-24: 

2020-08-13: 1651.9433691756271
2020-08-14: 1612.8761904761905
2020-08-15: 1361.4
2020-08-16: 1694.611111111111
2020-08-17: 1647.1601272534465
2020-08-18: 1682.4923076923078
2020-08-19: 1620.71758135017
2020-08-20: 1643.6465280502157
2020-08-21: 1523.221237268955
2020-08-22: 1613.7877094972066
2020-08-23: 1886.910447761194
2020-08-24: 1620.71758135017
2020-08-25: 1725.1510710259302
2020-08-26: 1416.5485786254048
2020-08-27: 1497.016584402764
2020-08-28: 1383.9178082191781
2020-08-29: 1468.016393442623
2020-08-30: 1560.2747138397503
2020-08-31: 1481.571814030718

Predicting for United States__Wisconsin
2020-08-01: 6372.868965517241
2020-08-02: 6505.959183673469
2020-08-03: 5601.4
2020-08-04: 4312.925
2020-08-05: 3527.497191011236
2020-08-06: 3055.693544271498
2020-08-07: 5396.110655737705
2020-08-08: 4378.663934426229
2020-08-09: 3923.03125
2020-08-10: 3231.176470588235
2020-08-11: 3001.034327009937
2020-08-12: 2900.2608695652175
2020-08-13: 3393.1
2020-08-14: 2880.1666666666665
2020-08-

2020-08-04: 211.50259067357513
2020-08-05: 292.58125
2020-08-06: 303.44444444444446
2020-08-07: 347.39716312056737
2020-08-08: 452.5079365079365
2020-08-09: 549.8716577540107
2020-08-10: 792.0926517571885
2020-08-11: 862.3356643356643
2020-08-12: 1042.0
2020-08-13: 1283.0
2020-08-14: 1449.4204793028323
2020-08-15: 1593.341062079282
2020-08-16: 1936.5
2020-08-17: 1967.7153846153847
2020-08-18: 1951.241134751773
2020-08-19: 1955.0881057268723
2020-08-20: 2019.8743994509266
2020-08-21: 2046.0166139240507
2020-08-22: 2204.6971514242878
2020-08-23: 2196.608938547486
2020-08-24: 2053.9321333009
2020-08-25: 2018.8854166666667
2020-08-26: 2035.2375478927204
2020-08-27: 2132.0071856287427
2020-08-28: 2075.471193415638
2020-08-29: 2160.7928759894457
2020-08-30: 2063.4003813155387
2020-08-31: 2065.3389830508477

Predicting for South Africa__nan
2020-08-01: 2229.0885780885783
2020-08-02: 2053.9321333009
2020-08-03: 2123.1085582998276
2020-08-04: 2043.4161073825503
2020-08-05: 1865.581818181818
202

In [28]:
# Check the predictions
preds_df.head()

Unnamed: 0,CountryName,RegionName,Date,PredictedDailyNewCases
213,Aruba,,2020-08-01,265.902985
214,Aruba,,2020-08-02,293.048309
215,Aruba,,2020-08-03,298.49121
216,Aruba,,2020-08-04,314.415584
217,Aruba,,2020-08-05,367.650525


# Validation
This is how the predictor is going to be called during the competition.  
!!! PLEASE DO NOT CHANGE THE API !!!

In [29]:
!python3 predict.py -s 2020-08-01 -e 2020-08-04 -ip data/2020-09-30_historical_ip_new.csv -o predictions/2020-08-01_2020-08-04.csv

Generating predictions from 2020-08-01 to 2020-08-04...
Saved predictions to predictions/2020-08-01_2020-08-04.csv
Done!


In [30]:
!head predictions/2020-08-01_2020-08-04.csv

CountryName,RegionName,Date,PredictedDailyNewCases
Aruba,,2020-08-01,265.9029850746269
Aruba,,2020-08-02,293.048309178744
Aruba,,2020-08-03,298.4912099276112
Aruba,,2020-08-04,314.4155844155844
Afghanistan,,2020-08-01,403.5917602996255
Afghanistan,,2020-08-02,437.03816046966733
Afghanistan,,2020-08-03,455.265306122449
Afghanistan,,2020-08-04,510.7369246861925
Angola,,2020-08-01,388.89102564102564


# Test cases
We can generate a prediction file. Let's validate a few cases...

In [31]:
import os
from predictor_validation import validate_submission

def validate(start_date, end_date, ip_file, output_file):
    # First, delete any potential old file
    try:
        os.remove(output_file)
    except OSError:
        pass
    
    # Then generate the prediction, calling the official API
    !python3 predict.py -s {start_date} -e {end_date} -ip {ip_file} -o {output_file}
    
    # And validate it
    errors = validate_submission(start_date, end_date, ip_file, output_file)
    if errors:
        for error in errors:
            print(error)
    else:
        print("All good!")

## 4 days, no gap
- All countries and regions
- Official number of cases is known up to start_date
- Intervention Plans are the official ones

In [None]:
# validate(start_date="2020-08-01",
#          end_date="2020-08-04",
#          ip_file="data/2020-09-30_historical_ip_new.csv",
#          output_file="predictions/val_4_days.csv")

validate(start_date="2020-08-01",
         end_date="2020-08-31",
         ip_file="data/2020-09-30_historical_ip_new.csv",
         output_file="predictions/val_august.csv")

Generating predictions from 2020-08-01 to 2020-08-31...


## 1 month in the future
- 2 countries only
- there's a gap between date of last known number of cases and start_date
- For future dates, Intervention Plans contains scenarios for which predictions are requested to answer the question: what will happen if we apply these plans?

In [None]:
# %%time
# validate(start_date="2021-01-01",
#          end_date="2021-01-31",
#          ip_file="validation/data/future_ip.csv",
#          output_file="predictions/val_1_month_future.csv")

## 180 days, from a future date, all countries and regions
- Prediction start date is 1 week from now. (i.e. assuming submission date is 1 week from now)  
- Prediction end date is 6 months after start date.  
- Prediction is requested for all available countries and regions.  
- Intervention plan scenario: freeze last known intervention plans for each country and region.  

As the number of cases is not known yet between today and start date, but the model relies on them, the model has to predict them in order to use them.  
This test is the most demanding test. It should take less than 1 hour to generate the prediction file.

### Generate the scenario

In [None]:
# from datetime import datetime, timedelta

# start_date = datetime.now() + timedelta(days=7)
# start_date_str = start_date.strftime('%Y-%m-%d')
# end_date = start_date + timedelta(days=180)
# end_date_str = end_date.strftime('%Y-%m-%d')
# print(f"Start date: {start_date_str}")
# print(f"End date: {end_date_str}")

In [None]:
# from validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS
# DATA_FILE = 'data/OxCGRT_latest.csv'
# latest_df = get_raw_data(DATA_FILE, latest=True)
# scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries=None, scenario="Freeze")
# scenario_file = "predictions/180_days_future_scenario.csv"
# scenario_df.to_csv(scenario_file, index=False)
# print(f"Saved scenario to {scenario_file}")

### Check it

In [None]:
# %%time
# validate(start_date=start_date_str,
#          end_date=end_date_str,
#          ip_file=scenario_file,
#          output_file="predictions/val_6_month_future.csv")

## SPAIN

In [None]:
# start_date = datetime.now() + timedelta(days=7)
# start_date_str = start_date.strftime('%Y-%m-%d')
# end_date = start_date + timedelta(days=180)
# end_date_str = end_date.strftime('%Y-%m-%d')
# print(f"Start date: {start_date_str}")
# print(f"End date: {end_date_str}")

# DATA_FILE = 'data/OxCGRT_latest.csv'
# latest_df = get_raw_data(DATA_FILE, latest=True)
# scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries=['France'], scenario="Freeze")
# scenario_file = "predictions/180_days_future_scenario_france_freeze.csv"
# scenario_df.to_csv(scenario_file, index=False)
# print(f"Saved scenario to {scenario_file}")

# validate(start_date=start_date_str,
#          end_date=end_date_str,
#          ip_file=scenario_file,
#          output_file="predictions/val_6_month_future_france_freeze.csv")

In [None]:
# df_verify = pd.read_csv('predictions/val_6_month_future_spain_max.csv')
# x = df_verify['Date'].to_numpy()
# y = df_verify['PredictedDailyNewCases'].to_numpy()

# %matplotlib inline
# import matplotlib.pyplot as plt
# plt.plot(x,y)