# McHacks 2022 - COVID-19 Stringency Index Predictor

# Imports

In [None]:
import io

import numpy as np
import pandas as pd
from google.colab import files
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.utils import shuffle

Saving McHacks_OCcovid.csv to McHacks_OCcovid.csv
Saving McHacks_EUcovid.csv to McHacks_EUcovid.csv
Saving McHacks_ALLcovid.csv to McHacks_ALLcovid.csv
Saving McHacks_NAcovid.csv to McHacks_NAcovid.csv


# Data

In [None]:
# Upload the pre-processed CSV files, then parse each one into its own DataFrame
uploaded = files.upload()


def _frame_from_upload(filename):
    """Parse the uploaded file stored under ``filename`` in ``uploaded`` as CSV."""
    return pd.read_csv(io.BytesIO(uploaded[filename]))


NA_frame = _frame_from_upload('McHacks_NAcovid.csv')    # North America
OC_frame = _frame_from_upload('McHacks_OCcovid.csv')    # Oceania
EU_frame = _frame_from_upload('McHacks_EUcovid.csv')    # Europe
All_frame = _frame_from_upload('McHacks_ALLcovid.csv')  # All three continents combined

# Useful metrics for the UI calibration: per-column min / max / median.
# The loop variables are deliberately NOT called `min` / `max`: rebinding those
# names at notebook scope would shadow the built-in functions for every later cell.
for name in All_frame.columns:
    column = All_frame[name]
    col_max = column.max()
    col_min = column.min()
    col_median = column.median()
    print(f'{name}:   min: {col_min}    max: {col_max}   median: {col_median}')



new_cases:   min: -74347    max: 1368382   median: 786.0
new_cases_smoothed:   min: -6223.0    max: 802223.429   median: 949.429
new_deaths:   min: -217    max: 4442   median: 7.0
new_deaths_smoothed:   min: -14.429    max: 3403.0   median: 8.286
reproduction_rate:   min: 0.19    max: 3.77   median: 1.05
icu_patients:   min: 0    max: 28891   median: 110.0
hosp_patients:   min: 0    max: 152032   median: 667.0
positive_rate:   min: 0.0    max: 0.5522   median: 0.0355
tests_per_case:   min: 0.0    max: 27035.0   median: 27.9
total_vaccinations:   min: 0    max: 530359800   median: 510049.0
people_vaccinated:   min: 0    max: 249586987   median: 356376.0
people_fully_vaccinated:   min: 0    max: 208808805   median: 114796.0
total_boosters:   min: 0    max: 81521247   median: 0.0
new_vaccinations_smoothed:   min: 0    max: 3508672   median: 5312.0
new_people_vaccinated_smoothed:   min: 0    max: 2028778   median: 1824.0
stringency_index:   min: 20.37    max: 100.0   median: 56.02
populati

In [None]:
# Data processing functions

def normalize_pop(csv):
  """Return a copy of ``csv`` with count columns expressed per million inhabitants.

  Each column listed in ``to_be_normalized`` is multiplied by 1,000,000 and
  divided by the row's ``population`` value. All other columns (including
  ``population`` itself) are left untouched, and the input frame is not modified.

  Parameters
  ----------
  csv : pandas.DataFrame
      Must contain a ``population`` column and every column named in
      ``to_be_normalized``.

  Returns
  -------
  pandas.DataFrame
      A new frame with the rescaled columns.
  """
  df = csv.copy()

  to_be_normalized = ['new_cases','new_cases_smoothed', 'new_deaths', 'new_deaths_smoothed', 'icu_patients', 'hosp_patients', 'total_vaccinations', 'people_vaccinated',
 'people_fully_vaccinated', 'total_boosters', 'new_vaccinations_smoothed',
 'new_people_vaccinated_smoothed']

  for i in to_be_normalized:
    # True division: floor division (//) would truncate every rate below one
    # per million to 0 (and round negative corrections away from zero).
    df[i] = df[i]*1000000/df["population"]

  return df

def normalize_features(df):
    """Min-max scale every column of ``df`` into the range [0, 1].

    Each column is transformed as ``(value - min) / (max - min)``. A constant
    column (max == min) is given a unit range so the division is defined; all of
    its values therefore map to 0. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns support ``min``/``max`` and arithmetic.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns, scaled column by column.
    """
    result = df.copy()

    for feature_name in df.columns:
        max_value = df[feature_name].max()
        min_value = df[feature_name].min()

        # Avoid 0/0 on constant columns
        if max_value == min_value:
            max_value = min_value + 1

        # True division: floor division (//) would map every value to 0
        # except the column maximum, which would map to 1.
        result[feature_name] = (df[feature_name] - min_value) / (max_value - min_value)

    return result

def extract_xy(csv, random_state=None):
  """Shuffle a frame and split it into a standardized feature matrix and labels.

  The rows are shuffled, the ``stringency_index`` column is taken as the label
  vector, the remaining columns are population-normalized with
  ``normalize_pop`` and then standardized with ``StandardScaler``.
  ``normalize_features`` is not applied here.

  Parameters
  ----------
  csv : pandas.DataFrame
      Frame containing ``stringency_index`` plus the columns required by
      ``normalize_pop``.
  random_state : int, RandomState instance or None, default None
      Forwarded to ``sklearn.utils.shuffle``. Pass an int to get the same row
      order (and therefore the same folds downstream) on every run; the default
      keeps the shuffle unseeded.

  Returns
  -------
  (x, y) : tuple of numpy.ndarray
      ``x`` is the standardized feature matrix, ``y`` the raw label vector, in
      the same shuffled row order.
  """
  shuffled = shuffle(csv, random_state=random_state)
  y = shuffled["stringency_index"].to_numpy()
  features = normalize_pop(shuffled)
  x = features.drop(["stringency_index"], axis=1).to_numpy()
  # NOTE(review): the scaler is fit on every row, before any train/test split,
  # so statistics of rows later used for testing influence the training
  # features — confirm this is acceptable or fit on the training fold only.
  scaler = preprocessing.StandardScaler()
  scaler.fit(x)
  x = scaler.transform(x)

  return x,y

def split(x,y,low,high):
  """Hold out rows ``low`` (inclusive) to ``high`` (exclusive) as the test fold.

  Everything before ``low`` and everything from ``high`` up to ``x.shape[0]``
  is concatenated to form the training fold.

  Returns
  -------
  (x_train, y_train, x_test, y_test)
  """
  n_rows = x.shape[0]  # the upper bound for y is taken from x as well
  held_out = slice(low, high)

  head_x, tail_x = x[:low], x[high:n_rows]
  head_y, tail_y = y[:low], y[high:n_rows]

  x_train = np.concatenate((head_x, tail_x))
  y_train = np.concatenate((head_y, tail_y))
  return x_train, y_train, x[held_out], y[held_out]


# Split the combined (all-continent) frame into a feature matrix and the
# stringency_index label vector
x_All, y_All = extract_xy(All_frame)

# Uncomment to train on a single continent instead
# x_OC, y_OC = extract_xy(OC_frame)
# x_EU, y_EU = extract_xy(EU_frame)


# Multi-layer Perceptron Regressor

#### Model definition

In [None]:
def runMLP(X_train, y_train, X_test, y_test, momentum = 0.9):
  """Fit an MLP regressor on the training fold and predict the test fold.

  Prints the value returned by the model's ``score`` method on the test fold
  (for scikit-learn regressors this is the coefficient of determination R^2,
  i.e. higher is better, despite the "Error" wording of the label).

  Parameters
  ----------
  X_train, y_train : array-like
      Training features and labels.
  X_test, y_test : array-like
      Held-out features and labels used for prediction and scoring.
  momentum : float, default 0.9
      Forwarded to ``MLPRegressor``.

  Returns
  -------
  Predictions for ``X_test``.
  """
  # NOTE(review): per the scikit-learn docs, `momentum` and `power_t` are only
  # used when solver='sgd'; no solver is set here, so the library default
  # applies — confirm whether SGD was intended.
  model = MLPRegressor(
      random_state=1,
      max_iter=5000,
      momentum=momentum,
      verbose=True,
      tol=0.001,
      power_t=0.7,
  )
  model.fit(X_train, y_train)

  predictions = model.predict(X_test)
  test_score = model.score(X_test, y_test)
  print("R Squared Error: " + str(test_score))
  return predictions

#### Training

In [None]:
# Monitoring training for early stopping
# Train on a random 75% of the rows and test on the remaining 25%
# (train_test_split's default test_size when none is given)

# Defined explicitly so the value printed below is the one actually used for training
MOMENTUM = 0.9

# Fixed seed so the same rows land in the test fold on every run
X_train, X_test, y_train, y_test = train_test_split(x_All, y_All, random_state=1)
y_pred = runMLP(X_train, y_train, X_test, y_test, momentum=MOMENTUM)
print("Momentum: " + str(MOMENTUM))
print(f'Y_Pred {np.round(y_pred, 2)}')
print(f'True OC{y_test}')

Iteration 1, loss = 1607.74132628
Iteration 2, loss = 1407.98324006
Iteration 3, loss = 1144.55547018
Iteration 4, loss = 841.13804756
Iteration 5, loss = 557.29212959
Iteration 6, loss = 352.90907659
Iteration 7, loss = 237.04174321
Iteration 8, loss = 185.64536510
Iteration 9, loss = 160.92431884
Iteration 10, loss = 145.91345680
Iteration 11, loss = 135.03674389
Iteration 12, loss = 126.99347546
Iteration 13, loss = 120.40830257
Iteration 14, loss = 115.17120774
Iteration 15, loss = 110.73626744
Iteration 16, loss = 106.75638494
Iteration 17, loss = 103.44174640
Iteration 18, loss = 100.80428508
Iteration 19, loss = 98.15451521
Iteration 20, loss = 96.18225376
Iteration 21, loss = 94.26855968
Iteration 22, loss = 92.78851396
Iteration 23, loss = 91.63889357
Iteration 24, loss = 90.28883550
Iteration 25, loss = 89.61369471
Iteration 26, loss = 87.89802322
Iteration 27, loss = 86.81367936
Iteration 28, loss = 85.69023368
Iteration 29, loss = 84.72655956
Iteration 30, loss = 83.8067121

In [None]:
# Hold out a fixed slice of rows (600 up to, not including, 800) as the test fold
# and train on the rest; extract_xy has already shuffled the row order
fold_start, fold_stop = 600, 800
X_train, y_train, X_test, y_test = split(x_All, y_All, fold_start, fold_stop)
y_pred = runMLP(X_train, y_train, X_test, y_test)
rounded_pred = np.round(y_pred, 2)
print(f'Y_Pred {rounded_pred}')
print(f'True OC{y_test}')

R Squared Error: 0.7260559184819968
Y_Pred [62.74 68.13 68.68 52.81 46.44 48.02 73.58 51.63 85.56 57.89 72.95 72.04
 68.35 51.99 50.71 65.53 48.99 79.01 67.89 57.33 44.11 34.36 44.66 65.21
 71.09 62.48 66.   26.34 45.32 43.15 79.33 49.94 57.15 49.4  87.14 43.7
 29.32 54.79 73.18 51.1  55.91 45.06 71.54 65.6  40.75 56.95 66.16 25.9
 48.07 58.16 65.   57.57 48.4  75.08 60.47 80.63 47.69 32.96 60.77 60.17
 55.28 50.44 42.99 57.87 78.76 65.29 45.97 41.19 70.87 65.7  68.1  51.06
 81.74 52.88 77.03 58.23 59.63 53.18 73.56 41.26 70.71 65.57 54.07 64.38
 51.74 34.03 63.24 46.52 42.48 54.22 61.1  44.23 62.19 65.96 80.87 57.4
 66.22 47.36 70.3  60.83 41.86 53.57 35.83 52.23 48.91 47.57 64.78 52.57
 48.56 37.16 60.58 49.85 51.31 70.53 36.65 42.41 64.55 39.35 62.93 60.69
 88.12 73.09 33.22 74.17 41.75 63.33 54.77 61.78 52.63 54.19 61.98 44.43
 52.38 59.94 60.76 63.52 58.52 43.94 81.38 31.12 53.99 47.26 69.86 38.4
 42.77 56.9  36.69 54.34 41.36 66.78 55.32 28.02 54.06 91.11 68.08 60.44
 47.34 61.75