In [1]:
import pandas as pd 
import numpy as np 
from sklearn import preprocessing
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.neural_network import MLPRegressor
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
import warnings
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides any warnings emitted by pandas or scikit-learn
# in later cells -- confirm that silencing them is intended.
warnings.filterwarnings('ignore')

In [2]:
# Read the comma-separated export 'NI.csv' from the working directory.
data = pd.read_csv("NI.csv", delimiter=",")

In [3]:
# Preview the first five rows of the freshly loaded frame.
data.head(n=5)

Unnamed: 0,date,areaName,areaCode,newCasesByPublishDate,cumCasesByPublishDate,newDeaths28DaysByDeathDate,cumDeaths28DaysByDeathDate,newAntibodyTestsByPublishDate,newLFDTestsBySpecimenDate,newPCRTestsByPublishDate,newPCRTestsByPublishDateRollingSum,hospitalCases,newAdmissions,newPeopleVaccinatedCompleteByVaccinationDate,newVaccinesGivenByPublishDate,VaccineRegisterPopulationByVaccinationDate
0,10/03/2022,Northern Ireland,N92000002,2602.0,646794.0,,,,,,,,,,,
1,09/03/2022,Northern Ireland,N92000002,2683.0,644192.0,2.0,3244.0,,0.0,20295.0,118210.0,483.0,21.0,,1317.0,
2,08/03/2022,Northern Ireland,N92000002,2669.0,641509.0,4.0,3242.0,,0.0,19739.0,117337.0,506.0,37.0,,1037.0,
3,07/03/2022,Northern Ireland,N92000002,5769.0,638840.0,4.0,3238.0,,0.0,19464.0,115735.0,518.0,29.0,,1078.0,
4,06/03/2022,Northern Ireland,N92000002,0.0,633071.0,2.0,3234.0,,0.0,13711.0,117006.0,544.0,25.0,,903.0,


In [4]:
# Remove the 'areaName' and 'areaCode' columns in place.
data.drop(['areaName', 'areaCode'], axis=1, inplace=True)

In [5]:
# The CSV stores dates as DD/MM/YYYY (the preview rows run 10/03/2022,
# 09/03/2022, 08/03/2022, ... on consecutive days). Without dayfirst=True,
# pandas reads ambiguous values month-first (10/03 -> 3 October), which
# scrambles the chronological order that the later time-ordered split relies on.
data['date'] = pd.to_datetime(data['date'], dayfirst=True)

# Order the rows oldest-first and use the date as the index.
data = data.sort_values(by='date', ascending=True).set_index('date')

In [6]:
# Number of rows in the frame; used as the smoothing span in the next cell.
n = data.shape[0]

In [7]:
# Exponentially weighted mean of the daily new-case counts.
# The span is n (the row count), so the weights decay very slowly.
raw_cases = data['newCasesByPublishDate']
data['newCasesByPublishDateSmoothed'] = raw_cases.ewm(span=n).mean()

In [8]:
# Replace every missing value in the frame with zero.
data = data.fillna(value=0)

In [9]:
# Pull out the smoothed new-case series on its own.
ni_smoothed = data.loc[:, 'newCasesByPublishDateSmoothed']

In [10]:
# Write the smoothed series to an Excel workbook in the working directory.
ni_smoothed.to_excel(excel_writer="ni_smoothed.xlsx")

In [9]:
# Display the (rows, columns) shape of the frame.
data.shape

(740, 14)

In [10]:
# Work on an independent deep copy (the default for DataFrame.copy) so that
# feature engineering does not touch `data`.
df = data.copy()

In [11]:
# Build lagged copies of the smoothed series plus the first difference of each.
# Lag 1 is stored as 'ncs' / 'ncs_diff'; later lags carry a numeric suffix
# ('ncs2' / 'ncs_diff2', ...). Each lag is a one-step shift of the previous lag.
# Widen the range to add further lags (earlier experiments went up to 7).
source_col = 'newCasesByPublishDateSmoothed'
for lag in range(1, 4):
    suffix = '' if lag == 1 else str(lag)
    lag_col = 'ncs' + suffix
    diff_col = 'ncs_diff' + suffix
    df.loc[:, lag_col] = df[source_col].shift()
    df.loc[:, diff_col] = df[lag_col].diff()
    source_col = lag_col

In [12]:
# Shifting and differencing leave NaNs in the leading rows; set them to zero.
df = df.fillna(value=0)

In [13]:
# Row count of the feature frame; the split boundaries below derive from it.
n = df.shape[0]

In [14]:
# Every column except the smoothed target is treated as a predictor.
variables = df.columns.drop(['newCasesByPublishDateSmoothed'])

In [15]:
# Training set: the first 70% of the rows.
# X keeps the predictors; y keeps only the smoothed target column (as a frame).
train_rows = df.iloc[:int(n * 0.7)]
X_train = train_rows.drop(columns=['newCasesByPublishDateSmoothed'])
y_train = train_rows.drop(columns=variables)

In [16]:
# Test set: the final 10% of the rows.
test_rows = df.iloc[int(n * 0.9):]
X_test = test_rows.drop(columns=['newCasesByPublishDateSmoothed'])
y_test = test_rows.drop(columns=variables)

In [17]:
# Validation set: the rows between the 70% and 90% marks.
val_rows = df.iloc[int(n * 0.7):int(n * 0.9)]
X_val = val_rows.drop(columns=['newCasesByPublishDateSmoothed'])
y_val = val_rows.drop(columns=variables)

In [18]:
# Base estimator for the grid search.
# MLPRegressor initialises its weights randomly (and the stochastic solvers
# shuffle the samples), so pin the seed; otherwise the selected hyper-parameters
# and the reported R^2 values change from run to run.
model = MLPRegressor(random_state=42)

In [19]:
# Hyper-parameter grid for MLPRegressor, passed to GridSearchCV as param_grid.
# `learning_rate` (the learning-rate schedule) is only used by the "sgd"
# solver, so it is varied in the sgd sub-grid only. Crossing it with "lbfgs"
# and "adam" would fit the same configuration three times each
# (540 candidates instead of 300).
_common_grid = {
    "hidden_layer_sizes": [(1,), (50,), (100,), (150,), (200,)],
    "activation": ["identity", "logistic", "tanh", "relu"],
    "alpha": [0.00005, 0.0005, 0.005],
}
param_search = [
    {**_common_grid, "solver": ["lbfgs", "adam"]},
    {
        **_common_grid,
        "solver": ["sgd"],
        "learning_rate": ['constant', 'invscaling', 'adaptive'],
    },
]

In [None]:
# Exhaustive search over param_search, scored by R^2 on five time-ordered
# splits of the training data.
tsvc = TimeSeriesSplit(n_splits=5)
gsearch = GridSearchCV(
    model,
    param_search,
    scoring='r2',
    cv=tsvc,
)
gsearch.fit(X_train, y_train)

# Keep the winning estimator and its cross-validated score for later cells.
best_model = gsearch.best_estimator_
best_score = gsearch.best_score_

In [None]:
# Show the estimator selected by the grid search (gsearch.best_estimator_).
print(best_model)

In [None]:
# Show the grid search's best cross-validated score (gsearch.best_score_, scored with 'r2').
print(best_score)

In [None]:
from sklearn.metrics import r2_score

# Predict on the held-out test rows and report the R^2 against the true target.
y_pred = best_model.predict(X_test)
test_r2 = r2_score(y_true=y_test, y_pred=y_pred)
print(test_r2)

In [None]:
# Predictions of the selected model on the validation rows.
y_val_pred = best_model.predict(X=X_val)

In [None]:
# R^2 of the validation predictions against the true validation target.
val_r2 = r2_score(y_true=y_val, y_pred=y_val_pred)
print(val_r2)