In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns # for plotting
from sklearn.impute import SimpleImputer # for imputing missing values
from sklearn.preprocessing import StandardScaler # for standardizing data
from csvToExcel import toExcel
from tensorflow import keras # for building Neural Networks
from keras.models import Sequential # for creating a linear stack of layers for our Neural Network
from keras import Input # for instantiating a keras tensor
from keras.layers import Dense, SimpleRNN # for creating regular densely-connected NN layers and RNN layers

In [None]:
def checkMissingData(path='DataFolder/data.csv'):
    """Print a per-column summary (dtype and non-null count) of a CSV file.

    Parameters
    ----------
    path : str or path-like, default 'DataFolder/data.csv'
        Location of the CSV file to inspect.

    Returns
    -------
    None
        The summary is written to standard output by ``DataFrame.info``.
    """
    df = pd.read_csv(path)
    # DataFrame.info() prints the report itself and returns None, so wrapping
    # it in print() would only append a stray "None" line to the output.
    df.info()
    
# Print the column dtypes and non-null counts of the raw CSV so that the
# columns containing gaps can be identified before imputation.
checkMissingData()

In [None]:
def missingDataHandler(path='DataFolder/data.csv',
                       columns=('Daily tests', 'Cases', 'Deaths')):
    """Load a CSV file and fill missing values in ``columns`` with the column mean.

    Parameters
    ----------
    path : str or path-like, default 'DataFolder/data.csv'
        Location of the CSV file to load.
    columns : iterable of str, default ('Daily tests', 'Cases', 'Deaths')
        Numeric columns whose NaN entries are replaced by that column's mean.
        The default lists the columns found to contain gaps in the
        missing-data check.

    Returns
    -------
    pandas.DataFrame
        The loaded frame with the selected columns imputed (as float64).
        A column that is entirely NaN has no mean and is left unchanged.
    """
    df = pd.read_csv(path)
    for column in columns:
        # Cast to float first so the imputed column has a consistent dtype
        # regardless of whether the raw column happened to parse as integers.
        values = df[column].astype('float64')
        df[column] = values.fillna(values.mean())
    # info() prints the summary itself and returns None; do not wrap in print().
    df.info()
    return df
    
# Load the raw CSV with its gaps filled in; `dataFrame` is the base table that
# the later cells filter from.
dataFrame = missingDataHandler()
# Export the imputed table under the name 'preprocessedData'.
# NOTE(review): toExcel comes from the project-local csvToExcel module;
# presumably it writes an Excel file - confirm its output location there.
toExcel(dataFrame, 'preprocessedData')

In [None]:
def getGreeceData(df, entity='Greece', endDate='2020-12-31'):
    """Return the rows of ``df`` for one entity up to and including ``endDate``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least an 'Entity' column and a 'Date' column.
    entity : str, default 'Greece'
        Value of 'Entity' to keep.
    endDate : str or datetime-like, default '2020-12-31'
        Last date (inclusive) to keep.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the matching rows. The 'Date' column is
        returned exactly as stored in ``df`` (it is not converted here).
    """
    entityRows = df.loc[df['Entity'] == entity]
    # Compare as timestamps so the cut-off is correct whether 'Date' holds
    # strings or datetimes; only the mask is parsed, not the stored column.
    onOrBefore = pd.to_datetime(entityRows['Date']) <= pd.Timestamp(endDate)
    # Return a copy: callers add columns to the result, and assigning into a
    # filtered slice triggers pandas' SettingWithCopyWarning.
    return entityRows.loc[onOrBefore].copy()

# Training subset: the rows of the imputed table selected by getGreeceData
# (entity 'Greece', dates up to 2020-12-31).
dataFrame2 = getGreeceData(dataFrame)

In [None]:
def addPositivityPercent(df):
    """Return a copy of ``df`` with 'Daily Cases' and 'Positivity percent' added.

    'Daily Cases' is the absolute day-to-day change of the 'Cases' column and
    'Positivity percent' is that change divided by 'Daily tests' (stored as a
    ratio; it is not multiplied by 100). Missing values in either new column
    are replaced by the column mean, and 'Date' is converted to datetime.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with 'Cases', 'Daily tests' and 'Date' columns, ordered by date.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified. The two new columns are
        appended in the order 'Daily Cases', 'Positivity percent'.
    """
    # Work on a copy so the caller's frame (often a filtered slice) is not
    # mutated and pandas does not emit SettingWithCopyWarning.
    df = df.copy()
    # The first row has no predecessor, so its difference is NaN.
    dailyCases = df['Cases'].diff().abs()
    # A day with zero tests yields +/-inf; treat it as missing so that it is
    # imputed instead of poisoning the mean.
    positivity = (dailyCases / df['Daily tests']).replace([np.inf, -np.inf], np.nan)
    df['Daily Cases'] = dailyCases.fillna(dailyCases.mean())
    df['Positivity percent'] = positivity.fillna(positivity.mean())
    df['Date'] = pd.to_datetime(df['Date'])
    return df

# Derive the modelling table from the Greek subset and export it.
dataset = addPositivityPercent(dataFrame2)
toExcel(dataset, 'GreeceData')
# info() prints its report and returns None, so it is called directly;
# wrapping it in print() would add a stray "None" line.
dataset.info()

In [None]:
# Build the training arrays for the model.
# Columns are selected by name rather than by position (the original used
# positional indices 11 and -1, documented as the dates and the positivity
# percent) so the selection survives columns being added or reordered.
# Both arrays are shaped (n_samples, 1), which is what the scalers expect.
X = dataset['Date'].to_numpy().reshape(-1, 1)  # independent variable: the dates
y = dataset['Positivity percent'].to_numpy().reshape(-1, 1)  # dependent variable

In [None]:
# Standardize features and target with StandardScaler.
# One scaler per array is kept so each can later be applied to new inputs
# (sc_X) or used to map outputs back to the original scale (sc_y).
sc_X = StandardScaler().fit(X)
sc_y = StandardScaler().fit(y)
X = sc_X.transform(X)
y = sc_y.transform(y)

In [None]:
# Network: Input -> Reshape -> SimpleRNN(tanh) -> Dense(tanh) -> Dense(linear).
# Input(shape=...) describes ONE sample and excludes the batch axis, so it must
# be (n_features,), not X.shape (which would declare n_samples timesteps).
nFeatures = X.shape[1]
model = Sequential(name="First-RNN-Model")  # linear stack of layers
model.add(Input(shape=(nFeatures,), name='Input-Layer'))  # per-sample input shape
# SimpleRNN expects (batch, timesteps, features); present each 2-D sample as a
# sequence of length 1 so the model accepts the (n_samples, n_features) arrays.
model.add(keras.layers.Reshape((1, nFeatures), name='Sequence-Reshape-Layer'))
# tanh(x) = sinh(x) / cosh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))
model.add(SimpleRNN(units=1, activation='tanh', name='Hidden-Recurrent-Layer'))  # hidden recurrent layer
model.add(Dense(units=1, activation='tanh', name='Hidden-Layer'))  # hidden dense layer
model.add(Dense(units=1, activation='linear', name='Output-Layer'))  # regression output

In [None]:
# Configure training with the Adam optimizer and a mean-squared-error loss,
# then fit the network on the arrays X and y.
# No epochs or batch_size are passed, so the Keras defaults for fit() apply.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x=X, y=y)

In [None]:
# Build the model input for Greece on 2021-01-01; this date is used to
# predict the positivity percent.
targetDate = '2021-01-01'
greeceData = dataFrame[dataFrame['Entity'] == 'Greece'].copy()
# Derive the daily figures on the full Greek series BEFORE selecting the target
# day: diff() needs the previous day's row, so computing it on a single-row
# frame always yields NaN.
greeceData['Daily Cases'] = greeceData['Cases'].diff().abs()
greeceData['Positivity percent'] = greeceData['Daily Cases'] / greeceData['Daily tests']
greeceData['Date'] = pd.to_datetime(greeceData['Date'])
newDataset = greeceData[greeceData['Date'] == pd.Timestamp(targetDate)].copy()
# The model was fitted on standardized (n_samples, 1) inputs, so give the new
# date the same shape and pass it through the scaler fitted on the training
# dates (sc_X) instead of feeding the raw 1-D values.
testX = sc_X.transform(newDataset['Date'].to_numpy().reshape(-1, 1))

In [None]:
# Predict the positivity percent for the new date held in testX (this is the
# 2021-01-01 sample, not the training data).
pred_train = model.predict(testX)
# The target was standardized with sc_y before training, so the raw model
# output is in standardized units; map it back to the original scale.
predictedPositivity = sc_y.inverse_transform(pred_train)
predictedPositivity