# COVID-19 Cases Prediction Assignment

### Tasks include:
#### 1. Develop an LSTM model to predict COVID cases in Malaysia.
#### 2. Ensure MAPE error is less than 1% on the testing dataset.
#### 3. Display training loss using TensorBoard.

In [None]:
# 1 Setup
import os
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import IPython.display
import matplotlib as mpl
from  keras import callbacks
import matplotlib.pyplot as plt
from windowing import WindowGenerator

In [None]:
# 2 Read the training CSV into a DataFrame
df_cases_train_malaysia = pd.read_csv(
    filepath_or_buffer='2024_COVID_TRAIN_SET.csv',
)

In [None]:
# 2.1 Read the test CSV into a DataFrame
# (despite "train" in the variable name, this frame holds the TEST set)
df_cases_train_malaysia_test = pd.read_csv(
    filepath_or_buffer='2024_COVID_TEST_SET.csv',
)

In [None]:
# Append the test rows after the training rows and renumber the index 0..n-1,
# so later cells can slice one continuous frame.
_frames_to_stack = [df_cases_train_malaysia, df_cases_train_malaysia_test]
df_cases_train_malaysia = pd.concat(_frames_to_stack, ignore_index=True)

#### Data Inspection

In [None]:
# Preview the first 20 rows (left as a bare expression so the notebook renders it)
df_cases_train_malaysia.head(20)

In [None]:
# Show the column labels of the frame
df_cases_train_malaysia.keys()

In [None]:
# Print dtype and non-null count for every column (useful for spotting missing values)
df_cases_train_malaysia.info()

In [None]:
# Replace every missing value with 0, modifying both frames in place
for _frame in (df_cases_train_malaysia, df_cases_train_malaysia_test):
    _frame.fillna(0, inplace=True)

In [None]:
# Summary statistics, transposed so each column of the frame becomes one row of the table
df_cases_train_malaysia.describe().transpose()

In [None]:
# Remove the 'date' column from the feature frame and keep it separately
# as parsed timestamps (source strings are day/month/year).
_raw_dates = df_cases_train_malaysia.pop('date')
date_time = pd.to_datetime(_raw_dates, format='%d/%m/%Y')


In [None]:
# Data visualisation: one subplot per feature, plotted against the parsed dates
plot_cols = ['cases_new', 'cases_import', 'cases_recovered', 'cases_active',
       'cases_cluster', 'cases_unvax', 'cases_pvax', 'cases_fvax',
       'cases_boost', 'cluster_import','cluster_religious', 'cluster_community', 'cluster_highRisk',
       'cluster_education', 'cluster_detentionCentre', 'cluster_workplace']

# Figure height scales with the number of subplots (3 units per column).
# This is the only figsize assignment needed: an earlier hard-coded
# (12, 3 * 9) was overwritten before anything was drawn.
plt.rcParams['figure.figsize'] = (12, 3 * len(plot_cols))

plot_features = df_cases_train_malaysia[plot_cols]
# Use the timestamps as the index so the x-axis shows dates instead of row numbers
plot_features.index = date_time
_ = plot_features.plot(subplots=True)

# 1. Develop LSTM

In [None]:
# Data splitting: chronological 70% / 20% / 10% split into training,
# validation and test sets (no shuffling, so row order is preserved).
column_indices = {name: i for i, name in enumerate(df_cases_train_malaysia.columns)}

n = len(df_cases_train_malaysia)
train_end = int(n * 0.7)
val_end = int(n * 0.9)

train_df = df_cases_train_malaysia[:train_end]
# The validation slice must come from the same frame as the other two splits.
# Slicing the separate test-set frame with these offsets selects the wrong
# rows (or none) and yields different columns than train_df.
val_df = df_cases_train_malaysia[train_end:val_end]
test_df = df_cases_train_malaysia[val_end:]

num_features = df_cases_train_malaysia.shape[1]

# Column labels, reused when rebuilding DataFrames from scaler output arrays
cl = train_df.columns

In [None]:
# Summary statistics of the training split, one row per column
train_df.describe().transpose()

In [None]:
# Manual standardisation: subtract the training mean and divide by the
# training standard deviation. The same training statistics are applied to
# all three splits.
train_mean, train_std = train_df.mean(), train_df.std()

train_dfnn, val_dfnn, test_dfnn = (
    (_split - train_mean) / train_std
    for _split in (train_df, val_df, test_df)
)


In [None]:
# Standard scaling with scikit-learn (per-column zero mean / unit variance)
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Learn the per-column mean and std from the training split only.
train_dfss = pd.DataFrame(scaler.fit_transform(train_df), columns=cl)

# Apply the TRAINING statistics to the other splits. Calling fit_transform
# here would re-fit the scaler on each split, so the three splits would be
# scaled inconsistently and validation/test information would leak in.
val_dfss = pd.DataFrame(scaler.transform(val_df), columns=cl)
test_dfss = pd.DataFrame(scaler.transform(test_df), columns=cl)

In [None]:
# Normalizer: rescales each ROW (sample) to unit norm
from sklearn.preprocessing import Normalizer

# Named `normalizer` rather than `n`: `n` already holds the row count used by
# the data-splitting cell, and overwriting it breaks that cell on re-run.
normalizer = Normalizer()

# Normalizer keeps no fitted statistics, so fit-once/transform gives the same
# numbers as re-fitting; this form just matches the other scalers.
train_dfn = pd.DataFrame(normalizer.fit_transform(train_df), columns=cl)
val_dfn = pd.DataFrame(normalizer.transform(val_df), columns=cl)
test_dfn = pd.DataFrame(normalizer.transform(test_df), columns=cl)

In [None]:
# Min-max scaling with scikit-learn
from sklearn.preprocessing import MinMaxScaler

minmax = MinMaxScaler()

# Learn each column's min and max from the training split only.
train_dfmm = pd.DataFrame(minmax.fit_transform(train_df), columns=cl)

# Re-use the training min/max for the other splits instead of re-fitting.
# Values may fall outside [0, 1] when a split exceeds the training range;
# that is expected and keeps all splits on the same scale.
val_dfmm = pd.DataFrame(minmax.transform(val_df), columns=cl)
test_dfmm = pd.DataFrame(minmax.transform(test_df), columns=cl)

In [None]:
# Robust scaling with scikit-learn (centres on the median, scales by the IQR)
from sklearn.preprocessing import RobustScaler

rs = RobustScaler()

# Learn the per-column median and quantile range from the training split only.
train_dfrs = pd.DataFrame(rs.fit_transform(train_df), columns=cl)

# Apply the training statistics to the other splits; re-fitting on each split
# would scale them inconsistently.
val_dfrs = pd.DataFrame(rs.transform(val_df), columns=cl)
test_dfrs = pd.DataFrame(rs.transform(test_df), columns=cl)

In [None]:
# Replace NaNs produced by the manual standardisation with 0 in EVERY split.
# A column whose training std is 0 gives 0/0 = NaN. The validation and test
# frames go through the same division and are passed to the same window
# generator, so they need the same clean-up as the training frame.
for _normalised_split in (train_dfnn, val_dfnn, test_dfnn):
    _normalised_split.fillna(0, inplace=True)

In [None]:
# Summary statistics of the standardised training split, one row per column
train_dfnn.describe().transpose()

In [None]:
# Single-step window: 30 input steps, 30 label steps, labels offset by 1 step,
# with 'cases_new' as the only label column.
wide_window = WindowGenerator(
    input_width=30,
    label_width=30,
    shift=1,
    train_df=train_dfnn,
    val_df=val_dfnn,
    test_df=test_dfnn,
    label_columns=['cases_new'],
)
# Bare expression so the notebook displays the window's repr
wide_window

In [None]:
# Plot example windows for 'cases_new' (no model passed; at most 4 subplots requested)
wide_window.plot(plot_col='cases_new',max_subplots=4)

In [None]:
# Base directory for the TensorBoard logs: the current working directory
# of the notebook process.
filepath = os.getcwd()

# Echo the directory so it is visible in the notebook output
print(filepath)

In [None]:
# Checkpoint path for the single-step model.
# Built from the working directory instead of a hard-coded absolute path on
# one user's machine, so the notebook runs wherever it is checked out.
# The file name is kept unchanged so existing checkpoint files still match.
checkpoint_filepath = os.path.join(filepath, 'chechpoint.h5')

In [None]:
# TensorBoard callback for the single-step model. Each run logs into its own
# timestamped sub-directory of <filepath>/tensorboard_log.
_run_stamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
logpath = os.path.join(filepath, 'tensorboard_log', _run_stamp)
tb = callbacks.TensorBoard(logpath)

In [None]:
# Regularisation strength constant.
# NOTE(review): no cell visible in this notebook reads this value, and the
# LSTM layers below are built without a regularizer. Confirm whether it
# should be wired into the models or removed.
regularization_strength = 0.01

In [None]:
# Single-step LSTM model: a 32-unit LSTM that emits an output at every time
# step (return_sequences=True), followed by a Dense layer with 1 unit.
lstm_single_step = tf.keras.models.Sequential([
    tf.keras.layers.LSTM(32, return_sequences=True),
    tf.keras.layers.Dense(1),
])

In [None]:
from windowing import compile_and_fit

In [None]:
# Model training: compile and fit the single-step model on `wide_window`,
# passing the checkpoint path and the TensorBoard callback to the project helper.
# The returned object exposes `.history`, which the plotting cells below read.
history_single_step = compile_and_fit(lstm_single_step,wide_window,checkpoint_filepath,tb)

In [None]:
# Training loss vs validation loss per epoch
fig = plt.figure(figsize=(10, 10))
for _metric, _colour in (('loss', 'teal'), ('val_loss', 'orange')):
    # The history key doubles as the legend label
    plt.plot(history_single_step.history[_metric], color=_colour, label=_metric)
fig.suptitle('Loss', fontsize=10)
plt.legend(loc='upper left')
plt.show()

In [None]:
# Training vs validation mean absolute percentage error (MAPE) per epoch.
# Legend and title name the metric actually plotted; they previously said
# "loss", which made this chart indistinguishable from the loss chart.
fig = plt.figure(figsize=(10,10))
plt.plot(history_single_step.history['mean_absolute_percentage_error'],color='teal',label='mape')
plt.plot(history_single_step.history['val_mean_absolute_percentage_error'],color='orange',label='val_mape')
fig.suptitle('Mean Absolute Percentage Error',fontsize=10)
plt.legend(loc='upper left')
plt.show()

In [None]:
# Load the weights stored at the checkpoint path into the single-step model
# (presumably the checkpoint written during training — confirm in compile_and_fit)
lstm_single_step.load_weights(checkpoint_filepath)

In [None]:
# Plot the result: same window plot as before, now passing the trained
# single-step model so its output can be drawn for 'cases_new'
wide_window.plot(plot_col='cases_new',model=lstm_single_step,max_subplots=4)

In [None]:
# Multi-step model: 30 input steps are used to predict the following
# OUT_STEPS steps of 'cases_new'.
OUT_STEPS = 30
mutli_window = WindowGenerator(
    input_width=30,
    label_width=OUT_STEPS,
    shift=OUT_STEPS,
    train_df=train_dfnn,
    val_df=val_dfnn,
    test_df=test_dfnn,
    label_columns=['cases_new'],
)
# Bare expression so the notebook displays the window's repr
mutli_window

In [None]:
# Checkpoint path for the multi-step model.
# Derived from the current working directory instead of a hard-coded absolute
# path on one user's machine, so the notebook is portable.
# The file name is kept unchanged so existing checkpoint files still match.
checkpoint_filepath_m = os.path.join(os.getcwd(), 'chechpoint-mutli.h5')

In [None]:
# TensorBoard callback for the multi-step model. Each run logs into its own
# timestamped sub-directory of <filepath>/tensorboard_log_m.
_run_stamp_m = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
logpath = os.path.join(filepath, 'tensorboard_log_m', _run_stamp_m)
tb_m = callbacks.TensorBoard(logpath)

In [None]:
# Plot example multi-step windows for 'cases_new' (no model passed; at most 4 subplots requested)
mutli_window.plot(plot_col='cases_new',max_subplots=4)

In [None]:
# Multi-step LSTM model: a 32-unit LSTM that returns only its final state,
# a Dense layer producing OUT_STEPS * num_features values, and a Reshape to
# (OUT_STEPS, num_features) per sample.
# NOTE(review): the window is built with a single label column ('cases_new')
# but this head emits num_features values per step — confirm against
# WindowGenerator whether the output width should be 1 instead.
lstm_mutli_step = tf.keras.Sequential([
    tf.keras.layers.LSTM(32, return_sequences=False),
    tf.keras.layers.Dense(OUT_STEPS * num_features),
    tf.keras.layers.Reshape([OUT_STEPS, num_features]),
])

In [None]:
# Compile and fit the multi-step model on `mutli_window`, passing its own
# checkpoint path and TensorBoard callback to the project helper
history_mutli_step = compile_and_fit(lstm_mutli_step,mutli_window,checkpoint_filepath_m,tb_m)

In [None]:
# Plot multi-step windows for 'cases_new', passing the trained multi-step model
mutli_window.plot(plot_col='cases_new',model=lstm_mutli_step,max_subplots=4)