# Covid 19 Prediction Time Series - Adit Ahmedabadi

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
# from google.colab import files
import tensorflow as tf

# Show the TensorFlow version in the cell output so the run can be matched to
# the library version it was executed with.
print(tf.__version__)

2.4.1


In [6]:
# Load the confirmed-cases table from a CSV file expected in the working
# directory (relative path).
covid_df = pd.read_csv('time_series_covid_19_confirmed.csv')

In [7]:
# Preview the first rows to check the column layout (location columns followed
# by one column per date).
covid_df.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,1/10/21,1/11/21,1/12/21,1/13/21,1/14/21,1/15/21,1/16/21,1/17/21,1/18/21,1/19/21
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,53489,53538,53584,53584,53775,53831,53938,53984,54062,54141
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,63595,63971,64627,65334,65994,66635,67216,67690,67982,68568
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,102144,102369,102641,102860,103127,103381,103611,103833,104092,104341
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,8586,8586,8682,8818,8868,8946,9038,9083,9083,9194
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,18193,18254,18343,18425,18613,18679,18765,18875,18926,19011


In [8]:
# Keep only the row(s) whose Country/Region is India and discard the location
# columns, leaving just the per-date count columns.
# drop() is chained and returns a new frame instead of mutating a filtered
# slice of covid_df in place; this avoids pandas' SettingWithCopyWarning and
# keeps the cell safe to re-run.
india_df = (
    covid_df[covid_df['Country/Region'] == 'India']
    .drop(columns=['Lat', 'Long', 'Province/State', 'Country/Region'])
)
india_df

Unnamed: 0,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,1/31/20,...,1/10/21,1/11/21,1/12/21,1/13/21,1/14/21,1/15/21,1/16/21,1/17/21,1/18/21,1/19/21
146,0,0,0,0,0,0,0,0,1,1,...,10466595,10479179,10495147,10512093,10527683,10542841,10557985,10571773,10581823,10595639


In [9]:
india_df.shape

(1, 364)

In [10]:
# Flatten the India frame into a 1-D array of counts (row-major, same element
# order as the date columns) and build a matching integer time axis
# 0, 1, 2, ... from its length. Deriving the length from the data instead of
# hard-coding 364 keeps the cell correct when the CSV contains more dates.
series = india_df.to_numpy().flatten()
time = np.arange(len(series))
series.shape

(364,)

In [11]:
# Integer time axis (0, 1, 2, ...) with one step per element of `series`;
# sized from the data rather than a hard-coded 364.
time = np.arange(len(series))

In [12]:
def plot_series(time , series , start = 0 , end = None , text = None):
    """Plot a slice of a time series on the current matplotlib axes.

    Args:
        time: Sequence of x-values (time steps).
        series: Sequence of y-values, same length as ``time``.
        start: Index of the first point to plot.
        end: Index one past the last point to plot; ``None`` plots to the end.
        text: Optional legend text. A single string labels the plotted line;
            a list/tuple of strings is passed to ``plt.legend`` as the full
            list of legend entries (useful after several calls on one figure).
    """
    # Only a plain string is a valid per-line label; a list is handled by the
    # explicit legend call below.
    line_label = text if isinstance(text, str) else None
    plt.plot(time[start : end] , series[start : end] , label = line_label)
    if text:
        # `text is list` compared against the type object and was never true;
        # isinstance is the intended check.
        if isinstance(text, (list, tuple)):
            plt.legend(list(text))
        else:
            plt.legend([text])
    plt.ylabel('Confirmed cases')
    plt.xlabel('Time')
    # Was `plt,title(...)`: a tuple expression that raised NameError on `title`.
    plt.title('Covid-19 confirmed cases')

def split_dataset(series, time, split_val):
    """Split a series and its time axis into train and validation parts.

    Everything before index ``split_val`` goes to the training part and
    everything from ``split_val`` onward goes to the validation part.

    Returns:
        A 4-tuple ``(time_train, series_train, time_val, series_val)``.
    """
    head = slice(None, split_val)
    tail = slice(split_val, None)
    return (time[head], series[head], time[tail], series[tail])

def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a 1-D series into a batched tf.data pipeline of sliding windows.

    Each element is built from ``window_size + 1`` consecutive values: the
    input is all but the last value and the target is all but the first,
    i.e. the same window shifted one step ahead. A trailing feature axis of
    size 1 is added to the series before windowing.

    Args:
        series: 1-D sequence of values.
        window_size: Number of time steps in each input window.
        batch_size: Number of windows per batch.
        shuffle_buffer: Buffer size used when shuffling the windows.

    Returns:
        A dataset yielding ``(inputs, targets)`` batches, prefetching one batch.
    """
    span = window_size + 1
    values = tf.expand_dims(series, axis=-1)
    windows = (
        tf.data.Dataset.from_tensor_slices(values)
        .window(span, shift=1, drop_remainder=True)
        # Each window is itself a small dataset; batch it into one tensor.
        .flat_map(lambda win: win.batch(span))
        .shuffle(shuffle_buffer)
        .map(lambda win: (win[:-1], win[1:]))
    )
    return windows.batch(batch_size).prefetch(1)

In [13]:
# First 300 time steps are used for training; the remainder is held out for
# validation. The printed pair is (train length, validation length).
time_train, series_train, time_val, series_val = split_dataset(
    series=series,
    time=time,
    split_val=300,
)
print(len(time_train), len(time_val))

300 64


In [14]:
# Reset Keras' global state, then seed both NumPy and TensorFlow.
tf.keras.backend.clear_session()
np.random.seed(64)
tf.random.set_seed(64)

# Windowing / batching hyperparameters for the training pipeline.
shuffle_buffer_size = 200
batch_size = 64
window_size = 32

train_set = windowed_dataset(
    series=series_train,
    window_size=window_size,
    batch_size=batch_size,
    shuffle_buffer=shuffle_buffer_size,
)
print(train_set, series_train.shape)


<PrefetchDataset shapes: ((None, None, 1), (None, None, 1)), types: (tf.int64, tf.int64)> (300,)


In [17]:
# Conv1D feature extractor -> two stacked LSTMs -> small dense head with a
# single linear output unit.
model = tf.keras.models.Sequential([
  # Causal padding keeps each output step from seeing future inputs;
  # input_shape=[None, 1] accepts sequences of any length with one feature.
  tf.keras.layers.Conv1D(filters=60, kernel_size=5,
                      strides=1, padding="causal",
                      activation="relu",
                      input_shape=[None, 1]),
  tf.keras.layers.LSTM(64, return_sequences=True),
  # NOTE(review): this LSTM returns only its last step, so the model emits one
  # value per window, while windowed_dataset supplies a target for every step
  # (w[1:]). Confirm whether return_sequences=True (sequence-to-sequence) or a
  # last-step-only target was intended before changing either side.
  tf.keras.layers.LSTM(64),
  tf.keras.layers.Dense(30, activation="relu"),
  tf.keras.layers.Dense(10, activation="relu"),
  tf.keras.layers.Dense(1),
])

# lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-8 * 10**(epoch / 20))
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)
# Huber loss is less sensitive to outliers than squared error; MAE is tracked
# as a metric in the units of the series.
model.compile(loss=tf.keras.losses.Huber(),
              optimizer=optimizer,
              metrics=["mae"])

In [None]:
# Train on the windowed training set for 1000 epochs; `history` keeps the
# object returned by fit() for later inspection.
history = model.fit(train_set, epochs=1000)