# RNN Models

### Imports and set seed

In [1]:
import pandas as pd
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN, LSTM

In [2]:
# Seed both RNGs used in this notebook. NumPy's seed alone does not make the
# Keras models reproducible: weight initialisation and batch shuffling draw
# from TensorFlow's own generator.
import tensorflow as tf

np.random.seed(10)
tf.random.set_seed(10)

### Load data

In [3]:
covid = pd.read_csv("../datasets/WHO-COVID-19-global-data.csv")

### Create mini datasets of each country with the first day having >= 100 cases as the start date

In [4]:
# Build one small dataframe per country:
#   * only rows where cumulative cases have reached 100,
#   * only the date and cumulative-case columns,
#   * a fresh 0..n-1 index,
#   * plus a Days_since_100 counter (0 on the first retained row).
covid_shifted = covid[covid['Cumulative_cases'] >= 100]

lst = []

for country in covid_shifted['Country'].unique():
    # covid_shifted is already restricted to >= 100 cases, so selecting the
    # country's rows is enough here
    temp_df = covid_shifted.loc[
        covid_shifted['Country'] == country,
        ['Date_reported', 'Cumulative_cases'],
    ].reset_index(drop=True)
    temp_df['Days_since_100'] = list(range(len(temp_df)))
    # stored as a (name, frame) tuple so the country name travels with the
    # trimmed dataframe
    lst.append((country, temp_df))

# view the first three rows of the first four country reports
# for tup in lst[0:4]:
#     print(tup[0])
#     display(tup[1].head(3))

## Simple RNN
The following code works, but it was only a test that recreates the datatechnotes.com [guide](https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html). \
For the code actually used, see the Automation section below, which is organized in a more modular, reusable manner.

In [None]:
# # test country
# country = lst[1]

# step = 4

# X = [val for val in country[1]['Cumulative_cases']]
# # add "step" extra to be used to make the final pred
# X = np.append(X, X[-1::]*step)

# # convert into dataset matrix
# # credit to https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html
# def convertToMatrix(data, step):
#  X, Y =[], []
#  for i in range(len(data)-step):
#   d=i+step  
#   X.append(data[i:d,])
#   Y.append(data[d,])
#  return np.array(X), np.array(Y)

# X_train, y_train =convertToMatrix(X,step)

# X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))

# # SimpleRNN model
# model = Sequential()
# model.add(SimpleRNN(units=32, input_shape=(1,step), activation='relu'))
# model.add(Dense(16, activation='relu'))
# model.add(Dense(1))
# model.compile(loss='mean_squared_error', optimizer='rmsprop')
# model.summary()

# model.fit(X_train, y_train, epochs=244, batch_size=16)

# preds = model.predict(X_train)

# df = pd.DataFrame(preds)

# df.size

# df['orig'] = X[0:468]

# df[0] = [int(i) for i in df[0]]

# df

---
### Automation of SimpleRNN

In [5]:
# function to convert into dataset matrix
# credit to https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html
def convertToMatrix(data, step):
    """Slice a sequence into overlapping windows and their next-step targets.

    For every start position ``i`` in ``range(len(data) - step)`` the window
    ``data[i:i+step]`` becomes one input row and ``data[i+step]`` its target.

    data: a NumPy array (indexed with tuple-style subscripts).
    step: window length.

    Returns ``(X, Y)`` as NumPy arrays; both are empty when ``data`` has no
    more than ``step`` elements.
    """
    starts = range(len(data) - step)
    windows = [data[lo:lo + step,] for lo in starts]
    targets = [data[lo + step,] for lo in starts]
    return np.array(windows), np.array(targets)

In [6]:
def make_predictable(country_tuple, step):
    """Turn one country's cumulative-case series into windowed RNN inputs.

    country_tuple: ``(country name, dataframe)``; the dataframe must have a
        ``Cumulative_cases`` column.
    step: number of consecutive values in each input window.

    Returns ``(X_a, y_a)`` where ``X_a`` has been reshaped to
    ``(samples, 1, step)`` and ``y_a`` holds the value that follows each
    window. One window is produced per row of the dataframe.
    """
    # NOTE: `step` is honoured as passed; it used to be overwritten with 4 here.
    cases = list(country_tuple[1]['Cumulative_cases'])
    # Pad the series by repeating its last value `step` times so that the
    # final real observations still get a window; as a consequence the last
    # `step` targets are copies of that last value.
    padded = np.append(cases, cases[-1:] * step)

    X_a, y_a = convertToMatrix(padded, step)
    # (samples, step) -> (samples, 1, step): one timestep carrying `step` features
    X_a = np.reshape(X_a, (X_a.shape[0], 1, X_a.shape[1]))
    return X_a, y_a

In [7]:
def make_model(country_tuple, step=4, epochs=210, batch_size=16):
    """Fit a small SimpleRNN on one country's cumulative-case series.

    country_tuple: ``(country name, dataframe)`` as stored in ``lst``.
    step: window length; used both to build the training windows and as the
        feature dimension of the network's input shape ``(1, step)``.
    epochs, batch_size: passed straight to ``model.fit``.

    Returns ``(country_tuple, model)`` so the fitted model stays paired with
    the country it was trained on.
    """
    # use make_predictable to set up the data
    X_train, y_train = make_predictable(country_tuple, step)

    # SimpleRNN model
    model = Sequential()
    model.add(SimpleRNN(units=32, input_shape=(1, step), activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='rmsprop')

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0)
    # prints the History object itself (its repr), not the per-epoch losses
    print(model.history)

    return country_tuple, model

In [8]:
# Quick check on the first two countries only;
# the next cell trains a model for every country.
model_temp = []

for country in lst[:2]:
    print(country[0])
    # each entry is the (country_tuple, fitted model) pair from make_model
    model_temp += [make_model(country)]

Afghanistan
<tensorflow.python.keras.callbacks.History object at 0x000001F4DEEBF610>
Albania
<tensorflow.python.keras.callbacks.History object at 0x000001F4DF867B20>


In [None]:
# # loop to create the full list of models
# # HIGHLY RECOMMENDED TO LEAVE THIS ONE COMMENTED OUT UNLESS YOU WANT TO LET IT RUN FOR A LONG TIME (over an hour on Strix's machine)
# model_ls = []

# for country in lst:
#     model_ls.append(make_model(country))
#     print(f'{country[0]} completed')

### Making Predictions

In [24]:
def make_preds(country_model, target_country, step=4):
    """Predict a target country's series with a model fitted on any country.

    country_model: a fitted model exposing ``predict``.
    target_country: ``(country name, dataframe)`` tuple to predict for.
    step: window length used to prepare the inputs; it has to equal the
        window length the model was trained with (4 throughout this file).

    Returns the predictions wrapped in a dataframe, one row per input window.
    """
    # make_predictable returns (inputs, targets); only the inputs are needed
    windows, _ = make_predictable(target_country, step=step)
    preds = country_model.predict(windows)
    return pd.DataFrame(preds)

In [45]:
# For each sample country, run every fitted model over its series and keep the
# predictions next to the actual values, one column per model.
for ctry in lst[0:2]:
    print('--------------------------------------------------------')
    print(f'Country: {ctry[0]}, Samples: {len(ctry[1])}')

    for model in model_temp:
        # model is (country_tuple, fitted model); model[0][0] is the name of
        # the country the model was trained on
        tempreds = make_preds(model[1], ctry)
        # Previously the predictions were computed and thrown away.
        # Row i holds the prediction made from the window starting at row i.
        ctry[1][f'{model[0][0]}_preds'] = tempreds[0].values

    # show the frame once, after all models have added their column
    display(ctry[1])

--------------------------------------------------------
Country: Afghanistan, Samples: 466


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_preds,Albania_preds
0,2020-03-28,106,0,145.319138,146.663605
1,2020-03-29,114,1,168.752136,172.722336
2,2020-03-30,114,2,206.039719,206.587036
3,2020-03-31,166,3,247.041855,244.479172
4,2020-04-01,192,4,270.457092,262.256073
...,...,...,...,...,...
461,2021-07-02,122156,461,126381.257812,125262.476562
462,2021-07-03,123485,462,127886.421875,126646.773438
463,2021-07-04,124748,463,128109.500000,127361.500000
464,2021-07-05,125937,464,128215.570312,127724.078125


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_preds,Albania_preds
0,2020-03-28,106,0,145.319138,146.663605
1,2020-03-29,114,1,168.752136,172.722336
2,2020-03-30,114,2,206.039719,206.587036
3,2020-03-31,166,3,247.041855,244.479172
4,2020-04-01,192,4,270.457092,262.256073
...,...,...,...,...,...
461,2021-07-02,122156,461,126381.257812,125262.476562
462,2021-07-03,123485,462,127886.421875,126646.773438
463,2021-07-04,124748,463,128109.500000,127361.500000
464,2021-07-05,125937,464,128215.570312,127724.078125


--------------------------------------------------------
Country: Albania, Samples: 468


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_preds,Albania_preds
0,2020-03-25,108,0,155.568054,154.627838
1,2020-03-26,136,1,173.680435,170.565842
2,2020-03-27,148,2,186.690842,182.654327
3,2020-03-28,159,3,204.038986,198.665512
4,2020-03-29,174,4,228.912430,223.620132
...,...,...,...,...,...
463,2021-07-01,132521,463,133122.609375,132999.406250
464,2021-07-02,132523,464,133124.828125,133002.593750
465,2021-07-03,132526,465,133125.921875,133004.343750
466,2021-07-04,132534,466,133125.093750,133005.484375


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_preds,Albania_preds
0,2020-03-25,108,0,155.568054,154.627838
1,2020-03-26,136,1,173.680435,170.565842
2,2020-03-27,148,2,186.690842,182.654327
3,2020-03-28,159,3,204.038986,198.665512
4,2020-03-29,174,4,228.912430,223.620132
...,...,...,...,...,...
463,2021-07-01,132521,463,133122.609375,132999.406250
464,2021-07-02,132523,464,133124.828125,133002.593750
465,2021-07-03,132526,465,133125.921875,133004.343750
466,2021-07-04,132534,466,133125.093750,133005.484375


---
TODO:

## LSTM
TODO: make an LSTM model with backpropagation or more layers

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Demo stack: embedding -> LSTM -> dense projection.
model = keras.Sequential([
    # Embedding layer expecting an input vocabulary of size 1000 and
    # producing embeddings of dimension 64.
    layers.Embedding(input_dim=1000, output_dim=64),
    # LSTM layer with 32 internal units.
    layers.LSTM(32),
    # Dense layer with 10 units.
    layers.Dense(10),
])

model.summary()