# RNN Models

### Imports and set seed

In [1]:
import pandas as pd
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN, LSTM

In [2]:
np.random.seed(10)

### Load data

In [3]:
covid = pd.read_csv("../datasets/WHO-COVID-19-global-data.csv")

### Create mini datasets of each country with the first day having >= 100 cases as the start date

In [4]:
# a dataframe of an individual country
# starting at the first day with over 100 cases
# with just the date and cumulative cases
# with the index reset
covid_shifted = covid[covid['Cumulative_cases'] >= 100]

lst = []

for country in covid_shifted['Country'].unique():
    temp_df = covid_shifted[(covid_shifted['Cumulative_cases'] >= 100) 
                            & (covid_shifted['Country'] == country)][['Date_reported', 'Cumulative_cases']].reset_index().drop(columns='index')
    temp_df['Days_since_100'] = [i for i in range(len(temp_df))]
# add it to the list as a tuple to keep country name while reducing df size
    lst.append((country, temp_df))

# view the first three rows of the first four country reports
# for tup in lst[0:4]:
#     print(tup[0])
#     display(tup[1].head(3))

## Simple RNN
The following code works but acted as a test / recreation of the datatechnotes.com [guide](https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html). \
For the code actually used, see below under Automation as that is organized in a more modular reusable manner.

In [5]:
# # test country
# country = lst[1]

# step = 4

# X = [val for val in country[1]['Cumulative_cases']]
# # add "step" extra to be used to make the final pred
# X = np.append(X, X[-1::]*step)

# # convert into dataset matrix
# # credit to https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html
# def convertToMatrix(data, step):
#  X, Y =[], []
#  for i in range(len(data)-step):
#   d=i+step  
#   X.append(data[i:d,])
#   Y.append(data[d,])
#  return np.array(X), np.array(Y)

# X_train, y_train =convertToMatrix(X,step)

# X_train = np.reshape(X_train, (X_train.shape[0], 1, X_train.shape[1]))

# # SimpleRNN model
# model = Sequential()
# model.add(SimpleRNN(units=32, input_shape=(1,step), activation='relu'))
# model.add(Dense(16, activation='relu'))
# model.add(Dense(1))
# model.compile(loss='mean_squared_error', optimizer='rmsprop')
# model.summary()

# model.fit(X_train, y_train, epochs=244, batch_size=16)

# preds = model.predict(X_train)

# df = pd.DataFrame(preds)

# df.size

# df['orig'] = X[0:468]

# df[0] = [int(i) for i in df[0]]

# df

---
### Automation of SimpleRNN

In [6]:
# function to convert into dataset matrix
# credit to https://www.datatechnotes.com/2018/12/rnn-example-with-keras-simplernn-in.html
def convertToMatrix(data, step):
 X, Y =[], []
 for i in range(len(data)-step):
  d=i+step  
  X.append(data[i:d,])
  Y.append(data[d,])
 return np.array(X), np.array(Y)

In [7]:
# function to 
# accepts a tuple ((country name, dataset), step size)
def make_predictable(country_tuple, step):
#     get the dataframe
    country = country_tuple[1]
#     samples to take at a time
    step = 4
    X = [val for val in country['Cumulative_cases']]
#     add "step" extra to be used to make the final pred
    X = np.append(X, X[-1::]*step)
    
    X_a, y_a = convertToMatrix(X,step)
    X_a = np.reshape(X_a, (X_a.shape[0], 1, X_a.shape[1]))
#     X_test = np.reshape(X_test, (X_test.shape[0], 1, X_test.shape[1]))
    return X_a, y_a

In [8]:
# accepts a tuple ((country name, dataset))
def make_model(country_tuple):
    step = 4
    
#     use make_predictable to set up the data
    X_train, y_train = make_predictable(country_tuple, step)
    
#     SimpleRNN model
    model = Sequential()
    model.add(SimpleRNN(units=32, input_shape=(1,step), activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='rmsprop')
#     model.summary()
    
    model.fit(X_train, y_train, epochs=210, batch_size=16, verbose=0)
    print(model.history)

    #TODO: return a tuple with the country and the model
    return country_tuple, model

In [9]:
# # a small sample (2) to test
# # the next cell will run for all countries
# model_temp = []

# for country in lst[0:2]:
#     print(country[0])
#     model_temp.append(make_model(country))

In [10]:
# loop to create the full list of models
# HIGHLY RECOMMENDED TO LEAVE THIS ONE COMMENTED OUT UNLESS YOU WANT TO LET IT RUN FOR A LONG TIME (over an hour on Strix's machine)
model_ls = []

for country in lst:
    model_ls.append(make_model(country))
    print(f'{country[0]} completed')

<tensorflow.python.keras.callbacks.History object at 0x000001BDAC0517F0>
Afghanistan completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDACA60760>
Albania completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDB74966D0>
Algeria completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDB8B8C250>
Andorra completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDBA2A1B50>
Angola completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDBB9917F0>
Anguilla completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDBC19EA00>
Antigua and Barbuda completed
<tensorflow.python.keras.callbacks.History object at 0x000001BDFB24D190>
Argentina completed
<tensorflow.python.keras.callbacks.History object at 0x000001BE03665790>
Armenia completed
<tensorflow.python.keras.callbacks.History object at 0x000001BE03D55FA0>
Aruba completed
<tensorflow.python.keras.callbacks.History object at 0x000001BE06413580>
A

### Making Predictions

In [11]:
# make predictions for a target country using a model country
def make_preds(country_model, target_country):
#     make preds on the target country
    preds = country_model.predict(make_predictable(target_country, step=4)[0])
#     return it as a df
    df = pd.DataFrame(preds)
    return df

In [12]:
# # small test
# for ctry in lst[0:2]:
# #     print(f'--------------------------------------------------------')
# #     print(f'Country: {ctry[0]}, Samples: {len(ctry[1])}')
#     for model in model_temp:
#         tempreds = make_preds(model[1], ctry)
# #         display(tempreds)
#         ctry[1][f'{model[0][0]}_mp'] = tempreds

In [None]:
# whole set
# cell was run but output was cleared so that it didn't take up space
ccount = 1
mcount = 1
for ctry in lst:
    for model in model_ls:
#         print(f'Country: {ccount}, Model: {mcount}')
        tempreds = make_preds(model[1], ctry)
        ctry[1][f'{model[0][0]}_mp'] = tempreds
        mcount += 1
    ccount += 1
    

In [30]:
# clean up the floats by truncating to int32s
for con in lst[0:4]:
    print(con[0])
#     tuples are immutable, how inconvenient
    con = (con[0], con[1].astype('int32', errors='ignore'))
    display(con[1])

Afghanistan


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_mp,Albania_mp,Algeria_mp,Andorra_mp,Angola_mp,Anguilla_mp,Antigua and Barbuda_mp,...,United States of America_mp,United States Virgin Islands_mp,Uruguay_mp,Uzbekistan_mp,Venezuela (Bolivarian Republic of)_mp,Viet Nam_mp,Wallis and Futuna_mp,Yemen_mp,Zambia_mp,Zimbabwe_mp
0,2020-03-28,106,0,161,171,181,153,162,111,149,...,164,183,161,173,169,132,165,139,175,154
1,2020-03-29,114,1,203,210,236,197,171,137,178,...,211,220,198,216,217,163,180,167,207,200
2,2020-03-30,114,2,262,272,273,251,228,173,222,...,269,274,242,279,279,201,235,207,265,249
3,2020-03-31,166,3,290,298,314,281,254,208,258,...,296,303,277,305,306,242,272,241,291,284
4,2020-04-01,192,4,302,309,310,304,270,243,280,...,308,298,290,319,320,269,291,261,290,301
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461,2021-07-02,122156,461,126190,127381,128345,126530,126050,121786,126461,...,123228,127010,128040,125876,126847,130086,129420,121756,123685,125294
462,2021-07-03,123485,462,127607,128823,129849,127855,127373,122984,127889,...,124590,128387,129504,127261,128204,131548,130920,123076,125201,126703
463,2021-07-04,124748,463,127828,129032,129874,128604,128388,124382,128250,...,124881,129244,129893,127699,129152,131998,131057,123787,125216,126936
464,2021-07-05,125937,464,127385,128814,129528,128505,128828,125045,128400,...,124375,129608,129688,127707,129806,132287,131501,124059,125756,126376


Albania


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_mp,Albania_mp,Algeria_mp,Andorra_mp,Angola_mp,Anguilla_mp,Antigua and Barbuda_mp,...,United States of America_mp,United States Virgin Islands_mp,Uruguay_mp,Uzbekistan_mp,Venezuela (Bolivarian Republic of)_mp,Viet Nam_mp,Wallis and Futuna_mp,Yemen_mp,Zambia_mp,Zimbabwe_mp
0,2020-03-25,108,0,174,183,181,177,163,137,162,...,180,178,170,187,188,150,171,151,172,174
1,2020-03-26,136,1,180,189,193,183,175,149,175,...,185,188,183,192,192,165,181,164,181,182
2,2020-03-27,148,2,193,201,206,196,186,162,188,...,197,199,195,204,204,178,191,176,192,195
3,2020-03-28,159,3,212,221,226,213,206,174,205,...,216,221,213,224,224,195,213,192,214,213
4,2020-03-29,174,4,245,254,267,240,232,190,230,...,249,260,244,258,257,219,242,216,250,243
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463,2021-07-01,132521,463,132135,133409,134751,133567,134011,130172,133020,...,129256,134981,134793,133211,135163,137190,136023,128845,131035,131314
464,2021-07-02,132523,464,132139,133412,134755,133571,134015,130178,133023,...,129260,134985,134796,133215,135166,137193,136025,128849,131038,131316
465,2021-07-03,132526,465,132141,133415,134757,133575,134018,130182,133025,...,129264,134987,134795,133219,135170,137195,136029,128851,131041,131315
466,2021-07-04,132534,466,132143,133416,134757,133575,134019,130183,133023,...,129264,134988,134795,133221,135171,137194,136026,128851,131043,131315


Algeria


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_mp,Albania_mp,Algeria_mp,Andorra_mp,Angola_mp,Anguilla_mp,Antigua and Barbuda_mp,...,United States of America_mp,United States Virgin Islands_mp,Uruguay_mp,Uzbekistan_mp,Venezuela (Bolivarian Republic of)_mp,Viet Nam_mp,Wallis and Futuna_mp,Yemen_mp,Zambia_mp,Zimbabwe_mp
0,2020-03-22,139,0,299,308,306,291,260,208,257,...,307,304,276,318,319,238,275,240,296,289
1,2020-03-23,201,1,289,296,300,293,263,240,275,...,295,288,283,304,305,263,279,256,279,290
2,2020-03-24,231,2,315,326,326,309,309,255,303,...,316,326,311,331,329,291,322,282,319,312
3,2020-03-25,264,3,369,379,412,353,344,277,343,...,372,397,366,386,382,331,360,321,382,365
4,2020-03-26,264,4,442,451,471,427,387,327,397,...,449,455,420,462,464,381,419,371,441,434
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466,2021-07-01,139626,466,140336,141907,142975,141582,142093,137779,141530,...,136987,143145,143389,140946,143211,145845,144759,136822,138864,139706
467,2021-07-02,140075,467,140804,142381,143445,142051,142561,138234,142003,...,137442,143614,143857,141411,143684,146331,145249,137274,139321,140163
468,2021-07-03,140550,468,140843,142422,143719,142381,142898,138701,142117,...,137746,143900,143962,141789,144015,146482,145311,137506,139614,140223
469,2021-07-04,141007,469,140970,142363,143820,142568,143035,138913,142133,...,137982,144024,143898,142077,144226,146549,145403,137580,139795,140172


Andorra


Unnamed: 0,Date_reported,Cumulative_cases,Days_since_100,Afghanistan_mp,Albania_mp,Algeria_mp,Andorra_mp,Angola_mp,Anguilla_mp,Antigua and Barbuda_mp,...,United States of America_mp,United States Virgin Islands_mp,Uruguay_mp,Uzbekistan_mp,Venezuela (Bolivarian Republic of)_mp,Viet Nam_mp,Wallis and Futuna_mp,Yemen_mp,Zambia_mp,Zimbabwe_mp
0,2020-03-22,133,0,214,224,247,204,196,146,192,...,220,238,211,228,225,177,204,180,226,208
1,2020-03-23,134,1,258,266,277,253,218,183,225,...,266,266,244,274,276,210,236,211,255,254
2,2020-03-24,165,2,309,321,324,295,282,216,273,...,314,325,292,327,325,253,297,254,316,296
3,2020-03-25,213,3,343,350,374,333,299,254,309,...,349,357,331,359,360,295,321,290,343,339
4,2020-03-26,231,4,377,386,385,373,340,295,346,...,383,376,358,396,398,332,365,322,368,372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466,2021-07-01,13911,466,13875,14014,14154,14032,14077,13670,13974,...,13577,14177,14162,13994,14196,14399,14289,13534,13761,13792
467,2021-07-02,13918,467,13876,14014,14154,14032,14077,13671,13972,...,13577,14178,14161,13996,14197,14398,14286,13534,13762,13792
468,2021-07-03,13918,468,13876,14014,14154,14032,14077,13671,13972,...,13577,14178,14161,13996,14197,14398,14286,13534,13762,13792
469,2021-07-04,13918,469,13876,14014,14154,14032,14077,13671,13972,...,13577,14178,14161,13996,14197,14398,14286,13534,13762,13792


---
TODO:

## LSTM
TODO: make an LSTM model with backpropagation or more layers

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
model = keras.Sequential()
# Add an Embedding layer expecting input vocab of size 1000, and
# output embedding dimension of size 64.
model.add(layers.Embedding(input_dim=1000, output_dim=64))

# Add a LSTM layer with 128 internal units.
model.add(layers.LSTM(32))

# Add a Dense layer with 10 units.
model.add(layers.Dense(10))

model.summary()