In [40]:

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn
import os
import torch 
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader, Dataset
from sklearn.preprocessing import MinMaxScaler
from pandas.io.stata import StataReader
from PIL import Image
import plotly.express as px
from sklearn.linear_model import LinearRegression
import requests

In [41]:
def get_cases():
    '''
    window = tkinter.Tk()
    window.title("Case data explorer")
    window.withdraw()
    '''
    cwd = os.getcwd()
    csv_url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv"
    req = requests.get(csv_url, allow_redirects=True)
    open('Local_case_data/time_series_covid19_confirmed_US.csv', 'wb').write(req.content)
    #f = filedialog.askopenfilename(initialdir="/Local_case_data/",initialfile="time_series_covid19_confirmed_US.csv", title="Select a File",filetypes=(("CSV files", "*.csv*"),("all files", "*.*")), )
    cwd = os.getcwd()
    f = cwd+"/Local_case_data/time_series_covid19_confirmed_US.csv"
    #label_file_explorer.configure(text="File Opened: "+f)
    #f.close()
    case_df = pd.read_csv(str(f))
    #window.mainloop
    #window.destroy()
    return case_df
cases = get_cases()

In [42]:
def preprocessing(case_df):
    print("Preprocessing...")
    columns = case_df.columns
    #datecolumns = case_df.columns()
    case_df.drop(columns[:5], inplace=True, axis=1)
    region_col = list(case_df.columns.values)
    region_col_ax = region_col[11:]
    plot_cols = region_col_ax
    print("Case data last updated: " + str(case_df.columns[-1]))
    state_name ='Mississippi' #or 'Mississippi'
    region = case_df.loc[case_df['Province_State'] == state_name]
    #print(region)
    #st.write(region)
    county_list = region['Admin2']
    #default_county = county_list.iloc[0]
    county_name =  'Hinds'#"Hinds")# or default_county
    county = region.loc[region['Admin2'] == county_name]
    columns = columns[5:11]
    county.drop(columns, inplace=True, axis=1)
    date_index = range(len(county))
    county = county.transpose()
    lastdate = (str(region.columns[-1]))
    col_length = len(region.columns)
    initial_startdate = (str(region.columns[6]))
    test_startdate = (str(region.columns[int(col_length*.803)]))
    val_startdate = (str(region.columns[int(col_length*.4)]))
    print(test_startdate)
    return region_col, region_col_ax, region, county, initial_startdate, val_startdate, test_startdate, county_name, state_name, lastdate

In [43]:

def train_test_val_split(preprocessed_data):
    county = preprocessed_data[3]
    county_length = len(county)
    training_df = county[0:int(county_length*0.4)] # training set for model parameter optimization
    try:
        val_df = county[int(county_length*0.4):int(county_length*0.8)] #validation set used to find optimal model hyperparameters
    except:
        val_df = county[int(county_length*0.4):int(county_length*0.798)] #second splice needed in case previous split doesn't work based on odd vs even days.
    test_df = county[int(county_length*0.8):] #test set used to determine model performance in general
    num_feature_days = county.shape[0]
    print("Number of Days:", str(num_feature_days))
    training_mean = training_df.mean()
    training_std = training_df.std()
    print("TYPES: \n", type(training_std))
    return training_df, val_df, test_df, training_mean, training_std

In [44]:
seq_len = 35
batch_size = 55
x_train, x_val, x_test, training_mean, training_std = train_test_val_split(preprocessing(cases))

Preprocessing...
Case data last updated: 8/12/22
2/8/22
Number of Days: 934
TYPES: 
 <class 'pandas.core.series.Series'>


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [45]:

def normalize(df, training_mean, training_std):
    normed_df = (df - training_mean)/training_std
    return normed_df


def denormalize(df, training_mean, training_std ):
    #denormalized_df = training_std.values/(df.values - training_mean.values)
    denormalized_df = training_std.values*df.values + training_mean.values
    return denormalized_df

In [46]:
def torch_data_loader(x_train, x_val, x_test):

    train_features = torch.Tensor(x_train.values)
    val_features = torch.Tensor(x_val.values)
    test_features = torch.Tensor(x_test.values)
    print(train_features.shape)
    train_targets = torch.Tensor(x_train.values)
    val_targets = torch.Tensor(x_val.values)
    test_targets = torch.Tensor(x_test.values)
    batch_size = 1

    train_dataset = TensorDataset(train_features, train_targets)
    val_dataset = TensorDataset(val_features, val_targets)
    test_dataset = TensorDataset(test_features, test_targets)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    print(train_loader)
    print(train_dataset)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    test_loader_one = DataLoader(test_dataset, batch_size=1, shuffle=True)
    return train_loader, val_loader, test_loader, test_loader_one


In [47]:
class LSTM_Model(torch.nn.Module):
    def __init__(self, input_dim , hidden_size , num_layers, batch_size):
        super(LSTM_Model, self).__init__()
        self.num_layers = num_layers
        self.input_size = input_dim
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.lstm = torch.nn.LSTM(input_size=input_dim , hidden_size = hidden_size , num_layers= num_layers )
        self.fc = torch.nn.Linear(hidden_size,1)

    def forward(self,x,hn,cn):
        out , (hn,cn) = self.lstm(x , (hn,cn))
        final_out = self.fc(out[-1])
        return final_out,hn,cn

    def predict(self,x):
        hn,cn  = self.init()
        final_out = self.fc(hn[-1])
        return final_out

    def init(self):
        h0 =  torch.zeros(self.num_layers , self.batch_size , self.hidden_size)
        c0 =  torch.zeros(self.num_layers , self.batch_size , self.hidden_size)
        return h0 , c0

input_dim = 1
hidden_size = seq_len
num_layers = 5
covid_forecast_model = LSTM_Model(input_dim, hidden_size, num_layers, batch_size)

loss_function = torch.nn.MSELoss() 
optimizer  = torch.optim.Adam(covid_forecast_model.parameters(), lr=0.017) # used adaptive moment estimation to optimize the model

TypeError: __init__() takes 5 positional arguments but 6 were given

In [None]:
        
def train(dataloader, model):
    predictions = []
    loss_list = []
    hn , cn = model.init()
    model.train()
    for batch , item in enumerate(dataloader):
        x , y = item
        y = y.type(torch.FloatTensor)
        #x = x.to(device)
        #y = y.to(device)
        out , hn , cn = model(x.reshape(seq_len,batch_size,-1),hn,cn)
        out = out.view(-1)
        # print(out.shape)
        # print(y.shape)
        loss = loss_function(out.reshape(batch_size) , y)
        hn = hn.detach()
        cn = cn.detach()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        print(f'Training Loss: {loss.item()}')
        if batch == len(dataloader)-1:
            loss = loss.item()
            print(f"Train loss: {loss:>7f} ")
        loss_list.append(loss)
        predictions.append(out.detach().numpy())
        return predictions, loss_list

In [None]:
def test(dataloader, model):
    predictions = []
    loss_list = []
    hn , cn = model.init()
    model.eval()
    for batch , item in enumerate(dataloader):
        x , y = item
        y = y.type(torch.FloatTensor)
        #x = x.to(device)
        #y = y.to(device)
        out , hn , cn = model(x.reshape(seq_len,batch_size,1),hn,cn)
        loss = loss_function(out.reshape(batch_size) , y)
        print(f"test loss: {loss.item():>7f} ")
        if batch == len(dataloader)-1:
            loss = loss.item()
            print(f"Test loss: {loss:>7f} ")
        predictions.append(out.detach().numpy())
        loss_list.append(loss)
        return predictions, loss_list

In [None]:
train_loader, val_loader, test_loader, test_loader_one = torch_data_loader(x_train, x_val, x_test)

torch.Size([373, 1])
<torch.utils.data.dataloader.DataLoader object at 0x7fca1c33c340>
<torch.utils.data.dataset.TensorDataset object at 0x7fc9dd5dcf70>


In [None]:
epochs = 100
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}: ")
    train(train_loader, covid_forecast_model)
test_predictions = test(test_loader, covid_forecast_model)
#print(test_predictions[:][:][:][0])

Epoch 1: 


ModuleAttributeError: 'LSTMModel' object has no attribute 'init'