In [1]:
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
import seaborn as sns
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


In [2]:
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size)

        self.linear = nn.Linear(hidden_layer_size, output_size)

        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]


In [3]:
def make_state_predictions(train, test, state, prediction):
    scaler = MinMaxScaler(feature_range=(-1, 1))
    
    train_data = train[prediction].values.astype(float)
    test_data = test[prediction].values.astype(float)

    train_data_normalized = scaler.fit_transform(train_data .reshape(-1, 1))
    train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)
    train_window = 30
    train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

    model = LSTM()
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    epochs = 75

    for i in range(epochs):
        for seq, labels in train_inout_seq:
            optimizer.zero_grad()
            model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                            torch.zeros(1, 1, model.hidden_layer_size))

            y_pred = model(seq)

            single_loss = loss_function(y_pred, labels)
            single_loss.backward()
            optimizer.step()

    fut_pred = 30

    test_inputs = train_data_normalized[-train_window:].tolist()

    model.eval()

    for i in range(fut_pred):

        seq = torch.FloatTensor(test_inputs[-train_window:])
        with torch.no_grad():
            model.hidden = (torch.zeros(1, 1, model.hidden_layer_size),
                            torch.zeros(1, 1, model.hidden_layer_size))
            test_inputs.append(model(seq).item())

    actual_predictions = scaler.inverse_transform(np.array(test_inputs[train_window:] ).reshape(-1, 1))
    return actual_predictions



In [4]:
data_tendency = pd.read_csv('train_trendency.csv')
data_tendency['Date'] = pd.to_datetime(data_tendency['Date'], format="%m-%d-%Y")
data_tendency['Date'] = data_tendency['Date'].dt.strftime('%m-%d-%Y')
test = pd.read_csv('test.csv')

for states in test['Province_State'].unique(): 
    test_one = test[test['Province_State'] == states]
    test_idx = test_one.set_index(["Date"], drop=True)
    test_idx = test_idx.drop("Unnamed: 0", axis=1)

    data_one = data_tendency[data_tendency['Province_State'] == states]
    df_idx = data_one.set_index(["Date"], drop=True)
    
    print(states)
    predictions = make_state_predictions(df_idx, test_idx, states, 'Deaths')

    flat_list = [item for sublist in predictions for item in sublist]
    test[test['Province_State'] == states]['Deaths']
    state_indices = test[test['Province_State'] == states]['Unnamed: 0'].values
    for i in state_indices:
        test.loc[test['Unnamed: 0'] == i, 'Deaths'] = flat_list[0]
        flat_list = flat_list[1:]
    
    print(states)
    predictions = make_state_predictions(df_idx, test_idx, states, 'Confirmed')

    flat_list = [item for sublist in predictions for item in sublist]
    test[test['Province_State'] == states]['Confirmed']
    state_indices = test[test['Province_State'] == states]['Unnamed: 0'].values
    for i in state_indices:
        test.loc[test['Unnamed: 0'] == i, 'Confirmed'] = flat_list[0]
        flat_list = flat_list[1:]

Alabama
Alabama
Alaska
Alaska
Arizona
Arizona
Arkansas
Arkansas
California
California
Colorado
Colorado
Connecticut
Connecticut
Delaware
Delaware
Florida
Florida
Georgia
Georgia
Hawaii
Hawaii
Idaho
Idaho
Illinois
Illinois
Indiana
Indiana
Iowa
Iowa
Kansas
Kansas
Kentucky
Kentucky
Louisiana
Louisiana
Maine
Maine
Maryland
Maryland
Massachusetts
Massachusetts
Michigan
Michigan
Minnesota
Minnesota
Mississippi
Mississippi
Missouri
Missouri
Montana
Montana
Nebraska
Nebraska
Nevada
Nevada
New Hampshire
New Hampshire
New Jersey
New Jersey
New Mexico
New Mexico
New York
New York
North Carolina
North Carolina
North Dakota
North Dakota
Ohio
Ohio
Oklahoma
Oklahoma
Oregon
Oregon
Pennsylvania
Pennsylvania
Rhode Island
Rhode Island
South Carolina
South Carolina
South Dakota
South Dakota
Tennessee
Tennessee
Texas
Texas
Utah
Utah
Vermont
Vermont
Virginia
Virginia
Washington
Washington
West Virginia
West Virginia
Wisconsin
Wisconsin
Wyoming
Wyoming


In [5]:
test.to_csv('results3.csv')
results = test.drop("Province_State", axis=1)
results = results.drop("Date", axis=1)
results.rename(columns = {'Unnamed: 0':'ID'}, inplace = True)
results.to_csv('submit3.csv')
results

Unnamed: 0,ID,Unnamed: 0.1,Confirmed,Deaths
0,0,0,5.139128e+05,10472.914835
1,1,1,6.367897e+04,314.451297
2,2,2,8.445481e+05,16782.711560
3,3,3,3.292941e+05,5684.064695
4,4,4,3.650360e+06,60900.534919
...,...,...,...,...
1495,1495,1495,6.143826e+05,9874.587459
1496,1496,1496,3.758915e+05,5169.913273
1497,1497,1497,1.457877e+05,2666.945983
1498,1498,1498,6.384768e+05,7267.109333
