In [148]:
import torch
import numpy as np
from torch import nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader

In [249]:
# Load the CDC incident-hospitalization truth file, order it by date then location,
# and keep only rows dated 2022-01-01 or later whose location is not the 'US' aggregate.
truth_df = (
    pd.read_csv("../data/CDC/truth-Incident Hospitalizations.csv")
    .sort_values(by=['date', 'location'])
    .loc[lambda frame: (frame['date'] >= '2022-01-01') & (frame['location'] != 'US')]
)

# Distinct dates / locations in order of first appearance in the sorted, filtered frame.
unique_dates, unique_states = (truth_df[column].unique() for column in ('date', 'location'))

In [250]:
# Build the (n_dates, n_locations) matrix of `value` in a single reshape instead of
# filtering the whole frame once per (date, location) cell.
# `pivot` raises ValueError when a (date, location) pair occurs more than once; the
# `reindex` keeps rows/columns in exactly the order of `unique_dates` / `unique_states`.
weeks = (
    truth_df
    .pivot(index='date', columns='location', values='value')
    .reindex(index=unique_dates, columns=unique_states)
    .to_numpy(dtype=float)
)

# With duplicates ruled out by `pivot`, the frame has one row per matrix cell only when
# every (date, location) pair is present; otherwise a gap would silently become NaN.
if weeks.size != len(truth_df):
    raise ValueError("truth_df is missing one or more (date, location) observations")

weeks.shape

(58, 53)

In [251]:
# Display the filtered frame (rows dated 2022-01-01 or later, 'US' rows removed).
truth_df

Unnamed: 0,date,location,location_name,value
5151,2022-01-01,01,Alabama,60
5150,2022-01-01,02,Alaska,8
5153,2022-01-01,04,Arizona,149
5152,2022-01-01,05,Arkansas,33
5154,2022-01-01,06,California,97
...,...,...,...,...
8280,2023-02-04,54,West Virginia,28
8279,2023-02-04,55,Wisconsin,23
8281,2023-02-04,56,Wyoming,1
8267,2023-02-04,72,Puerto Rico,56


In [252]:
# Display the value matrix: one row per entry of `unique_dates`, one column per entry of `unique_states`.
weeks

array([[ 60.,   8., 149., ...,  20.,  15.,   0.],
       [ 25.,   3.,  79., ...,  10.,  22.,   0.],
       [ 33.,  15.,  37., ...,   2.,  19.,   0.],
       ...,
       [ 58.,  10., 109., ...,  11.,  49.,   0.],
       [ 42.,   3.,  48., ...,   5.,  54.,   0.],
       [ 34.,  11.,  56., ...,   1.,  56.,   0.]])

In [158]:
# Keep every row of `weeks` except the last one (the latest date, because rows follow the
# date-sorted order of `unique_dates`); `.copy()` detaches the result from `weeks`.
loader_temp = weeks[:-1].copy()

In [240]:
# Shape of the last row of `weeks`, i.e. the row that is left out of `loader_temp`.
# NOTE(review): the recorded output (54,) disagrees with the (58, 53) recorded for
# `weeks.shape`, and the execution counts are out of order — the outputs appear to come
# from different kernel states; restart and run all cells to confirm.
weeks[-1].shape

(54,)

In [211]:
# split the data into train and validation sets: the last 10 rows are held out for validation
train_data = loader_temp[:-10].copy()
val_data = loader_temp[-10:].copy()

# convert the numpy arrays to PyTorch tensors
# NOTE(review): rows of `loader_temp` are dates and columns are locations, so these slices
# take the first 10 *locations* as inputs and locations 10-13 as labels for the same row.
# If the intent is to forecast 4 future weeks from 10 past weeks, this needs sliding
# windows over the row (time) axis instead — confirm before trusting the losses.
train_inputs = torch.tensor(train_data[:, :10], dtype=torch.float32)
train_labels = torch.tensor(train_data[:, 10:14], dtype=torch.float32)
val_inputs = torch.tensor(val_data[:, :10], dtype=torch.float32)
val_labels = torch.tensor(val_data[:, 10:14], dtype=torch.float32)

# standardize the inputs with statistics computed on the training rows only, so nothing
# from the validation rows leaks into the scaling
mean = train_inputs.mean(dim=0)
std = train_inputs.std(dim=0)
# A column that is constant over the training rows has std == 0 and would become NaN/inf
# after the division below; scale such columns by 1 so they standardize to 0 instead.
std[std == 0] = 1.0
train_inputs = (train_inputs - mean) / std
val_inputs = (val_inputs - mean) / std

# create the datasets
train_dataset = torch.utils.data.TensorDataset(train_inputs, train_labels)
val_dataset = torch.utils.data.TensorDataset(val_inputs, val_labels)

# create data loaders for training and validation (fixed order, batches of 10 rows)
train_loader = DataLoader(train_dataset, batch_size=10)
val_loader = DataLoader(val_dataset, batch_size=10)

In [243]:
# (rows, columns) of the training split, i.e. `loader_temp` without its last 10 rows.
train_data.shape

(47, 54)

In [242]:
# Number of batches the training loader yields per epoch (batch_size=10).
len(train_loader)

5

In [244]:
class AutoMLP(nn.Module):
    """Fully connected network: two ReLU-activated hidden layers followed by a linear output layer."""

    def __init__(self, input_length, output_length, hidden_length):
        """Build the layer stack.

        Args:
            input_length: number of features in each input row.
            output_length: number of values produced for each row.
            hidden_length: width shared by both hidden layers.
        """
        super().__init__()
        # Layers are created in forward order so parameter names stay model.0 / model.2 / model.4.
        layers = [
            nn.Linear(input_length, hidden_length),
            nn.ReLU(),
            nn.Linear(hidden_length, hidden_length),
            nn.ReLU(),
            nn.Linear(hidden_length, output_length),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the resulting tensor."""
        prediction = self.model(x)
        return prediction

In [245]:
# Seed PyTorch's RNG so the random weight initialization, and therefore the loss curve,
# is the same on every Restart & Run All.
torch.manual_seed(0)

# 10 input features -> two hidden layers of 16 units -> 4 outputs, matching the 10-column
# input slice and 4-column label slice. Keyword arguments keep the sizes from being
# swapped silently if the constructor's parameter order ever changes.
model = AutoMLP(input_length=10, output_length=4, hidden_length=16)
criterion = nn.L1Loss()  # mean absolute error
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [246]:
# Train for a fixed number of epochs, reporting the mean per-batch training loss and the
# mean per-batch validation loss after every epoch.
NUM_EPOCHS = 3000

for epoch in range(NUM_EPOCHS):
    # The validation pass below switches the model to eval mode, so training mode has to
    # be restored at the start of every epoch; otherwise every epoch after the first
    # would train in eval mode (which matters as soon as dropout/batch-norm is added).
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Average over the number of batches; max(..., 1) avoids dividing by zero on an empty loader.
    print('Epoch %d loss: %.3f' % (epoch + 1, running_loss / max(len(train_loader), 1)))

    # validate the model after each epoch (no gradient tracking, no parameter updates)
    model.eval()
    with torch.no_grad():
        val_running_loss = 0.0
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss += loss.item()
        print('Validation loss after epoch %d: %.3f' % (epoch + 1, val_running_loss / max(len(val_loader), 1)))


Epoch 1 loss: 34.469
Validation loss after epoch 1: 215.802
Epoch 2 loss: 34.451
Validation loss after epoch 2: 215.750
Epoch 3 loss: 34.433
Validation loss after epoch 3: 215.695
Epoch 4 loss: 34.415
Validation loss after epoch 4: 215.636
Epoch 5 loss: 34.395
Validation loss after epoch 5: 215.573
Epoch 6 loss: 34.373
Validation loss after epoch 6: 215.502
Epoch 7 loss: 34.350
Validation loss after epoch 7: 215.421
Epoch 8 loss: 34.324
Validation loss after epoch 8: 215.326
Epoch 9 loss: 34.295
Validation loss after epoch 9: 215.214
Epoch 10 loss: 34.263
Validation loss after epoch 10: 215.082
Epoch 11 loss: 34.226
Validation loss after epoch 11: 214.927
Epoch 12 loss: 34.185
Validation loss after epoch 12: 214.750
Epoch 13 loss: 34.140
Validation loss after epoch 13: 214.543
Epoch 14 loss: 34.088
Validation loss after epoch 14: 214.293
Epoch 15 loss: 34.028
Validation loss after epoch 15: 213.997
Epoch 16 loss: 33.962
Validation loss after epoch 16: 213.646
Epoch 17 loss: 33.887
Vali

In [144]:
# proportions = [.90, .10]
# lengths = [int(p * len(train_loader)) for p in proportions]
# lengths[-1] = len(train_loader) - sum(lengths[:-1])
# tr_dataset, vl_dataset = torch.utils.data.random_split(train_loader, lengths)

In [None]:
# class AutoMLP(nn.Module):
#     def __init__(self,input_length, hidden_length, output_length):
#         super(AutoMLP, self).__init__()
#         self.input_length = input_length
#         self.hidden_length = hidden_length
#         self.output_length = output_length
#         self.fc1 = nn.Linear(self.input_length, self.hidden_length)
#         self.fc2 = nn.Linear(self.hidden_length, self.output_length)
#         self.relu = nn.ReLU()
#         self.softmax = nn.Softmax(dim=1)