In [1]:
import torch
import numpy as np
from torch import nn
import torch.optim as optim
import pandas as pd
from torch.utils.data import DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [29]:
states_in_the_prediction = ['01','02','04','05','06','08','09','10','12','13','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38','39','40',
 '41','42','44','45','46','47','48','49','50','51','53','54','55','56']

In [97]:
truth_df = pd.read_csv("../data/CDC/truth-Incident Hospitalizations.csv")
truth_df = truth_df[truth_df['date'] >= '2022-01-01']
truth_df = truth_df[truth_df['location'] != 'US']
truth_df = truth_df[truth_df['location'].isin(states_in_the_prediction)]
truth_df.sort_values(by=['date', 'location'], inplace=True)
unique_dates = truth_df['date'].unique()
unique_states = truth_df['location'].unique()

In [56]:
#### Standardize the data individually for each state
mean_arr = []
std_arr = []
for state in unique_states:
    state_values = truth_df[truth_df['location'] == state]['value'].values
    mean = state_values.mean()
    mean_arr.append(mean)
    std = state_values.std()
    std_arr.append(std)
    truth_df.loc[truth_df['location'] == state, 'norm_value'] = (state_values - mean) / std

In [98]:
weeks = np.zeros([len(unique_dates),len(unique_states)])
for id1,i in enumerate(unique_dates):
    for id2,j in enumerate(unique_states):
        weeks[id1,id2] = truth_df[(truth_df['date']==i) & (truth_df['location']==j)]['value'].values
        # weeks[id1,id2] = truth_df[(truth_df['date']==i) & (truth_df['location']==j)]['norm_value'].values

In [99]:
weeks.shape

(58, 50)

In [194]:
# split the data into train and validation sets
loader_temp = weeks.copy().transpose(1,0)
train_data = loader_temp[:,:-2].copy() #train_data = loader_temp[:,:-4].copy()
train_data = np.concatenate([train_data[:,i:i+10] for i in range(0, 46, 1)], axis = 0)
np.random.shuffle(train_data)
val_data = train_data[:500]
train_data = train_data[500:]
test_data = loader_temp[:,-8:].copy() # test_data = loader_temp[:,-10:].copy()

In [195]:
# convert the numpy arrays to PyTorch tensors
# train_inputs = torch.tensor(train_data[:, :6], dtype=torch.float32)
# train_labels = torch.tensor(train_data[:, 6:], dtype=torch.float32)

# val_inputs = torch.tensor(val_data[:, :6], dtype=torch.float32)
# val_labels = torch.tensor(val_data[:, 6:], dtype=torch.float32)

# test_inputs = torch.tensor(test_data[:, :6], dtype=torch.float32)
# test_labels = torch.tensor(test_data[:, 6:], dtype=torch.float32)

train_inputs = torch.tensor(train_data[:, :6], dtype=torch.float32)
train_labels = torch.tensor(train_data[:, 6:], dtype=torch.float32)

val_inputs = torch.tensor(val_data[:, :6], dtype=torch.float32)
val_labels = torch.tensor(val_data[:, 6:], dtype=torch.float32)

test_inputs = torch.tensor(test_data[:, :6], dtype=torch.float32)
test_labels = torch.tensor(test_data[:, 6:], dtype=torch.float32)

mean = train_inputs.mean()
std = train_inputs.std()

train_inputs = (train_inputs - mean) / std
val_inputs = (val_inputs - mean) / std
train_labels = (train_labels - mean) / std
val_labels = (val_labels - mean) / std
test_inputs = (test_inputs  - mean) / std

In [196]:
# create the datasets
train_dataset = torch.utils.data.TensorDataset(train_inputs, train_labels)
val_dataset =torch.utils.data.TensorDataset(val_inputs, val_labels)

# create data loaders for training and validation
train_loader = DataLoader(train_dataset, batch_size=8, shuffle = True)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle = False)

In [197]:
class AutoMLP(nn.Module):
    def __init__(self,input_length, output_length,hidden_length):
        super(AutoMLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_length, hidden_length),
            nn.ReLU(),
            nn.Linear(hidden_length, hidden_length),
            nn.ReLU(),
            nn.Linear(hidden_length, output_length),
        )
    def forward(self, x):
        return self.model(x)

In [229]:
model = AutoMLP(6, 4, 256).to(device) # 8, 16, 32, 64, 128, 256
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size= 10, gamma=0.9) # stepwise learning rate decay

In [233]:
best_val = 1e6
for epoch in range(300):
    running_loss = []
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss.append(loss.item())

    
    # validate the model after each epoch
    model.eval()
    with torch.no_grad():
        val_running_loss = []
        for i, data in enumerate(val_loader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            val_running_loss.append(loss.item())
    if epoch % 10 == 0:
        print('Epoch %d Training loss: %.3f Validation loss : %.3f' % (epoch + 1, np.mean(running_loss),  np.mean(val_running_loss)))
    scheduler.step()
    if np.mean(val_running_loss) < best_val:
        best_val = np.mean(val_running_loss)
        best_model = model

Epoch 1 Training loss: 0.138 Validation loss : 0.499
Epoch 11 Training loss: 0.137 Validation loss : 0.492
Epoch 21 Training loss: 0.135 Validation loss : 0.504
Epoch 31 Training loss: 0.134 Validation loss : 0.494
Epoch 41 Training loss: 0.134 Validation loss : 0.499
Epoch 51 Training loss: 0.133 Validation loss : 0.513
Epoch 61 Training loss: 0.132 Validation loss : 0.509
Epoch 71 Training loss: 0.132 Validation loss : 0.513
Epoch 81 Training loss: 0.131 Validation loss : 0.515
Epoch 91 Training loss: 0.130 Validation loss : 0.509
Epoch 101 Training loss: 0.130 Validation loss : 0.508
Epoch 111 Training loss: 0.130 Validation loss : 0.509
Epoch 121 Training loss: 0.129 Validation loss : 0.512
Epoch 131 Training loss: 0.129 Validation loss : 0.509
Epoch 141 Training loss: 0.129 Validation loss : 0.511
Epoch 151 Training loss: 0.128 Validation loss : 0.511
Epoch 161 Training loss: 0.128 Validation loss : 0.509
Epoch 171 Training loss: 0.128 Validation loss : 0.513
Epoch 181 Training lo

In [159]:
tryput_1 = (best_model(test_inputs.to(device))* std + mean)
np.mean(np.abs(np.abs([item for sublist in tryput_1.detach().numpy() for item in sublist]) - np.array(truth_df[truth_df['date'] == '2023-01-28']['value'])))

16.904602546691894

In [94]:
# pred = best_model(test_inputs.to(device))[:,np.array([False, False, False,True,])]
# result = []
# for i in range(len(pred)):
#     result.append(((pred[i] * std_arr[i])+ mean_arr[i]).tolist())
# flat_list = [item for sublist in result for item in sublist]
# p.mean(np.abs(flat_list - np.array(truth_df[truth_df['date'] == '2023-02-04']['value'])))

In [234]:
test_preds = best_model(test_inputs.to(device)) * std + mean
temp = test_preds[:,np.array([True, False, False,False,])]
# flat_list = [item for sublist in temp for item in sublist]
# flat_list.to_list()
flat_list = [item for sublist in temp.detach().numpy() for item in sublist]
np.mean(np.abs(np.abs(flat_list) - np.array(truth_df[truth_df['date'] == '2023-01-28']['value'])))

20.660641059875488

In [235]:
test_preds = best_model(test_inputs.to(device)) * std + mean
temp = test_preds[:,np.array([False, True, False,False,])]
# flat_list = [item for sublist in temp for item in sublist]
# flat_list.to_list()
flat_list = [item for sublist in temp.detach().numpy() for item in sublist]
np.mean(np.abs(np.abs(flat_list) - np.array(truth_df[truth_df['date'] == '2023-02-04']['value'])))

23.372677421569826

In [152]:
np.abs(flat_list)

array([ 29.886135  ,  10.501411  ,   9.740902  ,  12.0020485 ,
         2.3397179 ,  32.494225  ,   0.6734886 ,   4.4903793 ,
       409.8579    ,  41.751476  ,   2.7727165 ,   2.7423897 ,
        11.769562  ,   3.4826698 ,  10.590172  ,   6.6613884 ,
        29.887796  ,  38.588455  ,   1.9614639 ,   9.76577   ,
         3.7868843 ,  23.743057  ,   0.8619957 ,  40.73269   ,
        35.113327  ,   6.3290825 ,  26.505194  ,   5.2713547 ,
        28.366827  ,   6.78788   ,   2.1146393 , 100.33423   ,
         2.4374504 ,  11.983963  ,  26.507687  ,  12.212769  ,
        59.106144  , 110.23315   ,   3.5222511 ,  31.665165  ,
        10.325104  ,  27.583416  , 212.01239   ,  11.617561  ,
        13.109932  ,  13.721832  ,  54.408634  ,  22.691063  ,
         1.3688316 ,   0.45460892], dtype=float32)

In [176]:
np.array(truth_df[truth_df['date'] == '2023-01-21']['value'])

array([ 58,  10, 109,  61, 198,  33,  31,   5, 522,  72,   0,  30,  62,
        96,  43,  41,  30,  70,  31,  61, 122, 154,  25,  53,  94,  12,
        57,  21,  20, 102,  30, 243,  60,  28, 107, 139,  25, 285,  14,
        60,  11,  76, 454,  17,  11,  64,  80,  13,  95,  11])

In [153]:
np.abs(np.abs(flat_list) - np.array(truth_df[truth_df['date'] == '2023-01-28']['value']))

array([ 12.1138649 ,   7.50141144,  38.25909805,  17.99795151,
       112.66028214,   2.49422455,  31.32651138,   0.50962067,
        50.85791016,  26.24852371,   8.22728348,  14.25761032,
        49.23043823,  62.51733017,  34.40982819,  10.3386116 ,
         0.8877964 ,   3.4115448 ,   9.03853607,  27.23423004,
        67.21311569,  62.25694275,  14.1380043 ,  10.7326889 ,
        32.88667297,   3.67091751,   4.50519371,   5.72864532,
         8.36682701,  63.21212006,  14.88536072,  53.66577148,
        32.56254959,  10.01603699,  40.49231339,  83.78723145,
        49.10614395,  27.76685333,   6.47774887,   2.33483505,
         5.67489624,  10.41658401, 116.98760986,   6.61756134,
         7.10993195,  39.27816772,   1.59136581,   3.69106293,
        42.63116837,   4.54539108])

In [124]:
test_preds = best_model(test_inputs.to(device)) * std + mean
print("test error:", torch.mean(torch.abs(test_preds.cpu() - test_labels)))

RuntimeError: The size of tensor a (4) must match the size of tensor b (2) at non-singleton dimension 1

In [177]:
GLEAM_01_23_pd = pd.read_csv("../data/GLEAM/2023-01-23-MOBS-GLEAM_FLUH.csv")

In [180]:
GLEAM_02_04_pred = GLEAM_01_23_pd[(GLEAM_01_23_pd['target'] == '2 wk ahead inc flu hosp') & (GLEAM_01_23_pd['quantile'] == 0.5) & (GLEAM_01_23_pd['location'].isin(states_in_the_prediction))][['location','value']]['value'].to_numpy()

In [144]:
# proportions = [.90, .10]
# lengths = [int(p * len(train_loader)) for p in proportions]
# lengths[-1] = len(train_loader) - sum(lengths[:-1])
# tr_dataset, vl_dataset = torch.utils.data.random_split(train_loader, lengths)

In [None]:
# class AutoMLP(nn.Module):
#     def __init__(self,input_length, hidden_length, output_length):
#         super(AutoMLP, self).__init__()
#         self.input_length = input_length
#         self.hidden_length = hidden_length
#         self.output_length = output_length
#         self.fc1 = nn.Linear(self.input_length, self.hidden_length)
#         self.fc2 = nn.Linear(self.hidden_length, self.output_length)
#         self.relu = nn.ReLU()
#         self.softmax = nn.Softmax(dim=1)