In [None]:
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader

import git
import sys
import os

# Extend the import path so the project-local `modules` imports that follow
# can be resolved: add the enclosing git working tree and the parent of the
# current location.
_repo_root = git.Repo(".", search_parent_directories=True).working_tree_dir
# `__file__` is not defined inside a notebook kernel, so fall back to the
# current working directory there; a plain script still uses its own location.
_here = os.path.dirname(__file__) if "__file__" in globals() else os.getcwd()
_parent_dir = os.path.abspath(os.path.join(_here, ".."))
for _path in (_repo_root, _parent_dir):
    # Skip entries that are already present so re-running this cell does not
    # keep growing sys.path.
    if _path not in sys.path:
        sys.path.append(_path)

from modules.model import ShallowRegressionLSTM
from modules.model_utils import (
    SequenceDataset,
    train_lstm_model,
    test_lstm_model,
    lstm_predict,
)

In [None]:
# Read both input tables in one pass; the tuple order matches the names on
# the left-hand side (symptom table first, media table second).
symptom_data, media_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../datasets/combined.csv",
        "../datasets/media_count_ratio_all_2021.csv",
    )
)

In [None]:
# Column label looked up in both `symptom_data` and `media_data` to pick the
# series this notebook works with.
symptom = "symptom:shortness of breath"

In [None]:
# Names used for the model inputs and the value being predicted.
features = ["symptom", "media"]
target = "case"

# Pull the three series that make up the working frame.
symptom_series = symptom_data[symptom]
media_series = media_data[symptom]
case_series = symptom_data["daily_new_positives"]

# Join the series side by side, then relabel the columns as
# <features..., target> in that order.
df = pd.concat([symptom_series, media_series, case_series], axis=1)
df.columns = [*features, target]
df

In [None]:
# Row position at which the test split begins (80% of 365, i.e. 292).
test_start = int(365 * 0.8)

# Use positional slicing: `.loc[:test_start]` / `.loc[test_start:]` are
# label-based and inclusive on both ends, which would put row `test_start`
# in BOTH splits (a leaked test row and a duplicated index label once the
# splits are concatenated again). `.iloc` is half-open, so the two splits
# are disjoint and together cover every row exactly once.
df_train = df.iloc[:test_start].copy()
df_test = df.iloc[test_start:].copy()

In [None]:
# Standardise every column using statistics of the TRAINING rows only.
#
# Everything here is derived from the untouched frame `df` (selecting the rows
# that belong to each split) rather than from the already-scaled
# `df_train` / `df_test`. That makes the cell safe to re-run: previously a
# second execution recomputed `target_mean` / `target_stdev` from scaled data
# (about 0 and 1), which silently broke the later conversion of predictions
# back to original units.
columns = features + [target]
train_raw = df.loc[df_train.index, columns]
test_raw = df.loc[df_test.index, columns]

# Kept so model output can be mapped back to the target's original scale.
target_mean = train_raw[target].mean()
target_stdev = train_raw[target].std()

df_train = train_raw.copy()
df_test = test_raw.copy()
for column in columns:
    mean = train_raw[column].mean()
    stdev = train_raw[column].std()

    # The test split reuses the training statistics.
    df_train[column] = (train_raw[column] - mean) / stdev
    df_test[column] = (test_raw[column] - mean) / stdev

In [None]:
# Seed torch's RNG before any loader is iterated.
torch.manual_seed(101)

batch_size = 4
sequence_length = 30

# Both splits are wrapped with the same dataset settings.
dataset_kwargs = dict(
    target=target, features=features, sequence_length=sequence_length
)
train_dataset = SequenceDataset(df_train, **dataset_kwargs)
test_dataset = SequenceDataset(df_test, **dataset_kwargs)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Fetch a single batch to inspect the tensor shapes.
first_batch_iter = iter(train_loader)
X, y = next(first_batch_iter)

print("Features shape:", X.shape)
print("Target shape:", y.shape)

In [None]:
# Optimiser step size and LSTM hidden-state width.
learning_rate = 5e-5
num_hidden_units = 16

# Size the model's input from the feature list itself instead of a literal 2,
# so the model stays consistent with the datasets (which are built from the
# same `features` list) if that list is ever edited.
model = ShallowRegressionLSTM(
    num_features=len(features), hidden_units=num_hidden_units
)
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
def evaluate_on_test_set():
    """Run one evaluation pass over the test loader, then print a blank line."""
    test_lstm_model(test_loader, model, loss_function)
    print()


num_epochs = 10

# Baseline: evaluate once before any training step has been taken.
print("Untrained test\n--------")
evaluate_on_test_set()

# Each epoch is one training pass followed by one evaluation pass.
for epoch in range(num_epochs):
    print("Epoch {}\n---------".format(epoch))
    train_lstm_model(train_loader, model, loss_function, optimizer=optimizer)
    evaluate_on_test_set()

In [None]:
# A second, unshuffled loader over the training set: the predictions are
# assigned to the frame positionally below, so they must come back in dataset
# order (the training loader used for fitting shuffles).
train_eval_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

ystar_col = "forcasted_cases"
df_train[ystar_col] = lstm_predict(train_eval_loader, model).numpy()
df_test[ystar_col] = lstm_predict(test_loader, model).numpy()

# Stack both splits, keep the actual and predicted target columns, and undo
# the standardisation for both in a single vectorised expression. Building the
# result this way (instead of assigning column by column onto a column subset)
# avoids pandas' SettingWithCopyWarning and leaves no stray loop variable.
df_out = (
    pd.concat((df_train, df_test))[[target, ystar_col]] * target_stdev
    + target_mean
)

df_out