In [35]:
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset, DataLoader
import torch
import torch.nn as nn
import torch.optim as optim
import datetime

### LSTM

In [22]:
df = pd.read_csv('../normalizados/dados/3500105.csv')
df.head(5)

Unnamed: 0.1,Unnamed: 0,Município,Código IBGE,Populacao,Total 1ª Dose,Total 2ª Dose,Total Unica,Total Doses Aplicadas,População Vacinada dose1/População Total,População Vacinada/População Total,data,diagnostico_covid19,obito,media_movel_casos,media_movel_obitos,ocupacao_leitos,media_isolamento,Mean.R
0,0,ADAMANTINA,3500105.0,35111.0,0.0,0.0679,0.0,0.0,0.0,0.062927,2021-02-21,0.25,0.5,0.568966,0.444444,0.437308,0.462963,0.0
1,1,ADAMANTINA,3500105.0,35111.0,0.000559,0.068401,0.0,0.00054,0.000559,0.063391,2021-02-22,0.65625,0.5,0.62069,0.444444,0.452059,0.037037,0.0
2,2,ADAMANTINA,3500105.0,35111.0,0.001829,0.076029,0.0,0.003638,0.001829,0.07046,2021-02-23,0.3125,0.0,0.482759,0.444444,0.46681,0.111111,0.0
3,3,ADAMANTINA,3500105.0,35111.0,0.002134,0.076904,0.0,0.004106,0.002134,0.071271,2021-02-24,0.21875,0.5,0.431034,0.444444,0.437308,1.0,0.0
4,4,ADAMANTINA,3500105.0,35111.0,0.002896,0.09241,0.0,0.009112,0.002896,0.085641,2021-02-25,0.125,0.5,0.275862,0.555556,0.351875,0.092593,0.0


In [26]:
df = df.set_index(['data'])
df.index = pd.to_datetime(df.index)
df = df.rename(columns={'diagnostico_covid19': 'value'})
if not df.index.is_monotonic:
    df = df.sort_index()
df.head(3)

Unnamed: 0_level_0,Unnamed: 0,Município,Código IBGE,Populacao,Total 1ª Dose,Total 2ª Dose,Total Unica,Total Doses Aplicadas,População Vacinada dose1/População Total,População Vacinada/População Total,value,obito,media_movel_casos,media_movel_obitos,ocupacao_leitos,media_isolamento,Mean.R
data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2021-02-21,0,ADAMANTINA,3500105.0,35111.0,0.0,0.0679,0.0,0.0,0.0,0.062927,0.25,0.5,0.568966,0.444444,0.437308,0.462963,0.0
2021-02-22,1,ADAMANTINA,3500105.0,35111.0,0.000559,0.068401,0.0,0.00054,0.000559,0.063391,0.65625,0.5,0.62069,0.444444,0.452059,0.037037,0.0
2021-02-23,2,ADAMANTINA,3500105.0,35111.0,0.001829,0.076029,0.0,0.003638,0.001829,0.07046,0.3125,0.0,0.482759,0.444444,0.46681,0.111111,0.0


In [31]:
daily_cases = df['value']
test_data_size = 100
train_data = daily_cases[:-test_data_size]
test_data = daily_cases[-test_data_size:]
train_data.shape

(49,)

In [33]:
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data)-seq_length-1):
        x = data[i:(i+seq_length)]
        y = data[i+seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [36]:
seq_length = 5
X_train, y_train = create_sequences(train_data, seq_length)
X_test, y_test = create_sequences(test_data, seq_length)
X_train = torch.from_numpy(X_train).float()
y_train = torch.from_numpy(y_train).float()
X_test = torch.from_numpy(X_test).float()
y_test = torch.from_numpy(y_test).float()

In [46]:
X_train.shape

torch.Size([43, 5])

In [41]:
class CoronaVirusPredictor(nn.Module):
  def __init__(self, n_features, n_hidden, seq_len, n_layers=2):
    super(CoronaVirusPredictor, self).__init__()
    self.n_hidden = n_hidden
    self.seq_len = seq_len
    self.n_layers = n_layers
    self.lstm = nn.LSTM(
      input_size=n_features,
      hidden_size=n_hidden,
      num_layers=n_layers,
      dropout=0.5
    )
    self.linear = nn.Linear(in_features=n_hidden, out_features=1)
  def reset_hidden_state(self):
    self.hidden = (
        torch.zeros(self.n_layers, self.seq_len, self.n_hidden),
        torch.zeros(self.n_layers, self.seq_len, self.n_hidden)
    )
  def forward(self, sequences):
    lstm_out, self.hidden = self.lstm(
      sequences.view(len(sequences), self.seq_len, -1),
      self.hidden
    )
    last_time_step = \
      lstm_out.view(self.seq_len, len(sequences), self.n_hidden)[-1]
    y_pred = self.linear(last_time_step)
    return y_pred

In [42]:
def train_model(
  model,
  train_data,
  train_labels,
  test_data=None,
  test_labels=None
):
  loss_fn = torch.nn.MSELoss(reduction='sum')
  optimiser = torch.optim.Adam(model.parameters(), lr=1e-3)
  num_epochs = 60
  train_hist = np.zeros(num_epochs)
  test_hist = np.zeros(num_epochs)
  for t in range(num_epochs):
    model.reset_hidden_state()
    y_pred = model(X_train)
    loss = loss_fn(y_pred.float(), y_train)
    if test_data is not None:
      with torch.no_grad():
        y_test_pred = model(X_test)
        test_loss = loss_fn(y_test_pred.float(), y_test)
      test_hist[t] = test_loss.item()
      if t % 10 == 0:
        print(f'Epoch {t} train loss: {loss.item()} test loss: {test_loss.item()}')
    elif t % 10 == 0:
      print(f'Epoch {t} train loss: {loss.item()}')
    train_hist[t] = loss.item()
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
  return model.eval(), train_hist, test_hist

In [43]:
model = CoronaVirusPredictor(
  n_features=1,
  n_hidden=512,
  seq_len=seq_length,
  n_layers=2
)
model, train_hist, test_hist = train_model(
  model,
  X_train,
  y_train,
  X_test,
  y_test
)


Using a target size (torch.Size([43])) that is different to the input size (torch.Size([43, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.


Using a target size (torch.Size([94])) that is different to the input size (torch.Size([94, 1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.



Epoch 0 train loss: 224.5749969482422 test loss: 1322.40283203125
Epoch 10 train loss: 62.358428955078125 test loss: 393.5260925292969
Epoch 20 train loss: 63.4815559387207 test loss: 409.72088623046875
Epoch 30 train loss: 62.58994674682617 test loss: 399.05450439453125
Epoch 40 train loss: 62.484493255615234 test loss: 389.0190124511719
Epoch 50 train loss: 62.415382385253906 test loss: 390.07537841796875
