In [1]:
import numpy as np
import pandas as pd
import os
import math
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
pd.set_option('display.max_columns',100)
from google.colab import drive
from scipy.stats import pearsonr
import torch
from torch.nn.modules.module import Module
from torch.autograd import Variable
from torch.nn.parameter import Parameter
import torch.utils as utils
from torch import nn
import torch.nn.functional as F
from tqdm.notebook import tqdm_notebook
from sklearn.preprocessing import MinMaxScaler,StandardScaler
import torch.optim as optim
drive.mount('/content/drive')
root_dir = "/content/drive/MyDrive/Colab Notebooks/Public Folder: SafeGraph Group /Safegraph Data/Summary"
os.chdir(f"{root_dir}")

Mounted at /content/drive


In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
cuda = True if torch.cuda.is_available() else False
drop_rate = 0.5
time_step = 5
epochs = 200
feature_matrix = np.load('feature_matrix.npy')

In [3]:
scaler = StandardScaler()
def normalization(feature_matrix):
  feature_mat = np.zeros((feature_matrix.shape[0],feature_matrix.shape[1],feature_matrix.shape[2]))
  target = np.zeros((feature_matrix.shape[0],feature_matrix.shape[1]))
  for i in range(feature_matrix.shape[0]):
    feature_vector=feature_matrix[i]
    f1 = scaler.fit_transform(feature_vector[:,0].reshape(-1,1)).reshape(-1,1)
    f2 = scaler.fit_transform(feature_vector[:,1].reshape(-1,1)).reshape(-1,1)
    visit_nums = scaler.fit_transform(feature_vector[:,2].reshape(-1,1)).reshape(-1,1)
    feature_vector = np.stack([f1,f2,visit_nums],axis=1).reshape(feature_matrix.shape[1],feature_matrix.shape[2])
    feature_mat[i] = feature_vector
    target[i] = visit_nums.flatten()
  return feature_mat, target
features = feature_matrix[:,:,[0,3,4]]
features, results = normalization(features)
train_idx = int(0.7 * features.shape[0])
train_features = features[:train_idx,:,:]
test_features = features[train_idx:,:,:]

In [4]:
def get_features(features, index):
  f = features[:,index,:]
  new = np.zeros((len(f) - time_step, time_step, 3))
  target = np.zeros((len(f) - time_step, 1))
  for i in range(len(f) - time_step):
    new[i] = f[i:i+time_step]
    target[i] = f[i+time_step, 2]
  return new, target

In [5]:
class MLP(torch.nn.Module):
    def __init__(self, n_i, n_h, n_o):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(n_i, n_h)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(n_h, n_o)
    def forward(self, input):
        input = self.linear1(input)
        input = self.relu(input)
        input = F.dropout(input, p=drop_rate, training=self.training)
        input = self.linear2(input)
        return input
###Convert the data to tensor 
def totensor(data):
    return torch.Tensor(data).to(device)

class LSTM(nn.Module):
    def __init__(self):
        super(LSTM,self).__init__()
        self.mlp = MLP(time_step,32,1)#input is the num of time_step
        self.lstm1 = nn.LSTM(input_size=3,hidden_size=128,num_layers=2,batch_first=True)#3 is for number of features
        self.lstm2 = nn.LSTM(input_size=128,hidden_size=1,num_layers=2,batch_first=True)
    def forward(self, x):
        x1,x2,x3 = x.size()
        h1, (h1_T,c1_T) = self.lstm1(x)
        h2, (h2_T, c2_T) = self.lstm2(h1)
        h2 = h2.view(x1,-1)
        output= self.mlp(h2)
        return output 

In [6]:
min_val_loss = np.inf
mean_losses = []
losses = pd.DataFrame(columns = ['Training Loss', 'Testing Loss'])
model = LSTM()
loss = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(),lr=0.001)
for epoch in tqdm_notebook(range(1, epochs + 1)):
    mean_loss, n = 0.0 , train_features.shape[1]
    train_mean_loss, test_mean_loss = 0, 0
    for i in range(n):
      train, train_target = get_features(totensor(train_features), i)
      train, train_target = totensor(train), totensor(train_target)
      test, test_target = get_features(totensor(test_features), i)
      test, test_target = totensor(test), totensor(test_target)
      y_train, y_test = model(train), model(test)
      train_loss, test_loss = loss(y_train, train_target), loss(y_test, test_target)
      optimizer.zero_grad()
      train_loss.backward()
      optimizer.step()
      losses.loc[i,'Training Loss'], losses.loc[i,'Testing Loss'] = train_loss.detach().numpy(), test_loss.detach().numpy()
    train_loss, test_loss = losses['Training Loss'].mean(), losses['Testing Loss'].mean()
    mean_losses.append((train_loss, test_loss))
    if train_loss < min_val_loss:
      min_val_loss = train_loss
    print('Epoch: {:03d} | Lr: {:.20f} |Train loss: {:.8f}|Test loss: {:.8f}'.\
          format(epoch, optimizer.param_groups[0]['lr'], min_val_loss, test_loss))

print('\nTraining finished.\n')

  0%|          | 0/200 [00:00<?, ?it/s]

Epoch: 001 | Lr: 0.00100000000000000002 |Train loss: 0.87677940|Test loss: 0.92106085
Epoch: 002 | Lr: 0.00100000000000000002 |Train loss: 0.52883968|Test loss: 0.57368730
Epoch: 003 | Lr: 0.00100000000000000002 |Train loss: 0.38273744|Test loss: 0.52325443
Epoch: 004 | Lr: 0.00100000000000000002 |Train loss: 0.30990746|Test loss: 0.41723964
Epoch: 005 | Lr: 0.00100000000000000002 |Train loss: 0.28445313|Test loss: 0.42778846
Epoch: 006 | Lr: 0.00100000000000000002 |Train loss: 0.28445313|Test loss: 0.38844083
Epoch: 007 | Lr: 0.00100000000000000002 |Train loss: 0.25962535|Test loss: 0.49303278
Epoch: 008 | Lr: 0.00100000000000000002 |Train loss: 0.25962535|Test loss: 0.42399413
Epoch: 009 | Lr: 0.00100000000000000002 |Train loss: 0.25418689|Test loss: 0.50550355
Epoch: 010 | Lr: 0.00100000000000000002 |Train loss: 0.25418689|Test loss: 0.47527446
Epoch: 011 | Lr: 0.00100000000000000002 |Train loss: 0.25418689|Test loss: 0.55059960
Epoch: 012 | Lr: 0.00100000000000000002 |Train loss: 0

In [None]:
mean_losses = np.array(mean_losses)
plt.plot(np.arange(200), mean_losses[:,0])
plt.plot(np.arange(200), mean_losses[:,1])
plt.xlabel('epoch')
plt.ylabel('Training Loss')
plt.legend(['Train','Test'])