In [1]:
%matplotlib widget

# Load and preprocess data
import torch
from torch import nn
import numpy as np
import random

import os
import re

from prep_data import preprocess_data_3

# Seed PyTorch, the stdlib RNG and NumPy (in that order) so runs are repeatable
seed = 2023
for _seed_fn in (torch.manual_seed, random.seed, np.random.seed):
	_seed_fn(seed)

# NOTE(review): the meaning of the four positional arguments is defined in
# prep_data.preprocess_data_3 and is not visible from this notebook - confirm there.
train_data, test_data = preprocess_data_3(17, 1, 16, 0.9)

Check that the loss function and the activation function are not causing vanishing gradients.

In [7]:
# Re-seed all three RNGs (PyTorch, stdlib, NumPy) at the top of this cell;
# presumably so the cell gives the same result when re-run on its own.
seed = 2023
for _seed_fn in (torch.manual_seed, random.seed, np.random.seed):
	_seed_fn(seed)

def cuda_get_device():
	"""Return the CUDA device when CUDA is available, otherwise the CPU device.

	A short notice is printed when falling back to the CPU.
	"""
	if not torch.cuda.is_available():
		print("GPU not available, CPU used")
		return torch.device("cpu")
	return torch.device("cuda")

# Device used for the model and for every tensor moved with .to(device)
device = cuda_get_device()

class GRUModel(nn.Module):
	"""GRU followed by one linear layer applied to every GRU output row.

	Args:
		input_size: Number of features per time step fed to the GRU.
		output_size: Number of values the linear layer emits per row.
		hidden_dim: Size of the GRU hidden state.
		n_layers: Number of stacked GRU layers.
	"""

	def __init__(self, input_size, output_size, hidden_dim, n_layers):
		super().__init__()

		# Kept as attributes; hidden_dim is needed again in forward()
		self.hidden_dim = hidden_dim
		self.n_layers = n_layers

		# n_layers used to be stored but never handed to the GRU, so the
		# argument had no effect. With n_layers=1 this is identical to the
		# previous single-layer GRU.
		self.gru = nn.GRU(
			input_size=input_size,
			hidden_size=hidden_dim,
			num_layers=n_layers,
		)
		# TODO(review): open question from the original author - may need a
		# separate fc layer for each GRU unit?
		self.fc = nn.Linear(hidden_dim, output_size)

	def forward(self, x):
		"""Run x through the GRU and project every output row.

		Args:
			x: Input accepted by nn.GRU with the default batch_first=False.

		Returns:
			Tensor of shape (N, output_size), where N is the number of rows
			left after flattening the GRU output to (-1, hidden_dim).
		"""
		# nn.GRU returns (output, h_n); only the per-step output is used
		gru_out, _ = self.gru(x)

		# Collapse all leading dimensions so the linear layer sees 2-D input
		flat = gru_out.contiguous().view(-1, self.hidden_dim)
		return self.fc(flat)

# Hyperparameters: model
hidden_state_size = 12
fc_layers = 1  # forwarded to GRUModel as its n_layers argument
# Hyperparameters: training
n_epochs = 265
lr = 0.001

# Build the network and place it on the selected device
model = GRUModel(
	input_size=64,
	output_size=1,
	hidden_dim=hidden_state_size,
	n_layers=fc_layers,
).to(device)

# Loss and optimizer. BCEWithLogitsLoss takes raw logits, so the model
# output is not passed through a sigmoid before the loss.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Learning-rate schedule: multiply the rate by gamma at each milestone epoch.
# With n_epochs = 265 only the first milestone (150) is ever reached.
# We may want to increase gamma so that the number of epochs can be reduced.
milestones = [150, 300]
scheduler = torch.optim.lr_scheduler.MultiStepLR(
	optimizer,
	milestones=milestones,
	gamma=0.2,
)

# Training Run: one optimizer step per item of train_data, one scheduler step per epoch
for epoch in range(1, n_epochs + 1):
	# Running sum of the per-sample losses for this epoch (float64, shape (1,))
	epoch_losses = torch.zeros(1, dtype=torch.float64, device=device)
	for data in train_data:
		optimizer.zero_grad()
		input_data = torch.from_numpy(data[0]).float().to(device)
		target_pred = torch.tensor(data[1]).unsqueeze(0).float().to(device)
		# Only the last row of the model output is scored against the target
		output = model(input_data)[-1]
		loss = criterion(output, target_pred)
		loss.backward()
		optimizer.step()
		# Detach before accumulating: adding the graph-attached loss would keep
		# autograd history for every step of the epoch alive in epoch_losses.
		epoch_losses += loss.detach()
	scheduler.step()
	# Report the summed loss every fifth epoch
	if epoch % 5 == 0:
		print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
		print("Loss: {:.4f}".format(epoch_losses.to('cpu').item()))

Epoch: 5/265............. Loss: 207.1676
Epoch: 10/265............. Loss: 158.9786
Epoch: 15/265............. Loss: 128.6552
Epoch: 20/265............. Loss: 107.4925
Epoch: 25/265............. Loss: 91.4171
Epoch: 30/265............. Loss: 79.3877
Epoch: 35/265............. Loss: 72.0446
Epoch: 40/265............. Loss: 75.2239
Epoch: 45/265............. Loss: 69.1583
Epoch: 50/265............. Loss: 59.3399
Epoch: 55/265............. Loss: 53.6087
Epoch: 60/265............. Loss: 44.4152
Epoch: 65/265............. Loss: 35.0096
Epoch: 70/265............. Loss: 41.5598
Epoch: 75/265............. Loss: 29.8573
Epoch: 80/265............. Loss: 48.7866
Epoch: 85/265............. Loss: 22.5969
Epoch: 90/265............. Loss: 21.2739
Epoch: 95/265............. Loss: 27.2620
Epoch: 100/265............. Loss: 15.0485
Epoch: 105/265............. Loss: 22.8979
Epoch: 110/265............. Loss: 38.4442
Epoch: 115/265............. Loss: 14.1057
Epoch: 120/265............. Loss: 32.0949
Epoch: 1

In [8]:
import matplotlib.pyplot as plt

# Score the model on the training data without tracking gradients
predictions, targets = [], []
with torch.no_grad():
	for data in train_data:
		input_data = torch.from_numpy(data[0]).float().to(device)
		# Last row of the model output is the logit; sigmoid maps it to (0, 1)
		logit = model(input_data)[-1]
		predictions.append(torch.sigmoid(logit).cpu().item())
		targets.append(torch.tensor(data[1]).unsqueeze(0).item())

# Round each probability to the nearest integer to get a hard prediction
pred = np.rint(np.array(predictions)).astype(int)
targets_numpy = np.array(targets)

# Optional visual check (left disabled):
# plt.figure()
# plt.plot(pred)
# plt.plot(targets_numpy)
# plt.show()

# Printed in order: summed |target - prediction| (the number of mismatches,
# assuming 0/1 labels), sum of targets, sum of predictions
abs_diff_total = np.sum(np.abs(targets_numpy - pred))
target_total = np.sum(targets_numpy)
pred_total = np.sum(pred)
print(abs_diff_total)
print(target_total)
print(pred_total)

3
290
289


In [9]:
# Evaluate on the test dataset (no gradient tracking)
predictions, targets = [], []
with torch.no_grad():
	for data in test_data:
		input_data = torch.from_numpy(data[0]).float().to(device)
		# Last row of the model output is the logit; sigmoid maps it to (0, 1)
		logit = model(input_data)[-1]
		predictions.append(torch.sigmoid(logit).cpu().item())
		targets.append(torch.tensor(data[1]).unsqueeze(0).item())

# Round each probability to the nearest integer to get a hard prediction
pred = np.rint(np.array(predictions)).astype(int)
targets_numpy = np.array(targets)

# Printed in order: sum of predictions, summed |target - prediction| (the
# number of mismatches, assuming 0/1 labels), sum of targets
pred_total = np.sum(pred)
abs_diff_total = np.sum(np.abs(targets_numpy - pred))
target_total = np.sum(targets_numpy)
print(pred_total)
print(abs_diff_total)
print(target_total)

# Disabled debugging aids for locating the mismatching samples
# NOTE(review): test_targets is not defined in the visible cells - confirm before re-enabling.
# print(np.where(test_targets != pred))
# print(np.where(np.abs(test_targets-pred) != 0))

29
10
33


In [9]:
# Number of items in test_data, for putting the test error count in context
print(len(test_data))

66
