In [5]:
# Automatically reload files when they are changed so there is no need to restart the kernel.
%load_ext autoreload
%autoreload 2
# For plotting the loss curve
%matplotlib widget

import torch
from torch import nn
import numpy as np
import random
import matplotlib.pyplot as plt
import pickle

# Ask cuDNN to pick deterministic algorithms so repeated runs are comparable
torch.backends.cudnn.deterministic = True

# This is the seed that I used to get the reported accuracy levels
seed = 2023
def reset_seed(seed_value=None):
	"""Re-seed the torch, `random` and NumPy global random number generators.

	Call this at the top of any cell whose result depends on random numbers
	so that re-running the cell gives the same result.

	Args:
		seed_value: Seed to apply to all three generators. When omitted (or
			None) the module-level `seed` is used, which is the original
			zero-argument behavior.
	"""
	if seed_value is None:
		seed_value = seed
	torch.manual_seed(seed_value)
	random.seed(seed_value)
	np.random.seed(seed_value)

def cuda_get_device():
	"""Return the torch device to run on, preferring the GPU.

	Returns:
		torch.device("cuda") when CUDA is available; otherwise prints a
		notice and returns torch.device("cpu").
	"""
	if not torch.cuda.is_available():
		print("GPU not available, CPU used")
		return torch.device("cpu")
	return torch.device("cuda")

# Define the model class
class GRUWithConvLayer(nn.Module):
	"""Single-channel 2D convolution followed by a GRU and a two-layer linear head.

	The convolution is unpadded with stride 1, so a (kh, kw) kernel shrinks the
	sequence axis by kh - 1 and the feature axis by kw - 1. The GRU is sized
	for the shrunken feature axis.

	Args:
		input_size: Width of the last axis of the input (features per time step).
		output_size: Number of values produced per time step.
		hidden_dim: Size of the GRU hidden state.
		conv_dropout: Dropout probability applied between the convolution
			(after batch norm) and the GRU.
		fc_dropout: Dropout probability applied between the two linear layers.
		kernel_size: Convolution kernel as an int or a (height, width) pair.
			Defaults to (5, 5), which reproduces the original architecture.

	Raises:
		ValueError: If the kernel is wider than `input_size`, which would
			leave the GRU with no input features.
	"""
	def __init__(self, input_size, output_size, hidden_dim, conv_dropout, fc_dropout, kernel_size=(5, 5)):
		super().__init__()
		if isinstance(kernel_size, int):
			kernel_size = (kernel_size, kernel_size)
		kernel_height, kernel_width = kernel_size
		# Unpadded, stride-1 convolution removes (kernel_width - 1) features
		gru_input_size = input_size - (kernel_width - 1)
		if gru_input_size < 1:
			raise ValueError(
				"kernel width {} is too large for input_size {}".format(kernel_width, input_size)
			)

		self.hidden_dim = hidden_dim
		self.kernel_size = (kernel_height, kernel_width)
		# Layers are created in the same order as before so that a seeded
		# construction draws identical initial weights.
		self.convlayer = nn.Conv2d(1, 1, self.kernel_size, stride=1, padding=0)
		self.batchnorm = nn.BatchNorm2d(1)
		self.conv_dropout = nn.Dropout(p=conv_dropout)
		self.gru = nn.GRU(input_size=gru_input_size, hidden_size=hidden_dim, batch_first=True)
		self.fc_dropout = nn.Dropout(p=fc_dropout)
		self.fc1 = nn.Linear(hidden_dim, 100)
		self.fc2 = nn.Linear(100, output_size)

	def forward(self, x):
		"""Run the network on a batch of single-channel inputs.

		Args:
			x: Tensor of shape (batch, 1, seq_len, input_size).

		Returns:
			Tensor of shape (batch, seq_len - (kh - 1), output_size) holding
			one output per remaining time step. These are raw scores; no
			sigmoid is applied here.
		"""
		# Drop the channel axis so the GRU sees (batch, time, features)
		after_conv = self.batchnorm(self.convlayer(x)).squeeze(1)
		after_conv_drop = self.conv_dropout(after_conv)
		# nn.GRU returns (output_sequence, final_hidden); keep the per-step outputs
		gru_out = self.gru(after_conv_drop)[0]
		# NOTE(review): there is no activation between fc1 and fc2, only dropout
		out = self.fc1(gru_out)
		out = self.fc_dropout(out)
		out = self.fc2(out)
		return out

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
from prep_data import preprocess_data_main

# Re-seed before loading so that any randomness in data preparation (e.g. shuffling) is repeatable
reset_seed()

# The number of samples following the last observed sample for which the model must predict the blockage status
pred_length = 10
# The number of mmWave samples used to make predictions; changing this will change the number of sequences
# available for training and testing
seq_length = 10
# The ratio of training to testing data; this proportion of the set of available sequences will be used for training
train_ratio = 0.9
# The number of sequences in each batch
batch_size = 2000
# Whether or not to augment all of the training sequences by reversing the order of the beams; this has been
# shown to reduce performance for any prediction length, so it is not recommended
augment = False
# Whether or not to remove the center beams from the observation data (False keeps them); this also
# determines the model's input_size in the training cell
remove_center = False
# If the center beams are included, whether to normalize the center and non-center beams jointly or separately
joint_normalize = True
# Whether or not to shuffle the training data before batching; if this is false, the same samples will be
# batched together every time the data is loaded
shuffle = True

# train_data and test_data are iterated batch by batch in the training and evaluation cells, where element 0
# of each batch is used as the input and element 1 as the label; test_len is the denominator of the accuracy
train_data, test_data, train_len, test_len = preprocess_data_main(pred_length, seq_length, train_ratio, batch_size, 
																  augment=augment, 
																  remove_center=remove_center,
																  joint_normalize=joint_normalize,
																  shuffle=shuffle)

# Report the two lengths returned by preprocessing (presumably sequence counts - confirm in prep_data)
print(train_len)
print(test_len)

55982
6226


In [7]:
# Re-seed so that weight initialization and dropout masks are repeatable
reset_seed()
device = cuda_get_device()

### Model Hyperparameters
# The length of the hidden state vector used in the GRU
hidden_state_size = 20
# The dropout probability between the convolutional layer and the GRU
conv_dropout = 0.4
# The dropout probability between the two fully connected layers
fc_dropout = 0.4
# The weight applied to the loss for samples with a positive label; making this > 1 helps the model to learn patterns that accompany blockages
pos_weight = 1.4
# The learning rate
lr = 0.03
# The factor by which the learning rate is reduced at each milestone
gamma = 0.6
# The total number of epochs for training
n_epochs = 2600
# The milestones (epoch numbers) at which the learning rate is reduced
milestones =  [100, 200, 400, 1000, 1500, 1600, 1700]
# Whether or not to plot the loss curve
plot_losses = False

# Width of each input sample; it is smaller when the center beams have been removed
if remove_center:
	input_size = 54
else:
	input_size = 64

model = GRUWithConvLayer(input_size=input_size, output_size=1, hidden_dim=hidden_state_size, conv_dropout=conv_dropout, fc_dropout=fc_dropout)
model.to(device)
# A new module already starts in training mode; this makes the intent explicit
model.train()

# The model outputs raw logits, so the sigmoid is folded into the loss
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([pos_weight]).to(device))
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=gamma)

# Training Run
loss_sequence = []
for epoch in range(1, n_epochs + 1):
	# Accumulate on the device in float64 to avoid a host sync on every batch
	epoch_losses = torch.zeros(1, dtype=torch.float64, device=device)
	for data in train_data:
		optimizer.zero_grad()
		# Need to include an additional dimension of size 1 for the number of channels
		input_data = data[0].unsqueeze(1).float().to(device)
		target_pred = data[1].unsqueeze(1).float().to(device)
		# Only the output at the last time step is used as the prediction
		output = model(input_data)[:,-1,:]
		loss = criterion(output, target_pred)
		loss.backward()
		optimizer.step()
		# Detach so the running total does not keep each batch's autograd graph alive
		epoch_losses += loss.detach()
	# The learning-rate schedule advances once per epoch
	scheduler.step()
	avg_epoch_loss = epoch_losses.item()/len(train_data)
	loss_sequence.append(avg_epoch_loss)
	if epoch%100 == 0:
		print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
		print("Loss: {:.4f}".format(avg_epoch_loss))

# Plot the loss curve (matplotlib.pyplot is imported as plt in the first cell)
if plot_losses:
	plt.close('all')
	plt.figure()
	plt.plot(loss_sequence)
	plt.xlabel("Epoch")
	plt.ylabel("Average training loss")
	plt.show()

Epoch: 100/2600............. Loss: 0.6850
Epoch: 200/2600............. Loss: 0.6706
Epoch: 300/2600............. Loss: 0.6575
Epoch: 400/2600............. Loss: 0.6521
Epoch: 500/2600............. Loss: 0.6435
Epoch: 600/2600............. Loss: 0.6420
Epoch: 700/2600............. Loss: 0.6383
Epoch: 800/2600............. Loss: 0.6438
Epoch: 900/2600............. Loss: 0.6391
Epoch: 1000/2600............. Loss: 0.6374
Epoch: 1100/2600............. Loss: 0.6364
Epoch: 1200/2600............. Loss: 0.6326
Epoch: 1300/2600............. Loss: 0.6343
Epoch: 1400/2600............. Loss: 0.6338
Epoch: 1500/2600............. Loss: 0.6357
Epoch: 1600/2600............. Loss: 0.6306
Epoch: 1700/2600............. Loss: 0.6254
Epoch: 1800/2600............. Loss: 0.6244
Epoch: 1900/2600............. Loss: 0.6261
Epoch: 2000/2600............. Loss: 0.6276
Epoch: 2100/2600............. Loss: 0.6254
Epoch: 2200/2600............. Loss: 0.6246
Epoch: 2300/2600............. Loss: 0.6275
Epoch: 2400/2600....

In [8]:
# Measure test-set performance: count missed blockages and false alarms, then report accuracy
reset_seed()
# Evaluation mode switches off dropout and makes batch norm use its running statistics
model.eval()

num_blocks_missed = 0
num_false_positives = 0
with torch.no_grad():
	for data in test_data:
		input_data = data[0].unsqueeze(1).float().to(device)
		target_pred = data[1].unsqueeze(1)
		# Keep only the score produced at the final time step of each sequence
		last_step_logits = model(input_data)[:,-1,:]
		# Convert logits to probabilities and round them to hard 0/1 predictions
		output = torch.sigmoid(last_step_logits).round().cpu()
		# Negative error: predicted 0 where the label is 1; positive error: the reverse
		err = (output - target_pred).numpy()
		num_blocks_missed += (err < 0).sum()
		num_false_positives += (err > 0).sum()

num_errors = num_blocks_missed + num_false_positives
print("Number of blocks missed: {}".format(num_blocks_missed))
print("Number of false positives: {}".format(num_false_positives))
print("Accuracy: {}".format(1 - num_errors/test_len))

Number of blocks missed: 1912
Number of false positives: 425
Accuracy: 0.6246386122711212
