In [3]:
# Sanity check: list the contents of the project folder on Google Drive.
# NOTE(review): this path only exists once Drive is mounted at /content/drive
# (the drive.mount cell comes after this one) — on a fresh kernel run the mount first.
!ls /content/drive/MyDrive/ECE_542/Competition_Project

data.zip  metadata  ml_utils  Models  Predictions  test_data.zip


In [11]:
# Mount Google Drive at /content/drive; every path built from `base_path`
# (metadata, model checkpoints) and the archive copies depend on this mount.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Copy the dataset archive from Drive into the current working directory.
!cp /content/drive/MyDrive/ECE_542/Competition_Project/data.zip .

In [5]:
!unzip data.zip

Archive:  data.zip
   creating: data/
  inflating: data/.gitignore         
   creating: data/preprocessed_data/
   creating: data/splits/
   creating: data/splits/test/
  inflating: data/splits/test/subject_005_session_03__x.csv  
  inflating: data/splits/test/subject_005_session_03__y.csv  
  inflating: data/splits/test/subject_006_session_01__x.csv  
  inflating: data/splits/test/subject_006_session_01__y.csv  
  inflating: data/splits/test/subject_006_session_02__x.csv  
  inflating: data/splits/test/subject_006_session_02__y.csv  
  inflating: data/splits/test/subject_006_session_03__x.csv  
  inflating: data/splits/test/subject_006_session_03__y.csv  
  inflating: data/splits/test/subject_007_session_01__x.csv  
  inflating: data/splits/test/subject_007_session_01__y.csv  
  inflating: data/splits/test/subject_007_session_02__x.csv  
  inflating: data/splits/test/subject_007_session_02__y.csv  
  inflating: data/splits/test/subject_007_session_03__x.csv  
  inflating: data/splits

In [6]:
# Delete the local copy of the archive; the extracted ./data directory is what later cells read.
!rm data.zip

In [7]:
# Copy the project's helper package into the working directory so that
# `from ml_utils.dataset import ...` resolves, then list the directory to confirm.
!cp -r /content/drive/MyDrive/ECE_542/Competition_Project/ml_utils .
!ls

data  drive  ml_utils  sample_data


In [8]:
# Project root on Google Drive; the split metadata and model checkpoints are resolved relative to it.
base_path = "/content/drive/MyDrive/ECE_542/Competition_Project"

In [9]:
import os
import sys

In [10]:
import json
from collections import Counter
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
import numpy as np
from ml_utils.dataset import SubjectDataset

In [12]:
# Use the first CUDA GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Device: {device}")

Device: cpu


In [13]:
# Local (unzipped) dataset layout: data/splits/<split name>.
base_data_path = os.path.join("data", "splits")
train_data_path = os.path.join(base_data_path, "train")
val_data_path = os.path.join(base_data_path, "val")

# The split metadata lives on Drive under the project root; it is indexed
# further down by split name ("train", "val", "test").
splits_file = os.path.join(base_path, "metadata", "split_ids.json")
# JSON is UTF-8 by specification; state it so the read does not depend on the
# runtime's locale default.
with open(splits_file, "r", encoding="utf-8") as f:
    split_ids = json.load(f)

In [14]:
# Training hyperparameters
# Training hyperparameters
# NOTE(review): with batch_size = 256 this notebook prints 687 training iterations,
# but the saved training log further down reports 344 iterations per epoch, so the
# logged run presumably used a larger batch (512?) — confirm which value produced
# the stored results before comparing numbers.
batch_size = 256
num_epochs = 5

In [15]:
# ---- Training data ---------------------------------------------------------
# cache_len is set to the number of ids in the split.
# NOTE(review): SubjectDataset is defined in ml_utils; presumably this keeps every
# file of the split cached — confirm against its implementation.
train_dataset = SubjectDataset(
    train_data_path,
    split_ids["train"],
    cache_len=len(split_ids["train"])
)

# Class-balanced sampling: every sample is weighted by the inverse frequency of
# its label, and batches are drawn with replacement according to those weights.
ys = train_dataset.index_store["label"].to_list()
counts = Counter(ys)
weights = np.array([1. / counts[_y] for _y in ys])
sample_weights = torch.from_numpy(weights).float()
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
# Batches per epoch, rounded up to account for a final partial batch.
train_iterations = (len(train_dataset) // batch_size) + ((len(train_dataset) % batch_size) != 0)

# ---- Validation data -------------------------------------------------------
val_dataset = SubjectDataset(
    val_data_path,
    split_ids["val"],
    cache_len=len(split_ids["val"])
)
# Validation metrics are aggregated over the whole set, so the sample order is
# irrelevant; keep it fixed (no shuffling) so runs are comparable and predictions
# line up with the dataset order.
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
val_iterations = (len(val_dataset) // batch_size) + ((len(val_dataset) % batch_size) != 0)

In [17]:
# Batches per epoch for the training and validation loaders, one value per line.
print(train_iterations, val_iterations, sep="\n")

687
211


In [27]:
class OneDConvNet(nn.Module):
  """Small 1-D CNN classifier: three conv/ReLU/max-pool stages and a 3-layer MLP head.

  Input is a batch shaped (batch, n_features, seq_len); the output is one row of
  ``n_classes`` raw scores (logits) per sample.

  Args:
    n_features: number of input channels.
    n_classes: number of output classes.
    seq_len: length of the input windows. Defaults to 40, which reproduces the
      original fixed ``32 * 3`` flattened size, so existing checkpoints still load.

  Raises:
    ValueError: if ``seq_len`` is too short to survive the three conv/pool stages.
  """

  def __init__(self, n_features, n_classes, seq_len=40):
    super().__init__()
    self.Convolution_Layer_1 = nn.Conv1d(in_channels=n_features, out_channels=8, kernel_size=3, stride=1)
    self.Pool_Layer1         = nn.MaxPool1d(kernel_size=2, stride=2)
    self.Convolution_Layer_2 = nn.Conv1d(in_channels=8, out_channels=16, kernel_size=3, stride=1)
    self.Pool_Layer2         = nn.MaxPool1d(kernel_size=2, stride=2)
    self.Convolution_Layer_3 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
    self.Pool_Layer3         = nn.MaxPool1d(kernel_size=2, stride=2)
    # Flattened size = channels of the last conv (32) * remaining time steps.
    flat_features = 32 * self._pooled_length(seq_len)
    self.FullConnected1      = nn.Linear(flat_features, 16)
    self.FullConnected2      = nn.Linear(16, 8)
    self.FullConnected3      = nn.Linear(8, n_classes)

  @staticmethod
  def _pooled_length(seq_len):
    """Return the number of time steps left after the three conv/pool stages."""
    length = seq_len
    for _ in range(3):
      # Conv1d(kernel_size=3, stride=1, no padding) removes 2 steps;
      # MaxPool1d(kernel_size=2, stride=2) halves the length, rounding down.
      length = (length - 2) // 2
      if length < 1:
        raise ValueError(
            f"seq_len={seq_len} is too short for three conv/pool stages")
    return length

  def forward(self, x):
    x = self.Pool_Layer1(F.relu(self.Convolution_Layer_1(x)))
    x = self.Pool_Layer2(F.relu(self.Convolution_Layer_2(x)))
    x = self.Pool_Layer3(F.relu(self.Convolution_Layer_3(x)))
    # Collapse (channels, time) into one feature vector per sample.
    x = torch.flatten(x, start_dim=1)
    x = F.relu(self.FullConnected1(x))
    x = F.relu(self.FullConnected2(x))
    # No softmax here: raw logits are returned.
    x = self.FullConnected3(x)
    return x

# Six input channels, four output classes; parameters are moved to the selected device.
model = OneDConvNet(6, 4).to(device)

In [24]:
# Smoke test: push one random window (batch=1, channels=6, length=40) through the
# network and check that it yields one row of four class scores.
import numpy as np
X = np.random.randn(1, 6, 40)
T = torch.from_numpy(X).float().to(device)
with torch.no_grad():  # shape check only, no autograd graph needed
    y = model(T)
assert y.shape == (1, 4), f"unexpected output shape {tuple(y.shape)}"

In [25]:
# Display the smoke-test output shape (the recorded run shows torch.Size([1, 4])).
y.shape

torch.Size([1, 4])

In [19]:
# Adam over all of `model`'s parameters, with cross-entropy on the raw class scores.
# NOTE(review): the optimizer captures model.parameters() when it is created. The
# execution counts show this cell was run (In [19]) before the latest model
# definition (In [27]); whenever `model` is re-created this cell must be re-run,
# otherwise the optimizer presumably keeps updating the previous instance — confirm.
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [20]:
def train_step(X, y, model, optimizer, criterion):
    """Run one optimisation step on a single batch.

    Args:
        X: batch of inputs, passed unchanged to ``model``.
        y: target class index for every sample in ``X``.
        model: network to train; it is switched to training mode.
        optimizer: optimizer holding ``model``'s parameters.
        criterion: loss callable invoked as ``criterion(model(X), y)``.

    Returns:
        tuple ``(loss, corrects)``: the batch loss as a Python float and a 0-dim
        tensor counting the samples whose arg-max prediction equals ``y``.
    """
    # Mode-dependent layers (dropout, batch norm) must be in training mode here,
    # regardless of what ran before (e.g. a validation pass).
    model.train()

    y_pred = model(X)
    # Predictions are taken from the pre-update forward pass, outside autograd.
    predicted_classes = torch.argmax(y_pred.detach(), dim=1)

    loss = criterion(y_pred, y)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    corrects = torch.sum(y == predicted_classes)

    return loss.item(), corrects

def val_step(X, y, model, criterion):
    """Evaluate a single batch without updating the model.

    The model is put in evaluation mode for the forward pass and returned to
    whatever mode it was in before the call.

    Args:
        X: batch of inputs, passed unchanged to ``model``.
        y: target class index for every sample in ``X``.
        model: network to evaluate.
        criterion: loss callable invoked as ``criterion(model(X), y)``.

    Returns:
        tuple ``(loss, corrects, predicted_classes)``: the batch loss as a Python
        float, a 0-dim tensor counting correct arg-max predictions, and the
        predicted class indices as a NumPy array.
    """
    was_training = model.training
    # Dropout / batch-norm style layers must behave deterministically here.
    model.eval()
    try:
        with torch.no_grad():
            y_pred = model(X)
            predicted_classes = torch.argmax(y_pred, dim=1)
            loss = criterion(y_pred, y)
            corrects = torch.sum(y == predicted_classes)
    finally:
        # Leave the caller's train/eval state exactly as it was found.
        model.train(was_training)

    return loss.item(), corrects, predicted_classes.cpu().numpy()

In [None]:
best_val_loss = float("inf")
# Only the best model so far (lowest validation loss) is kept, always at this path.
path = os.path.join(base_path, "Models", "checkpoint.pth")

for epoch in range(num_epochs):
    # ---- Train for one pass over the (re-sampled) training set ----
    model.train()
    running_loss = 0.
    running_acc = 0.
    for iteration, (X, y) in enumerate(train_dataloader):

        X = X.float().to(device)
        # Labels are flattened to one entry per sample before the loss.
        y = y.view(X.size(0)).to(device)

        loss, corrects = train_step(X, y, model, optimizer, criterion)

        # Running metrics: the loss is weighted by batch size so the epoch
        # average stays correct when the last batch is smaller.
        running_loss = running_loss + loss * X.size(0)
        running_acc = running_acc + corrects

        if iteration % 100 == 0:
            # len(train_dataloader) is the number of batches actually iterated,
            # so the progress line cannot drift from a stale global counter.
            print(f"Iteration: {iteration}/{len(train_dataloader)} | train_loss: {loss} | train_acc: {corrects/X.size(0)}")

    train_loss = running_loss / len(train_dataset)
    train_acc = running_acc / len(train_dataset)

    # ---- Validate ----
    model.eval()
    running_val_loss = 0.
    running_val_acc = 0.
    for X, y in val_dataloader:

        X = X.float().to(device)
        y = y.view(X.size(0)).to(device)

        loss, corrects, _ = val_step(X, y, model, criterion)
        # Running metrics
        running_val_loss = running_val_loss + loss * X.size(0)
        running_val_acc = running_val_acc + corrects

    val_loss = running_val_loss / len(val_dataset)
    val_acc = running_val_acc / len(val_dataset)

    if val_loss < best_val_loss:
        # Checkpoint model
        print(f"Saving model to {path}")
        torch.save(model.state_dict(), path)
        best_val_loss = val_loss

    print(f"Epoch: {epoch} | train_loss {train_loss} | train_acc: {train_acc} | val_loss: {val_loss} | val_acc: {val_acc}")

Iteration: 0/344 | train_loss: 1.4091529846191406 | train_acc: 0.20703125
Iteration: 100/344 | train_loss: 1.4063066244125366 | train_acc: 0.20703125
Iteration: 200/344 | train_loss: 1.414595365524292 | train_acc: 0.224609375
Iteration: 300/344 | train_loss: 1.4047514200210571 | train_acc: 0.23828125
Saving model to /content/drive/MyDrive/ECE_542/Competition_Project/Models/checkpoint.pth
Epoch: 0 | train_loss 1.3998874992465946 | train_acc: 0.23479467630386353 | val_loss: 1.3473030239956796 | val_acc: 0.08708234876394272
Iteration: 0/344 | train_loss: 1.3992516994476318 | train_acc: 0.24609375
Iteration: 100/344 | train_loss: 1.4056719541549683 | train_acc: 0.224609375
Iteration: 200/344 | train_loss: 1.3973077535629272 | train_acc: 0.224609375
Iteration: 300/344 | train_loss: 1.3964611291885376 | train_acc: 0.251953125
Saving model to /content/drive/MyDrive/ECE_542/Competition_Project/Models/checkpoint.pth
Epoch: 1 | train_loss 1.3993992315857808 | train_acc: 0.23362810909748077 | val

In [None]:
# Rebuild the architecture and restore the trained weights for evaluation.
# NOTE(review): the training cell writes "checkpoint.pth" while this cell reads
# "batch512_model.pth" — confirm that file is the intended checkpoint.
batch512_model = OneDConvNet(6, 4)
# map_location lets a checkpoint saved from a GPU run load on a CPU-only runtime
# (and vice versa) instead of failing while deserialising CUDA tensors.
state_dict = torch.load(
    os.path.join(base_path, "Models", "batch512_model.pth"),
    map_location=device,
)
batch512_model.load_state_dict(state_dict)
batch512_model = batch512_model.to(device)
# This instance is only used for inference below.
batch512_model.eval()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class Evaluation:
    """Thin wrapper around scikit-learn classification metrics for class-label predictions."""

    def aggregate_metrics(self, y, y_hat):
        """Metrics for the entire set.

        Precision, recall and F1 are macro-averaged: each is computed per class
        and the unweighted mean is returned, so label imbalance is not taken
        into account.

        Arguments passed are:
                      y     : ground truth
                      y_hat : predicted class
        Calculations:
                  accuracy  : fraction of samples where y_hat equals y
                  precision : tp / (tp + fp)
                  recall    : tp / (tp + fn)
                  f1        : 2 tp / (2 tp + fp + fn)

        A metric whose denominator is zero (e.g. precision of a class that is
        never predicted) is reported as 0 without emitting a warning.

        Returns a dict with keys "Accuracy", "Precision", "Recall" and "F1".
        """
        macro = {"average": "macro", "zero_division": 0}
        return {
            "Accuracy": accuracy_score(y, y_hat),
            "Precision": precision_score(y, y_hat, **macro),
            "Recall": recall_score(y, y_hat, **macro),
            "F1": f1_score(y, y_hat, **macro),
        }

    def classwise_metrics(self, y, y_hat):
        """Metrics for each class.

        ``average=None`` is used, so every entry of the result is an array with
        one score per class rather than a single averaged number.

        Arguments passed are:
                      y     : ground truth
                      y_hat : predicted class
        Calculations (per class):
                  precision : tp / (tp + fp)
                  recall    : tp / (tp + fn)
                  f1        : 2 tp / (2 tp + fp + fn)

        A metric whose denominator is zero is reported as 0 without emitting a
        warning.

        Returns a dict with keys "Precision", "Recall" and "F1".
        """
        per_class = {"average": None, "zero_division": 0}
        return {
            "Precision": precision_score(y, y_hat, **per_class),
            "Recall": recall_score(y, y_hat, **per_class),
            "F1": f1_score(y, y_hat, **per_class),
        }

In [None]:
# Single metrics helper reused for both the validation and the test report.
evaluator = Evaluation()

In [None]:
# Rebuild the validation dataset and loader for evaluation, this time with
# shuffle=False so predictions are collected in a fixed order.
val_dataset = SubjectDataset(
    val_data_path, 
    split_ids["val"], 
    cache_len=len(split_ids["val"])
)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
# Batches per pass, rounded up for a final partial batch.
val_iterations = (len(val_dataset) // batch_size) + ((len(val_dataset) % batch_size) != 0)

In [None]:
# Collect predicted and true class labels over the whole validation set.
output = []
labels = []
batch512_model.eval()
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    for X, y in val_dataloader:
        X = X.float().to(device)
        # Labels are only compared on the host, so they never need to visit the device.
        y_true = y.view(X.size(0)).cpu().numpy()

        y_pred = batch512_model(X)
        predicted_classes = torch.argmax(y_pred, dim=1).cpu().numpy()

        output.append(predicted_classes)
        labels.append(y_true)

In [None]:
# Join the per-batch arrays end to end (along the first axis) into one array each.
_output, _labels = (np.concatenate(chunks, axis=0) for chunks in (output, labels))

In [None]:
# Report aggregate and per-class metrics on the validation set.
# NOTE(review): the recorded output shows ~8.7% accuracy on a 4-class problem and
# zero precision/recall for the first two classes, i.e. the loaded model appears
# never to predict them correctly — worth investigating before trusting this model.
agg_metrics = evaluator.aggregate_metrics(_labels, _output)
print("Agg metrics")
print(agg_metrics)
classwise_metrics = evaluator.classwise_metrics(_labels, _output)
print("Classwise Metrics")
print(classwise_metrics)

Agg metrics
{'Accuracy': 0.08708235163163461, 'Precision': 0.07582919929956072, 'Recall': 0.23955715831421104, 'F1': 0.07863106766551368}
Classwise Metrics
{'Precision': array([0.        , 0.        , 0.05236179, 0.25095501]), 'Recall': array([0.        , 0.        , 0.76788658, 0.19034205]), 'F1': array([0.        , 0.        , 0.09803839, 0.21648588])}


  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Build the held-out test dataset and loader; order is kept fixed (shuffle=False).
test_data_path = os.path.join(base_data_path, "test")
test_dataset = SubjectDataset(
    test_data_path, 
    split_ids["test"], 
    cache_len=len(split_ids["test"])
)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
# Batches per pass, rounded up for a final partial batch.
test_iterations = (len(test_dataset) // batch_size) + ((len(test_dataset) % batch_size) != 0)

In [None]:
# Collect predicted and true class labels over the whole test set.
output = []
labels = []
batch512_model.eval()
# Inference only: skip autograd bookkeeping to save memory and time.
with torch.no_grad():
    for X, y in test_dataloader:
        X = X.float().to(device)
        # Labels are only compared on the host, so they never need to visit the device.
        y_true = y.view(X.size(0)).cpu().numpy()

        y_pred = batch512_model(X)
        predicted_classes = torch.argmax(y_pred, dim=1).cpu().numpy()

        output.append(predicted_classes)
        labels.append(y_true)

In [None]:
# Join the per-batch arrays end to end (along the first axis) into one array each.
_output, _labels = (np.concatenate(chunks, axis=0) for chunks in (output, labels))

In [None]:
# Report aggregate and per-class metrics on the test set.
# NOTE(review): the recorded output shows ~7.5% accuracy on a 4-class problem and
# zero precision/recall for the first two classes, mirroring the validation
# results — the loaded model appears never to predict those classes correctly.
agg_metrics = evaluator.aggregate_metrics(_labels, _output)
print("Agg metrics")
print(agg_metrics)
classwise_metrics = evaluator.classwise_metrics(_labels, _output)
print("Classwise Metrics")
print(classwise_metrics)

Agg metrics
{'Accuracy': 0.07522073698738915, 'Precision': 0.05783804602099915, 'Recall': 0.23816130115661177, 'F1': 0.07499789741740742}
Classwise Metrics
{'Precision': array([0.        , 0.        , 0.04056379, 0.1907884 ]), 'Recall': array([0.        , 0.        , 0.68315397, 0.26949123]), 'F1': array([0.        , 0.        , 0.07658044, 0.22341115])}


  _warn_prf(average, modifier, msg_start, len(result))
