In [None]:
# List the project folder on Google Drive. The path lives under /content/drive,
# so it only exists once Drive has been mounted there.
!ls /content/drive/MyDrive/ECE_542/Competition_Project

data.zip  ml_utils  Predictions    window2_data.zip
metadata  Models    test_data.zip


In [None]:
# Mount Google Drive at /content/drive so the project files under MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Copy the windowed-data archive from Drive into the current working directory.
!cp /content/drive/MyDrive/ECE_542/Competition_Project/window2_data.zip .

In [None]:
# Extract the archive; per the recorded output it unpacks into ./window2_data/.
!unzip window2_data.zip

Archive:  window2_data.zip
   creating: window2_data/preprocessed_data/
   creating: window2_data/preprocessed_data_test/
  inflating: window2_data/preprocessed_data_test/subject_009_session_01__x.csv  
  inflating: window2_data/preprocessed_data_test/subject_009_session_01__y.csv  
  inflating: window2_data/preprocessed_data_test/subject_010_session_01__x.csv  
  inflating: window2_data/preprocessed_data_test/subject_010_session_01__y.csv  
  inflating: window2_data/preprocessed_data_test/subject_011_session_01__x.csv  
  inflating: window2_data/preprocessed_data_test/subject_011_session_01__y.csv  
  inflating: window2_data/preprocessed_data_test/subject_012_session_01__x.csv  
  inflating: window2_data/preprocessed_data_test/subject_012_session_01__y.csv  
   creating: window2_data/splits/
   creating: window2_data/splits/test/
  inflating: window2_data/splits/test/subject_005_session_03__x.csv  
  inflating: window2_data/splits/test/subject_005_session_03__y.csv  
  inflating: wind

In [None]:
!rm data.zip

In [None]:
# Copy the project's ml_utils package next to the notebook so that
# `from ml_utils.dataset import SubjectDataset` resolves, then list the working directory.
!cp -r /content/drive/MyDrive/ECE_542/Competition_Project/ml_utils .
!ls

drive  ml_utils  sample_data  window2_data  window2_data.zip


In [None]:
# Root of the project folder on Google Drive; metadata files are read from and
# model checkpoints are written to paths built under it.
base_path = "/content/drive/MyDrive/ECE_542/Competition_Project"

In [None]:
import os
import sys
# sys.path.append("ece542-competition-project")

In [None]:
import json
from collections import Counter
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler
import numpy as np
from ml_utils.dataset import SubjectDataset

In [None]:
# Use the first CUDA GPU when one is visible, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Device: {device}")

Device: cpu


In [None]:
# Per-split folders inside the locally extracted archive.
base_data_path = os.path.join("window2_data", "splits")
train_data_path, val_data_path = (
    os.path.join(base_data_path, split_name) for split_name in ("train", "val")
)

# The train/val/test id lists live with the project metadata on Drive.
splits_file = os.path.join(base_path, "metadata", "split_ids.json")
with open(splits_file, "r") as f:
    split_ids = json.load(f)

In [None]:
# Load the statistics file from the project metadata; each entry is read below
# through its "min" and "max" fields to build the normalization tensors.
stats_path = os.path.join(base_path, "metadata", "statistics.json")
with open(stats_path, "r") as f:
    stats = json.load(f)

In [None]:
# Minimum / maximum taken from each entry of `stats`, used for min-max
# normalization of the model inputs. A leading and a trailing singleton axis are
# added so the tensors broadcast against batched inputs.
# NOTE(review): these names shadow the Python builtins `min` and `max` for the
# rest of the notebook; they are kept because later cells read them by name.
min = torch.from_numpy(np.array([entry["min"] for entry in stats.values()])).float()
min = min[None, ..., None].to(device)

max = torch.from_numpy(np.array([entry["max"] for entry in stats.values()])).float()
max = max[None, ..., None].to(device)

In [None]:
# Sanity check of the broadcast shape: a singleton axis on each side of the
# per-entry statistics axis.
min.shape

torch.Size([1, 6, 1])

In [None]:
# Training hyperparameters
batch_size = 128  # mini-batch size shared by the train, val and test DataLoaders
num_epochs = 30  # number of passes made by the training loop

In [None]:
# ---- Training split ----
train_dataset = SubjectDataset(train_data_path, split_ids["train"])

# Class-balanced sampling: each sample is weighted by the inverse of its label's
# frequency and one epoch draws len(train_dataset) samples with replacement.
ys = train_dataset.y.tolist()
counts = Counter(ys)
weights = 1. / np.array([counts[label] for label in ys])
sample_weights = torch.from_numpy(weights).float()
sampler = WeightedRandomSampler(
    weights=sample_weights,
    num_samples=len(sample_weights),
    replacement=True,
)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
# Ceiling division: number of mini-batches per epoch.
train_iterations = (len(train_dataset) + batch_size - 1) // batch_size

# ---- Validation split ----
val_dataset = SubjectDataset(val_data_path, split_ids["val"])
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
val_iterations = (len(val_dataset) + batch_size - 1) // batch_size

Converting uid 005_02
Converting uid 001_06
Converting uid 003_02
Converting uid 001_05
Converting uid 002_02
Converting uid 003_01
Converting uid 003_03
Converting uid 005_01
Converting uid 001_07
Converting uid 002_05
Converting uid 004_02
Converting uid 002_03
Converting uid 001_02
Converting uid 002_04
Converting uid 001_03
Converting uid 004_01
Converting uid 001_08
Converting uid 002_01
Converting uid 001_01
Converting uid 001_04


In [None]:
# Number of mini-batches per epoch for the training and validation loaders.
print(train_iterations)
print(val_iterations)

1373
422


In [None]:
class OneDConvNet(nn.Module):
  """1-D convolutional classifier for fixed-length multi-channel windows.

  Four length-preserving Conv1d stages (kernel 3, padding 1), each followed by a
  LayerNorm over the flattened (channels * time) activations and a ReLU. The
  first three stages are followed by adaptive average pooling that halves the
  time axis. The final feature map is averaged over time and passed through two
  fully connected layers (with dropout in between) to produce class logits.

  Args:
    n_features: number of input channels.
    n_classes: number of output logits.
    base_filters: channels of the first conv stage; each later stage doubles it.
    input_length: number of time steps of every input window. The LayerNorm and
      pooling sizes are derived from it. Defaults to 80, the value that was
      previously hard-coded, so existing checkpoints keep loading unchanged.

  Raises:
    ValueError: if ``input_length`` is smaller than 8 (the time axis is reduced
      by a factor of 8 and must stay non-empty).
  """

  def __init__(self, n_features, n_classes, base_filters=32, input_length=80):
    super().__init__()
    if input_length < 8:
      raise ValueError(f"input_length must be at least 8, got {input_length}")
    self.input_length = input_length

    # Time-axis length seen by the LayerNorm of each stage.
    t1 = input_length
    t2 = input_length // 2
    t3 = input_length // 4
    t4 = input_length // 8

    # Attribute names and registration order are kept as-is so state_dict keys
    # stay compatible with previously saved checkpoints.
    self.conv1 = nn.Conv1d(in_channels=n_features, out_channels=base_filters, kernel_size=3, stride=1, padding=1)
    self.norm1 = nn.LayerNorm(base_filters*t1)
    self.pool1 = nn.AdaptiveAvgPool1d(t2)
    self.conv2 = nn.Conv1d(in_channels=base_filters, out_channels=base_filters*2, kernel_size=3, stride=1, padding=1)
    self.norm2 = nn.LayerNorm(base_filters*2*t2)
    self.pool2 = nn.AdaptiveAvgPool1d(t3)
    self.conv3 = nn.Conv1d(in_channels=base_filters*2, out_channels=base_filters*4, kernel_size=3, stride=1, padding=1)
    self.norm3 = nn.LayerNorm(base_filters*4*t3)
    self.pool3 = nn.AdaptiveAvgPool1d(t4)
    self.conv4 = nn.Conv1d(in_channels=base_filters*4, out_channels=base_filters*8, kernel_size=3, stride=1, padding=1)
    self.norm4 = nn.LayerNorm(base_filters*8*t4)
    self.fc1 = nn.Linear(base_filters*8, base_filters*16)
    self.dropout5 = nn.Dropout(0.4)
    self.fc2 = nn.Linear(base_filters*16, n_classes)

  @staticmethod
  def _flat_layer_norm(norm, x):
    """Apply `norm` over the flattened (C*T) axis of x and restore (N, C, T)."""
    n, c, t = x.shape
    return norm(x.reshape(n, c * t)).reshape(n, c, t)

  def forward(self, x):
    """Map a (N, n_features, input_length) batch to (N, n_classes) logits."""
    x = self.pool1(F.relu(self._flat_layer_norm(self.norm1, self.conv1(x))))
    x = self.pool2(F.relu(self._flat_layer_norm(self.norm2, self.conv2(x))))
    x = self.pool3(F.relu(self._flat_layer_norm(self.norm3, self.conv3(x))))
    x = F.relu(self._flat_layer_norm(self.norm4, self.conv4(x)))

    # Global average pooling over the time axis: (N, C, T) -> (N, C).
    x = x.mean(dim=-1)
    x = self.dropout5(F.relu(self.fc1(x)))
    x = self.fc2(x)
    return x

# Instantiate the network with 6 input channels and 4 output classes and move
# its parameters to the selected device.
model = OneDConvNet(6, 4).to(device)

In [None]:
# import numpy as np
# X = np.random.randn(1, 6, 80)
# T = torch.from_numpy(X).float().to(device)
# y = model(T)

In [None]:
# y.shape

In [None]:
# Plain SGD with momentum; the alternatives that were tried are kept commented out below.
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=0.001)
# optimizer = optim.RMSprop(model.parameters(), lr=1e-3)
# Multiply the learning rate by 0.5 once the value passed to scheduler.step()
# has stopped improving for more than `patience` consecutive calls.
scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=1, verbose=True)
# Cross-entropy on raw logits (the model's forward returns the fc2 output without softmax).
criterion = nn.CrossEntropyLoss()

In [None]:
def train_step(X, y, model, optimizer, criterion):
    """Run one optimization step on a single mini-batch.

    Args:
        X: input batch, already on the model's device.
        y: integer class labels for the batch, shape (batch,).
        model: network to update.
        optimizer: optimizer bound to the model's parameters.
        criterion: loss taking (logits, labels).

    Returns:
        (loss, corrects): the batch loss as a Python float and the number of
        correctly classified samples as a 0-dim tensor.
    """
    # Make sure train-only behaviour (e.g. dropout) is active even if an
    # earlier evaluation left the model in eval mode.
    model.train()

    y_pred = model(X)
    loss = criterion(y_pred, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Accuracy bookkeeping does not need gradients.
    predicted_classes = torch.argmax(y_pred.detach(), dim=1)
    corrects = torch.sum(y == predicted_classes)

    return loss.item(), corrects

def val_step(X, y, model, criterion):
    """Evaluate one mini-batch without updating the model.

    The model is switched to eval mode for the forward pass so that dropout is
    disabled, and its previous train/eval mode is restored afterwards so that
    calling this between training steps does not change training behaviour.

    Args:
        X: input batch, already on the model's device.
        y: integer class labels for the batch, shape (batch,).
        model: network to evaluate.
        criterion: loss taking (logits, labels).

    Returns:
        (loss, corrects, predictions): the batch loss as a Python float, the
        number of correct predictions as a 0-dim tensor, and the predicted
        class indices as a NumPy array.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            y_pred = model(X)
            predicted_classes = torch.argmax(y_pred, dim=1)
            loss = criterion(y_pred, y)
            corrects = torch.sum(y == predicted_classes)
    finally:
        # Put the model back in whatever mode the caller had it in.
        model.train(was_training)

    return loss.item(), corrects, predicted_classes.cpu().numpy()

In [None]:
# Train for `num_epochs` epochs. After every epoch the model is validated, the
# LR scheduler is stepped on the validation loss, and the weights are
# checkpointed whenever the validation loss improves.
best_val_loss = float("inf")
# Loop-invariant destination of the best-validation-loss checkpoint.
path = os.path.join(base_path, "Models", "window2_adaptive.pth")

for epoch in range(num_epochs):
    # ---- Train ----
    # Training mode: dropout active.
    model.train()
    running_loss = 0.
    running_acc = 0.
    for iteration, (X, y) in enumerate(train_dataloader):

        X = X.float().to(device)
        # Min-max normalize with the precomputed statistics.
        # NOTE(review): an entry with max == min would divide by zero here — confirm the statistics.
        X = (X - min) / (max - min)

        y = y.view(X.size(0)).to(device)

        loss, corrects = train_step(X, y, model, optimizer, criterion)

        # Running metrics (loss is a batch mean, so weight it by the batch size)
        running_loss = running_loss + loss * X.size(0)
        running_acc = running_acc + corrects

        if iteration % 100 == 0:
            print(f"Iteration: {iteration}/{train_iterations} | train_loss: {loss} | train_acc: {corrects/X.size(0)}")

    train_loss = running_loss / len(train_dataset)
    train_acc = running_acc / len(train_dataset)

    # ---- Validate ----
    # Eval mode: dropout must be disabled so the validation loss that drives
    # the scheduler and checkpoint selection is deterministic.
    model.eval()
    running_val_loss = 0.
    running_val_acc = 0.
    for X, y in val_dataloader:

        X = X.float().to(device)
        X = (X - min) / (max - min)

        y = y.view(X.size(0)).to(device)

        loss, corrects, predicted_classes = val_step(X, y, model, criterion)
        # Running metrics
        running_val_loss = running_val_loss + loss * X.size(0)
        running_val_acc = running_val_acc + corrects

    val_loss = running_val_loss / len(val_dataset)
    val_acc = running_val_acc / len(val_dataset)

    scheduler.step(val_loss)

    if val_loss < best_val_loss:
        # Checkpoint model
        print(f"Saving model to {path}")
        torch.save(model.state_dict(), path)
        best_val_loss = val_loss

    print(f"Epoch: {epoch} | train_loss {train_loss} | train_acc: {train_acc} | val_loss: {val_loss} | val_acc: {val_acc}")

Iteration: 0/1373 | train_loss: 1.395035743713379 | train_acc: 0.203125
Iteration: 100/1373 | train_loss: 1.369444489479065 | train_acc: 0.296875
Iteration: 200/1373 | train_loss: 1.3594839572906494 | train_acc: 0.34375
Iteration: 300/1373 | train_loss: 1.2352980375289917 | train_acc: 0.4921875
Iteration: 400/1373 | train_loss: 1.2086188793182373 | train_acc: 0.4453125
Iteration: 500/1373 | train_loss: 0.8872849345207214 | train_acc: 0.515625
Iteration: 600/1373 | train_loss: 0.5142344236373901 | train_acc: 0.8125
Iteration: 700/1373 | train_loss: 0.44590508937835693 | train_acc: 0.7734375
Iteration: 800/1373 | train_loss: 0.3981553912162781 | train_acc: 0.8125
Iteration: 900/1373 | train_loss: 0.37308406829833984 | train_acc: 0.8046875
Iteration: 1000/1373 | train_loss: 0.9112809300422668 | train_acc: 0.671875
Iteration: 1100/1373 | train_loss: 0.31590476632118225 | train_acc: 0.8515625
Iteration: 1200/1373 | train_loss: 0.42371317744255066 | train_acc: 0.8046875
Iteration: 1300/1373 

In [None]:
# Quick look at the first training label.
np.array([train_dataset.y[0]])

array([0])

In [None]:
# Rebuild the architecture and load the best-validation-loss checkpoint.
best_model = OneDConvNet(6, 4)
best_model.load_state_dict(
    torch.load(
        os.path.join(base_path, "Models", "window2_adaptive.pth"),
        # Remap the saved tensors onto the current device so a checkpoint
        # written on a GPU runtime still loads on a CPU-only runtime.
        map_location=device,
    )
)
best_model = best_model.to(device)
# Freshly constructed modules start in training mode; switch to eval mode so
# dropout is disabled for the evaluations that follow.
best_model.eval()

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

class Evaluation:
  """Stateless helper that reports scikit-learn classification metrics."""

  def aggregate_metrics(self, y, y_hat):
    """Summarise performance over the whole set.

    Arguments passed are:
        y     : ground truth
        y_hat : predicted class

    Returns a dict with the keys "Accuracy", "Precision", "Recall" and "F1".
    Accuracy comes from accuracy_score; precision, recall and F1 are requested
    with average="macro".
    """
    macro = dict(average="macro")
    return {
        "Accuracy": accuracy_score(y, y_hat),
        "Precision": precision_score(y, y_hat, **macro),
        "Recall": recall_score(y, y_hat, **macro),
        "F1": f1_score(y, y_hat, **macro),
    }

  def classwise_metrics(self, y, y_hat):
    """Report precision, recall and F1 separately for each class.

    Arguments passed are:
        y     : ground truth
        y_hat : predicted class

    Returns a dict with the keys "Precision", "Recall" and "F1". Each score is
    requested with average=None, i.e. no averaging across classes is applied.
    """
    per_class = dict(average=None)
    return {
        "Precision": precision_score(y, y_hat, **per_class),
        "Recall": recall_score(y, y_hat, **per_class),
        "F1": f1_score(y, y_hat, **per_class),
    }

In [None]:
# Metrics helper used for the validation and test evaluations.
evaluator = Evaluation()

In [None]:
# Rebuild the validation loader without shuffling so predictions come out in
# dataset order for the final evaluation.
val_dataset = SubjectDataset(val_data_path, split_ids["val"])
val_dataloader = DataLoader(val_dataset, shuffle=False, batch_size=batch_size)
# Ceiling division: number of mini-batches in one pass.
val_iterations = (len(val_dataset) + batch_size - 1) // batch_size

Converting uid 001_08
Converting uid 002_01
Converting uid 001_01
Converting uid 001_04


In [None]:
# Collect predictions and ground-truth labels over the validation set.
output = []
labels = []

# Eval mode disables dropout; no_grad avoids building an autograd graph that
# inference never uses.
best_model.eval()
with torch.no_grad():
    for X, y in val_dataloader:
        X = X.float().to(device)
        # Same min-max normalization as used during training.
        X = (X - min) / (max - min)

        # Labels are only compared on the host, so they stay off the device.
        y = y.view(X.size(0))

        y_pred = best_model(X)
        predicted_classes = torch.argmax(y_pred, dim=1).cpu().numpy()
        y_true = y.cpu().numpy()

        output.append(predicted_classes)
        labels.append(y_true)

In [None]:
# Stitch the per-batch arrays into one array of predictions and one of labels.
_output = np.concatenate(output, axis=0)
_labels = np.concatenate(labels, axis=0)

In [None]:
# Report aggregate and per-class metrics for the predictions collected above.
agg_metrics = evaluator.aggregate_metrics(_labels, _output)
print("Agg metrics")
print(agg_metrics)
classwise_metrics = evaluator.classwise_metrics(_labels, _output)
print("Classwise Metrics")
print(classwise_metrics)

Agg metrics
{'Accuracy': 0.8995103136639504, 'Precision': 0.8075946057190717, 'Recall': 0.9303976681936282, 'F1': 0.8602390828991545}
Classwise Metrics
{'Precision': array([0.97917573, 0.78166402, 0.77984953, 0.68968915]), 'Recall': array([0.89412407, 0.94946894, 0.98675497, 0.89124269]), 'F1': array([0.93471914, 0.85743351, 0.87118582, 0.77761786])}


In [None]:
# Held-out test split, iterated in dataset order.
test_data_path = os.path.join(base_data_path, "test")
test_dataset = SubjectDataset(test_data_path, split_ids["test"])
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)
# Ceiling division: number of mini-batches in one pass.
test_iterations = (len(test_dataset) + batch_size - 1) // batch_size

Converting uid 005_03
Converting uid 006_01
Converting uid 006_02
Converting uid 006_03
Converting uid 007_01
Converting uid 007_02
Converting uid 007_03
Converting uid 007_04
Converting uid 008_01


In [None]:
# Collect predictions and ground-truth labels over the test set.
output = []
labels = []

# Eval mode disables dropout; no_grad avoids building an autograd graph that
# inference never uses.
best_model.eval()
with torch.no_grad():
    for X, y in test_dataloader:
        X = X.float().to(device)
        # Same min-max normalization as used during training.
        X = (X - min) / (max - min)

        # Labels are only compared on the host, so they stay off the device.
        y = y.view(X.size(0))

        y_pred = best_model(X)
        predicted_classes = torch.argmax(y_pred, dim=1).cpu().numpy()
        y_true = y.cpu().numpy()

        output.append(predicted_classes)
        labels.append(y_true)

In [None]:
# Stitch the per-batch arrays into one array of predictions and one of labels.
_output = np.concatenate(output, axis=0)
_labels = np.concatenate(labels, axis=0)

In [None]:
# Report aggregate and per-class metrics for the predictions collected above.
# NOTE(review): the output recorded for this cell is identical to the validation
# metrics recorded earlier, which suggests it is stale — re-run to confirm.
agg_metrics = evaluator.aggregate_metrics(_labels, _output)
print("Agg metrics")
print(agg_metrics)
classwise_metrics = evaluator.classwise_metrics(_labels, _output)
print("Classwise Metrics")
print(classwise_metrics)

Agg metrics
{'Accuracy': 0.8995103136639504, 'Precision': 0.8075946057190717, 'Recall': 0.9303976681936282, 'F1': 0.8602390828991545}
Classwise Metrics
{'Precision': array([0.97917573, 0.78166402, 0.77984953, 0.68968915]), 'Recall': array([0.89412407, 0.94946894, 0.98675497, 0.89124269]), 'F1': array([0.93471914, 0.85743351, 0.87118582, 0.77761786])}


In [None]:
# Save the current weights of `model` (the training-loop model, not `best_model`)
# under a separate file name.
# NOTE(review): the recorded output shows a different file name than the one
# built here, so that output appears to come from an older version of this cell.
path = os.path.join(base_path, "Models", "window2_adaptive_long.pth")
print(f"Saving model to {path}")
torch.save(model.state_dict(), path)

Saving model to /content/drive/MyDrive/ECE_542/Competition_Project/Models/checkpoint_instance_norm_dropout_sgd_lr3_adaptive_window2_long.pth
