In [3]:
import numpy as np
from google.colab import drive
import torch
from torch.utils import data
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
print(torch.__version__)

1.7.0+cu101


In [4]:
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
path = '/content/drive/My Drive/'
train_set = np.load(path + 'ntrain.npy' ,allow_pickle=True)
train_labels = np.load(path + 'ntrain_labels.npy' ,allow_pickle=True)
val_set = np.load(path + 'nval.npy' ,allow_pickle=True)
val_labels = np.load(path + 'nval_labels.npy' ,allow_pickle=True)
test_set = np.load(path + 'ntest.npy' ,allow_pickle=True)

In [6]:
class CustomizedDataset(data.Dataset):
    def __init__(self, x,y,k):       
        self.x = x
        self.y = y
        self.k = k
        self.inputs = []
        self.lab = np.array([])
        self.idx = []
        self._init_dataset()
        
    def __len__(self):
        return len(self.lab)

    def __getitem__(self,index):
          i = self.idx[index]
          return torch.from_numpy(np.concatenate(self.inputs[i-self.k:i+self.k+1],axis=0)).float(),torch.tensor(self.lab[index]).long()
    
    def _init_dataset(self):
        idx=0
        for i in range(len(self.x)):
            start_time = time.time()
            s = len(self.x[i])
            x=np.pad(self.x[i],((self.k, self.k), (0, 0)), 'constant', constant_values=0)

            idx += 2*self.k
            for j in range(s):           
              self.idx+=[idx-self.k]
              idx += 1

            self.inputs+=list(x)  
            self.lab = np.append(self.lab,self.y[i])
            end_time = time.time()
        return np.array(self.inputs), self.lab

In [7]:
gpu = torch.cuda.is_available()
print('gpu? ',gpu)
if gpu:
  device = 'cuda'
# Hyperparameters to be changed to acheive best performance
k = 41

# Training dataset
print('Applying context with k = {} to the training set'.format(k))
train_dataset = CustomizedDataset(train_set, train_labels,k)
# Validation dataset
print('Applying context with k = {} to the validation set'.format(k))
val_dataset = CustomizedDataset(val_set,val_labels,k)
# Test dataset

gpu?  True
Applying context with k = 41 to the training set
Applying context with k = 41 to the validation set


In [8]:
num_workers = 4 if gpu else 0 

train_loader_args = dict(shuffle=True, batch_size=256, num_workers=num_workers, pin_memory=True)

train_loader = data.DataLoader(train_dataset, **train_loader_args)

In [9]:
# Validation
val_loader_args = dict(shuffle=False, batch_size=256, num_workers=num_workers, pin_memory=True)
val_loader = data.DataLoader(val_dataset, **val_loader_args)

In [38]:
class TestDataset(data.Dataset):
    def __init__(self, X,k):
       
        self.X = X
        self.k = k
        self.samples = []
        self.length = []
        self._init_dataset()
        self.ind = np.arange(self.length[-1])
        km = [self.k*(2*i+1) for i in range(len(self.length))]
        
        b = 0
        for i in range(self.length[-1]):
            if i == self.length[b]:
                b = b+1
                self.ind[i] = self.ind[i] + km[b]
            else:
                self.ind[i] = self.ind[i] + km[b]
        

    def __len__(self):
        print(len(self.samples),self.length[-1])
        return self.length[-1]

    def __getitem__(self,index):
        X = np.concatenate((self.samples[self.ind[index]-self.k:self.ind[index]+ self.k+1]),axis=0)
        return torch.from_numpy(X).float()
    
    def _init_dataset(self):
        s = 0
        for i in range(len(self.X)):
            p = np.pad(self.X[i], ((self.k, self.k), (0, 0)), 'constant', constant_values=0)
            s = s + len(self.X[i])
            self.length.append(s)
            self.samples = self.samples + list(p)
         
        return np.array(self.samples)

In [42]:
test_dataset = TestDataset(test_set,k)
# batch size of 256 for faster inference, we can then unroll it for saving
test_loader_args = dict(shuffle=False, batch_size=256, num_workers=num_workers, pin_memory=True)
test_loader = data.DataLoader(test_dataset, **test_loader_args)

In [10]:
def swish(x):
  return x*F.sigmoid(x)

In [11]:
act = swish
class MishNet(nn.Module):
  def __init__(self, input_size):
    super(MishNet, self).__init__()

    self.dp = nn.Dropout(0.3)
    self.fc1 = nn.Linear(input_size, 4096)
    self.bn1 = nn.BatchNorm1d(4096)
    self.fc2 = nn.Linear(4096, 2048)
    self.bn2 = nn.BatchNorm1d(2048)
    self.fc3 = nn.Linear(2048, 2048)
    self.bn3 = nn.BatchNorm1d(2048)
    self.fc4 = nn.Linear(2048, 1024)
    self.bn4 = nn.BatchNorm1d(1024)
    self.fc5 = nn.Linear(1024, 1024)
    self.bn5 = nn.BatchNorm1d(1024)
    self.fc6 = nn.Linear(1024, 1024)
    self.bn6 = nn.BatchNorm1d(1024)
    self.fc7 = nn.Linear(1024, 1024)
    self.bn7 = nn.BatchNorm1d(1024)
    self.fc8 = nn.Linear(1024, 512)
    self.bn8 = nn.BatchNorm1d(512)
    self.fc9 = nn.Linear(512, 512)
    self.bn9 = nn.BatchNorm1d(512)
    self.fc10 = nn.Linear(512, 512)
    self.bn10 = nn.BatchNorm1d(512)
    self.fc11 = nn.Linear(512, 512)
    self.bn11 = nn.BatchNorm1d(512)
    self.fc12 = nn.Linear(512, 512)
    self.fc13 = nn.Linear(512, 346)

  def forward(self,x):
    x = self.bn1(self.fc1(x))
    x = self.dp(act(x))
    x = self.bn2(self.fc2(x))
    x = self.dp(act(x))
    x = self.bn3(self.fc3(x))
    x = self.dp(act(x))
    x = self.bn4(self.fc4(x))
    x = self.dp(act(x))
    x = self.bn5(self.fc5(x))
    x = self.dp(act(x))
    x = self.bn6(self.fc6(x))
    x = self.dp(act(x))
    x = self.bn7(self.fc7(x))
    x = self.dp(act(x))
    x = self.bn8(self.fc8(x))
    x = self.dp(act(x))
    x = self.bn9(self.fc9(x))
    x = self.dp(act(x))
    x = self.bn9(self.fc9(x))
    x = self.dp(act(x))
    x = self.bn10(self.fc10(x))
    x = self.dp(act(x))
    x = self.bn11(self.fc11(x))
    x = self.dp(act(x))
    x = act(self.fc12(x))
    
    return self.fc13(x)

In [None]:
train_size = 13*(2*k+1)
Mmodel = MishNet(train_size)
Mmodel.to(device)
Mmodel.apply(init_xavier)

In [12]:
def init_xavier(m):
  if type(m) == nn.Linear:
    fan_in = m.weight.size()[1]
    fan_out = m.weight.size()[0]
    std = np.sqrt(1.0/(fan_in + fan_out))
    m.weight.data.normal_(0,std)

In [13]:
def train_epoch(model, train_loader, criterion, optimizer):
    model.train()

    running_loss = 0.0
    total_predictions = 0.0
    correct_predictions = 0.0
            
    for batch_idx, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        data = data.to(device)
        target = target.to(device)

        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        
        total_predictions += target.size(0)
        correct_predictions += (predicted == target).sum().item()
        
        loss = criterion(outputs, target)
        running_loss += loss.item()

        loss.backward()
        optimizer.step()

        if (batch_idx+1) % 1000 == 0:
          print(batch_idx+1)   
          print("training acc: ",(correct_predictions/total_predictions)*100.0)
    
    running_loss /= len(train_loader)
    acc = (correct_predictions/total_predictions)*100.0
    print('Training Loss: ', running_loss)
    print('Training Accuracy: ', acc, '%')
    return running_loss,acc

In [14]:
def inference(model, val_loader, criterion):
    with torch.no_grad():
        model.eval()
        running_loss = 0.0
        total_predictions = 0.0
        correct_predictions = 0.0

        for batch_idx, (data, target) in enumerate(val_loader):   
            data = data.to(device)
            target = target.to(device)

            outputs = model(data)

            _, predicted = torch.max(outputs.data, 1)
            total_predictions += target.size(0)
            correct_predictions += (predicted == target).sum().item()

            loss = criterion(outputs, target).detach()
            running_loss += loss.item()


        running_loss /= len(val_loader)
        acc = (correct_predictions/total_predictions)*100.0
        print('Validation Loss: ', running_loss)
        print('Validation Accuracy: ', acc, '%')
        scheduler.step(running_loss)
        return running_loss, acc

In [26]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(Mmodel.parameters(), lr=0.01, momentum=0.9, nesterov=True)
#optimizer = optim.Adam(Mmodel.parameters(),lr = 0.001,weight_decay=0.0000001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, threshold=0.0001,
                                           cooldown=2, min_lr=0.000001, eps=1e-08,verbose=True)

In [33]:
n_epochs = 10
Train_acc = []
Train_loss = []
Val_loss = []
Val_acc = []

for i in range(n_epochs):
    print('Epoch: ',i+1)
    train_loss,acc = train_epoch(Mmodel, train_loader, criterion, optimizer)
    val_loss, val_acc = inference(Mmodel, val_loader, criterion)
    Train_loss.append(train_loss)
    Train_acc.append(acc)
    Val_loss.append(val_loss)
    Val_acc.append(val_acc)
    print('='*20)

Epoch:  1




1000
training acc:  96.806640625
2000
training acc:  96.83535156250001
3000
training acc:  96.84856770833333
4000
training acc:  96.84541015625
5000
training acc:  96.8490625
6000
training acc:  96.84212239583333
7000
training acc:  96.83794642857143
8000
training acc:  96.844775390625
9000
training acc:  96.840234375
10000
training acc:  96.84
11000
training acc:  96.84076704545454
12000
training acc:  96.84514973958333
13000
training acc:  96.84405048076923
14000
training acc:  96.84740513392858
15000
training acc:  96.84645833333333
16000
training acc:  96.84548339843751
17000
training acc:  96.84250919117648
18000
training acc:  96.84164496527778
19000
training acc:  96.84229029605264
20000
training acc:  96.84384765625
21000
training acc:  96.84361979166667
22000
training acc:  96.84289772727273
23000
training acc:  96.84295176630435
24000
training acc:  96.84597981770834
25000
training acc:  96.8456875
26000
training acc:  96.8451171875
27000
training acc:  96.84377893518518
2800

In [34]:
# saving the entire model in order to resume training after Colab's disconnection
full_model_path = '/content/drive/My Drive/'+'BestModel.pt'
torch.save(Mmodel, full_model_path)

In [31]:
full_model_path = '/content/drive/My Drive/'+'BestModel.pt'
Mmodel = torch.load(full_model_path)

In [43]:
def test_model(model, test_loader):
    predNo = 0
    with torch.no_grad():
        model.eval()
        pred = []

        for batch_idx, (data) in enumerate(test_loader):
            data = data.to(device)
            output = model(data)
            _,predicted = torch.max(output.data, 1)
            pred.append(predicted.cpu().numpy())
            predNo += 1

        model.train()
        print(predNo)
        return np.array(pred)

In [None]:
pred = test_model(Mmodel, test_loader)

In [None]:
# count the total number of frames
framesNo = 0
for i in range(pred[0].size):
  framesNo += pred[i].shape[0]
print(framesNo)

2813174


In [None]:
# flatten the prediction (it comes in a batches of 256)
flatpred = []
for i in range(10989):
  for j in range(pred[i].size):
    flatpred.append(pred[i][j])
    
id = [i for i in range(2813174)]
submit = np.vstack((id,flatpred)).T

In [None]:
import pandas as pd 
pd.DataFrame(data = submit).to_csv(path+"HW3Sol.csv", header=['id','label'], index=None)