In [3]:
!pip3 install torch
from sklearn import preprocessing
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import time
import pathlib
import dill
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

Collecting skorch
[?25l  Downloading https://files.pythonhosted.org/packages/42/21/4936b881b33de285faa0b36209afe4f9724a0875b2225abdc63b23d384a3/skorch-0.8.0-py3-none-any.whl (113kB)
[K     |████████████████████████████████| 122kB 2.9MB/s 
Installing collected packages: skorch
Successfully installed skorch-0.8.0


In [4]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
class downwardSlope(nn.Module):
    def __init__(self, maxSeqLength, outSize):
        super().__init__()
        convOutLength = maxSeqLength
        padding = 1
        dilation = 1
        kernel_size = 15
        stride = 1

        self.conv1 = nn.Conv1d(13, 32, kernel_size=kernel_size, dilation =dilation ,stride=stride, padding=padding)
        #convOutLength calculated using equation in nn.conv1d documentation
        convOutLength = (convOutLength + 2*padding -dilation*(kernel_size-1) -1)//stride + 1
        self.conv2 = nn.Conv1d(32, 32, 15, padding=padding)
        convOutLength = (convOutLength + 2*padding -dilation*(kernel_size-1) -1)//stride + 1
        self.conv3 = nn.Conv1d(32, 32, 15, padding=padding)
        convOutLength = (convOutLength + 2*padding -dilation*(kernel_size-1) -1)//stride + 1
        self.maxPool = nn.MaxPool1d(convOutLength)
        self.fc1 = nn.Linear(32, outsize)

    def forward(self,x):
        x= self.conv1(x)
        x= self.conv2(x)
        x= self.conv3(x)
        x = self.maxPool(x)
        x = x.reshape(x.shape[0],-1)
        x = self.fc1(x)
        x = torch.sigmoid(x)
        x = torch.squeeze(x)
        return x

In [0]:
def evaluateModel(model, lossCriterion, X, y, batchSize):
    with torch.no_grad():
        n=batchSize
        batched = list(zip([X[i:i + n] for i in range(0, len(X), n)],
                          [y[i:i + n] for i in range(0, len(y), n)]))
        predList = []
        yList = []
        lossMeans = []
        for i,(batchX, batchY) in enumerate(batched):
            #print('x',type(X), X.dtype)
            #print('batchx',type(batchX), batchX.dtype)
            predProb = model(batchX)
            if np.isnan(predProb.cpu().detach().numpy()).any():
                print("AAAAAA a NAN")
                return
            loss = lossCriterion(predProb, batchY)
            batchPred = np.argmax(predProb, axis=1).tolist()
            predList = predList+batchPred
            batchYargmaxed = batchY.tolist()
            yList = yList+batchYargmaxed
            lossMeans.append(torch.mean(loss).item())

        acc = accuracy_score(yList, predList)
        report = classification_report(yList, predList)
        confMat = confusion_matrix(yList, predList)
        lossMean = sum(lossMeans)/float(len(lossMeans))

    return lossMean, confMat, acc, report

In [0]:
def trainModel(model, optimizer, criterion, train_X, train_y, val_X, val_y, batchSize, startEpoch, endEpoch, modelName):
    notifyEvery = 5

    checkmarkTime = time.time()

    n=batchSize
    batched = list(zip([train_X[i:i + n] for i in range(0, len(train_X), n)],
                      [train_y[i:i + n] for i in range(0, len(train_y), n)]))
    numBatches = len(batched)

    trainLosses= []
    valLosses =[]
    trainConfusionMatrices =[]
    valConfusionMatrices = []
    trainAccuracies = []
    valAccuracies = []
    trainReports = []
    valReports = []

    out_folder = pathlib.Path("/content/drive/My Drive/validated_clips/model_data/")
    out_folder = out_folder / modelName
    out_folder.mkdir(parents=True, exist_ok=True)

    print("number of batches", numBatches)
    for epoch in range(startEpoch, endEpoch):
        print("epoch:", epoch)
        for i,(batchX,batchy) in enumerate(batched):
            optimizer.zero_grad()
            output =  model(batchX)
            loss = criterion(output, batchy)
            loss.backward()
            optimizer.step()
            if np.isnan(output.cpu().detach().numpy()).any():
                print("AAAAAA a NAN")
                return
            if i%notifyEvery ==notifyEvery-1:
                print('[%d, %5d]' %
                  (epoch + 1, i + 1))
                timeTook = time.time() - checkmarkTime
                print("took", timeTook, "seconds for", notifyEvery, "batches")
                if(torch.cuda.is_available()):
                    print(torch.cuda.max_memory_allocated()/1e9, "GB of VRAM being used")
                checkmarkTime = time.time()
        trainLoss, trainConfMat, trainAccuracy, trainReport = evaluateModel(model,criterion,train_X, train_y, batchSize)
        valLoss, valConfMat, valAccuracy, valReport = evaluateModel(model, criterion,val_X, val_y, batchSize)
        trainLosses.append(trainLoss)
        valLosses.append(valLoss)
        trainConfusionMatrices.append(trainConfMat)
        valConfusionMatrices.append(valConfMat)
        trainAccuracies.append(trainAccuracy)
        valAccuracies.append(valAccuracy)
        trainReports.append(trainReport)
        valReports.append(valReport)
        print('val loss', valLoss, "train loss", trainLoss)
        print("train conf mat", trainConfMat)
        print("val conf mat", valConfMat)

        epoch_filename = "epoch"+str(epoch)+".pkl"
        modelEpochPath = out_folder / epoch_filename
        with modelEpochPath.open("wb") as f:
          torch.save(model, f, pickle_module=dill)
    print("finished training")
    
    trainLosses_path = out_folder / "trainLosses.pkl"
    with trainLosses_path.open("wb") as f:
      dill.dump(trainLosses, f)
    valLosses_path = out_folder/"valLosses.pkl"
    with valLosses_path.open("wb") as f:
      dill.dump(valLosses, f)
    trainConfusionMatrices_path = out_folder/"trainConfustionMatrices.pkl"
    with trainConfusionMatrices_path.open("wb") as f:
      dill.dump(trainConfusionMatrices, f)
    valConfusionMatrices_path = out_folder/"valConfustionMatrices.pkl"
    with valConfusionMatrices_path.open("wb") as f:
      dill.dump(valConfusionMatrices, f)
    
    trainAccuracies_path = out_folder / "trainAccuracies.pkl"
    with trainAccuracies_path.open("wb") as f:
      dill.dump(trainAccuracies, f)
    valAccuracies_path = out_folder/"valAccuracies.pkl"
    with valAccuracies_path.open("wb") as f:
      dill.dump(valAccuracies, f)
    trainReports_path = out_folder/"trainReports.pkl"
    with trainReports_path.open("wb") as f:
      dill.dump(trainReports, f)
    valReports_path = out_folder/"valReports.pkl"
    with valReports_path.open("wb") as f:
      dill.dump(valReports, f)

In [83]:
balanced_classes_folder = pathlib.Path("/content/drive/My Drive/validated_clips/balanced_classes/")

test_path = balanced_classes_folder / "test_data.pkl"
val_path = balanced_classes_folder / "val_data.pkl"
train_path = balanced_classes_folder / "train_data.pkl"

with open(test_path , "rb") as f:
  test_data = dill.load(f)
with open(val_path , "rb") as f:
  val_data = dill.load(f)
with open(train_path , "rb") as f:
  train_data = dill.load(f)

train_X = torch.as_tensor(train_data['mfccs'], dtype=torch.float)
val_X = torch.as_tensor(val_data['mfccs'], dtype=torch.float)
print(train_X.shape)
train_X = train_X.permute(0,2,1)
val_X = val_X.permute(0,2,1)

train_labels = train_data['labels']
val_labels = val_data['labels']

le = preprocessing.LabelEncoder()
enc =  preprocessing.OneHotEncoder(handle_unknown='ignore')
le.fit(train_labels)

val_y = le.transform(val_labels)
#val_y =enc.fit_transform(val_y.reshape(-1,1)).toarray() 
val_y = torch.as_tensor(val_y, dtype=torch.long)

train_y = le.transform(train_labels)
#train_y =enc.fit_transform(train_y.reshape(-1,1)).toarray()
train_y = torch.as_tensor(train_y, dtype=torch.long)

torch.Size([1040, 6234, 13])


In [88]:
seqLength = train_X.shape[2]
outsize = len(le.classes_)
print( seqLength, outsize)
mod = downwardSlope(seqLength, outsize)

if (torch.cuda.is_available()):
  device = "cuda:0"
else:
  device = "cpu"
  print("device is",device)

mod = mod.to(device)

criterion = nn.CrossEntropyLoss()
endEpoch = 3
learningRate = 0.001
optimizer = torch.optim.Adam(mod.parameters(), lr = learningRate)

startEpoch = 0
endEpoch = 3
batchSize = 100
modelName = "initial"

print('train_x',type(train_X), train_X.dtype)
print('train_y',type(train_y), train_y.dtype, train_y.shape)

6234 4
device is cpu
train_x <class 'torch.Tensor'> torch.float32
train_y <class 'torch.Tensor'> torch.int64 torch.Size([1040])


In [108]:
trainModel(mod, optimizer,criterion, train_X, train_y, val_X, val_y, batchSize, startEpoch, endEpoch, modelName)

number of batches 11
epoch: 0
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
[1,     5]
took 17.15275287628174 seconds for 5 batches
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
[1,    10]
took 14.264772415161133 seconds for 5 batches
<class 'torch.Tensor'>
epoch done
trainx shape torch.Size([1040, 13, 6234]) train_y shape torch.Size([1040])


  _warn_prf(average, modifier, msg_start, len(result))


val loss 1.6477762460708618 train loss 1.5308151028373025
train conf mat [[  0   0 241  24]
 [  0   0 215  42]
 [  0   0 197  63]
 [  0   0 196  62]]
val conf mat [[ 0  0 28  6]
 [ 0  0 31  3]
 [ 0  0 21 11]
 [ 0  0 31  5]]


  "type " + obj.__name__ + ". It won't be checked "


epoch: 1
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
[2,     5]
took 23.228028059005737 seconds for 5 batches
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
[2,    10]
took 18.467979669570923 seconds for 5 batches
<class 'torch.Tensor'>
epoch done
trainx shape torch.Size([1040, 13, 6234]) train_y shape torch.Size([1040])
val loss 1.5461490750312805 train loss 1.5173856236717918
train conf mat [[  0   0 259   6]
 [  0   5 234  18]
 [  0   0 230  30]
 [  0   0 228  30]]
val conf mat [[ 0  0 32  2]
 [ 0  0 31  3]
 [ 0  0 26  6]
 [ 0  0 34  2]]
epoch: 2
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
[3,     5]
took 19.846147298812866 seconds for 5 batches
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
[3,    10]
took 17