# PyTorch Exploration – CNN Prototype, Fit/Predict, Metrics

In [6]:
import audiomod
import ptmod
# from pymongo import MongoClient
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
# import torchvision
# from torchvision import transforms, utils
import torch.utils.data as data_utils
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
from collections import OrderedDict, defaultdict
import pickle
import os

from sklearn import metrics

%matplotlib inline

## Setup

As detailed in previous notebook, I pull a datagroup from the DB, then a smaller subgroup for testing, split into train/test, and create dataset objects.

In [7]:
sax1203_datagroup = audiomod.pull_datagroup_from_db('sax1203')

sax1203_datagroup.shape

(920, 2)

In [17]:
# pull a smaller sample for PoC run
sub_datagroup = sax1203_datagroup.sample(100)
sub_datagroup.actual.value_counts()

0    53
1    47
Name: actual, dtype: int64

In [20]:
train_df, test_df = audiomod.tts(sub_datagroup)

# this scaling is pretty tiny, but it'll do the trick for a dry run
train_sub = ptmod.SpectroDataset(train_df, scaling=0.125)
test_sub = ptmod.SpectroDataset(test_df, scaling=0.125)

print("Train set length:", len(train_sub))
print("Test set length:", len(test_sub))

Train set length: 82
Test set length: 18


## CNN Design

### N-pixels going into the first FC layer?

The `reduce_axis` function in the `ptmod` module computes the number of pixels along a single axis, given original length, filter length, and stride:

In [21]:
ptmod.reduce_axis(28,5,1)

24

In [22]:
ptmod.reduce_axis(7,5,2)

2

Then, `cnn_pixels_out` gives us the total number of values that would be fed to the first FC layer, given input dimensions and kernel/stride/filters of each convolutional or max-pooling layer. Consider the 28x28 images in MNIST Fashion and the four layers of the CNN:

In [24]:
mnist_cnn_layers = (
    (5,1,10),
    (2,2,0),
    (5,1,20),
    (2,2,0)
)

ptmod.cnn_pixels_out((1,28,28), mnist_cnn_layers)

1 x 28 x 28
10 x 24 x 24
10 x 12 x 12
20 x 8 x 8
20 x 4 x 4


320

And the net from the PyTorch tutorials, optimized for a 32x32 image:

In [25]:
tutorial_layers = (
    (5,1,6),
    (2,2,0),
    (5,1,16),
    (2,2,0)
)

ptmod.cnn_pixels_out((1,32,32), tutorial_layers, drop_last=True)

1 x 32 x 32
6 x 28 x 28
6 x 14 x 14
16 x 10 x 10
16 x 5 x 5


400

With even the tiny 1/8-scale spectros, the output grows significantly when using the same cnn layers as the MNIST example in the PyTorch tutorials:

In [26]:
# PyTorch drops pixels that would require a partial stride to calculate
ptmod.cnn_pixels_out((1,64,54), mnist_cnn_layers, drop_last=True)

1 x 64 x 54
10 x 60 x 50
10 x 30 x 25
20 x 26 x 21
20 x 13 x 10


2600

Playing with some different values:

In [27]:
cnn_layers_test = (
    (5,1,10),
    (2,2,0),
    (5,1,20),
    (2,2,0)
)

ptmod.cnn_pixels_out((1,64,54), cnn_layers_test, drop_last=True)

1 x 64 x 54
10 x 60 x 50
10 x 30 x 25
20 x 26 x 21
20 x 13 x 10


2600

Perhaps reasonable... let's try it.

### CNN Fit

In [80]:
### This assumes a (1,64,54) tensor
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # (in channels, out channels, kernel, stride=s)
        self.conv1 = nn.Conv2d(1, 10, 5, stride=1)
        # (2x2 kernel, stride=2 -- stride defaults to kernel)
        self.pool1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(10, 20, 5, stride=1)
        self.pool2 = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(2600, 50)
        self.fc2 = nn.Linear(50, 2)
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # need to reshape for fully connected layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(x)
        return x


In [81]:
# instantiate model, set loss criterion and optimizer
cnn_1 = CNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(cnn_1.parameters(), lr=0.01) # set momentum if desired

In [82]:
print(cnn_1)

CNN (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (2600 -> 50)
  (fc2): Linear (50 -> 2)
)


In [83]:
# re-instantiate the train loader
train_loader = data_utils.DataLoader(train_sub, 
                                     batch_size=4, 
                                     shuffle=True,
                                     num_workers=2,
                                     drop_last=True)

In [84]:
nb_epochs = 10
# minibatches = 5000

for epoch in range(nb_epochs):
    print("Epoch", epoch+1)
    running_loss = 0.0
    then = time.perf_counter()
    for i, data in enumerate(train_loader, 1):
        sub_then = time.perf_counter()
        # separate input data and labels, dump chunk IDs
        inputs, labels, _ = data
        # wrap in Variable for GD
        inputs, labels = Variable(inputs), Variable(labels)
        # zero parameter gradients, else accumulate
        optimizer.zero_grad()
        # forward prop
        outputs = cnn_1(inputs)
        # calculate loss
        loss = criterion(outputs, labels)
        # backprop
        loss.backward()
        # update weights
        optimizer.step()         
        #verbosity
        sub_now = time.perf_counter()
        print("\r * {} loss: {:.3f}\tTime: {:.3f} ms"
              .format(i, loss.data[0], (sub_now-sub_then)*1000), end='')
        running_loss += loss.data[0]
    now = time.perf_counter()
    print("\r * Avg loss: {:.3f}\tTime: {:.3f} ms"
          .format(running_loss/i, (now-then)*1000))

#         # print running loss
#         running_loss += loss.data[0]
#         if i%minibatches == minibatches:
#             # print every 5,000 minibatches or whatever you set 'minibatches' equal to
#             print('[%d, %5d] loss: %.3f' % (epoch+1, i, running_loss/minibatches))
#             running_loss = 0.0
            
print('\nTraining Complete!')

Epoch 1
 * Avg loss: 0.692	Time: 882.398 ms
Epoch 2
 * Avg loss: 0.691	Time: 733.078 ms
Epoch 3
 * Avg loss: 0.692	Time: 643.584 ms
Epoch 4
 * Avg loss: 0.691	Time: 730.601 ms
Epoch 5
 * Avg loss: 0.690	Time: 744.388 ms
Epoch 6
 * Avg loss: 0.691	Time: 609.332 ms
Epoch 7
 * Avg loss: 0.690	Time: 602.612 ms
Epoch 8
 * Avg loss: 0.690	Time: 607.077 ms
Epoch 9
 * Avg loss: 0.689	Time: 673.601 ms
Epoch 10
 * Avg loss: 0.688	Time: 715.653 ms

Training Complete!


In [33]:
# show learnable parameters for model
params = list(cnn_1.parameters())
# print(len(params))
# print(params[0].size())  # conv1's .weight
for param in params:
    print(param.size())

torch.Size([10, 1, 5, 5])
torch.Size([10])
torch.Size([20, 10, 5, 5])
torch.Size([20])
torch.Size([50, 2600])
torch.Size([50])
torch.Size([2, 50])
torch.Size([2])


In [34]:
# view output, should be the predicted probability for each category
train_batch = next(iter(train_loader))

input_var = Variable(train_batch[0])
out = cnn_1(input_var)
print(out)

Variable containing:
 0.5311  0.4689
 0.5008  0.4992
 0.4878  0.5122
 0.5653  0.4347
[torch.FloatTensor of size 4x2]



### Prediction

In [35]:
classes = ["no sax", "sax"]

In [36]:
test_loader = data_utils.DataLoader(test_sub, 
                                    batch_size=4, 
                                    shuffle=False, # set for False for test set
                                    num_workers=2,
                                    drop_last=False)

In [37]:
test_iter = iter(test_loader)
spectros, labels, chunk_ids = test_iter.next()

# show ground truth
print("Groundtruth")
print(labels.size(0))
for j in range(labels.size(0)):
    print("{}: {}".format(chunk_ids[j], classes[labels[j]]))

# from PyTorch tutorial
# print('GroundTruth: ', ' '.join('%8s' % classes[labels[j]] for j in range(4)))

Groundtruth
4
015094: sax
008183: sax
017245: sax
016814: sax


In [38]:
outputs = cnn_1(Variable(spectros))
print(outputs.data)


 0.5059  0.4941
 0.4927  0.5073
 0.5292  0.4708
 0.4948  0.5052
[torch.FloatTensor of size 4x2]



In [39]:
_, predicted = torch.max(outputs.data, 1)

print("Predictions")
for j in range(4):
    print("{}: {}".format(chunk_ids[j], classes[predicted[j]]))

Predictions
015094: no sax
008183: sax
017245: no sax
016814: sax


In [41]:
correct = 0
total = 0
results = {}
for data in test_loader:
    spectros, labels, chunk_ids = data
    outputs = cnn_1(Variable(spectros))
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels).sum()
    for c_id, gt, pred, out in zip(chunk_ids, labels, predicted, outputs.data):
        results[c_id] = (gt, pred, out)

print('Accuracy of the network on the test spectros: %d %%' % (
    100 * correct / total))

Accuracy of the network on the test spectros: 61 %


In [42]:
print(results)

{'015094': (1, 0, 
 0.5059
 0.4941
[torch.FloatTensor of size 2]
), '008183': (1, 1, 
 0.4927
 0.5073
[torch.FloatTensor of size 2]
), '017245': (1, 0, 
 0.5292
 0.4708
[torch.FloatTensor of size 2]
), '016814': (1, 1, 
 0.4948
 0.5052
[torch.FloatTensor of size 2]
), '015643': (0, 0, 
 0.5366
 0.4634
[torch.FloatTensor of size 2]
), '010724': (0, 0, 
 0.5362
 0.4638
[torch.FloatTensor of size 2]
), '017316': (1, 0, 
 0.5046
 0.4954
[torch.FloatTensor of size 2]
), '011646': (0, 0, 
 0.5328
 0.4672
[torch.FloatTensor of size 2]
), '016610': (1, 0, 
 0.5240
 0.4760
[torch.FloatTensor of size 2]
), '007566': (1, 1, 
 0.4869
 0.5131
[torch.FloatTensor of size 2]
), '015074': (0, 0, 
 0.5148
 0.4852
[torch.FloatTensor of size 2]
), '012879': (1, 0, 
 0.5117
 0.4883
[torch.FloatTensor of size 2]
), '011620': (0, 1, 
 0.4828
 0.5172
[torch.FloatTensor of size 2]
), '014708': (0, 0, 
 0.5318
 0.4682
[torch.FloatTensor of size 2]
), '014756': (0, 0, 
 0.5481
 0.4519
[torch.FloatTensor of size 

In [43]:
y = []
y_hat = []
for val in results.values():
    y.append(val[0])
    y_hat.append(val[1])

print(y)
print(y_hat)

[1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0]
[0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0]


### Developing Fit and Predict Functions

In [44]:
def fit(cnn, 
        dataset, 
        optimizer, 
        criterion, 
        num_epochs, 
        batch_size=4, 
        minibatches=1):
    """
    Runs feed-forward and back-prop to train CNN model.
    ---
    IN
    cnn: CNN instance 
    dataset: built SpectroDataset object
    optimizer: PyTorch optimizer for back-prop
    criterion: PyTorch loss object for loss metric
    num_epochs: number of times to cycle through data (int)
    batch_size: number of records per batch (int)
    minibatches: print loss and time every n minibatches (int)
    NO OUT
    """
    
    train_loader = data_utils.DataLoader(
        dataset, 
        batch_size=4, 
        shuffle=True,
        num_workers=2,
        drop_last=True
    )

    for epoch in range(num_epochs):
        print("Epoch", epoch+1)
        running_loss = 0.0
        then = time.perf_counter()
        for i, data in enumerate(train_loader, 1):
            sub_then = time.perf_counter()
            # separate input data and labels, dump chunk IDs
            spectros, labels, _ = data
            # wrap in Variable for GD
            spectros, labels = Variable(spectros), Variable(labels)
            # zero parameter gradients, else accumulate
            optimizer.zero_grad()
            # forward prop
            outputs = cnn(spectros)
            # calculate loss
            loss = criterion(outputs, labels)
            # backprop
            loss.backward()
            # update weights
            optimizer.step()         
            #verbosity
            sub_now = time.perf_counter()
            print("\r * {} loss: {:.3f}\tTime: {:.3f} ms"
                  .format(i, loss.data[0], (sub_now-sub_then)*1000), end='')
            running_loss += loss.data[0]
    #         running_loss += loss.data[0]
    #         if i%minibatches == minibatches:
    #             # print every 5,000 minibatches or whatever you set 'minibatches' equal to
    #             print('[%d, %5d] loss: %.3f' % (epoch+1, i, running_loss/minibatches))
    #             running_loss = 0.0
        now = time.perf_counter()
        print("\r * Avg loss: {:.3f}\tTime: {:.3f} ms"
              .format(running_loss/i, (now-then)*1000))
    print('\nTraining Complete!')

In [45]:
def predict(cnn, dataset, batch_size=4, res_format='df'):
    """
    Predicts values on trained CNN.
    ---
    IN
    cnn: trained CNN instance
    dataset: built SpectroDataset object
    batch_size: number of records per batch
    res_format: results format, either 'df' for pandas dataframe or 'dict'
        for dictionary (str)
    OUT
    results: if 'dict', dictionary with chunk ID as key, and a tuple of (actual,
        predicted, output_array) as value (dict); if 'df', pandas dataframe
    """
    
    test_loader = data_utils.DataLoader(
        dataset, 
        batch_size=4, 
        shuffle=False, # set for False for test set
        num_workers=2
    )
    
    results = {}
    
    for data in test_loader:
        spectros, labels, chunk_ids = data
        outputs = cnn_1(Variable(spectros))
        _, pred = torch.max(outputs.data, 1)
        for c_id, y, y_hat, out in zip(chunk_ids, labels, pred, outputs.data):
            results[c_id] = (y, y_hat, out)
            
    if res_format == 'df':
        results = results_to_df(results)
    
    return results

In [46]:
def results_to_df(results):
    """
    Converts predict results to Pandas dataframe.
    ---
    IN
    results: dictionary generated by results function (dict)
    OUT
    df: pandas dataframe of results 
    """

    cols = ['chunk_id', 'actual', 'pred', 'e0', 'e1']
    results_trans = OrderedDict.fromkeys(cols)
    for k in results_trans.keys():
        results_trans[k] = []

    for k, v in results.items():
        for col, val in zip(cols, [k, v[0], v[1], v[2][0], v[2][1]]):
            results_trans[col].append(val)
    
    df = pd.DataFrame(results_trans)
    
    return df

In [47]:
fit(cnn_1, 
    train_sub, 
    optim.SGD(cnn_1.parameters(), lr=0.01), 
    nn.CrossEntropyLoss(), 
    10)

Epoch 1
 * Avg loss: 0.684	Time: 770.440 ms
Epoch 2
 * Avg loss: 0.685	Time: 622.137 ms
Epoch 3
 * Avg loss: 0.683	Time: 628.895 ms
Epoch 4
 * Avg loss: 0.682	Time: 733.074 ms
Epoch 5
 * Avg loss: 0.680	Time: 832.691 ms
Epoch 6
 * Avg loss: 0.681	Time: 852.088 ms
Epoch 7
 * Avg loss: 0.679	Time: 620.179 ms
Epoch 8
 * Avg loss: 0.679	Time: 663.360 ms
Epoch 9
 * Avg loss: 0.677	Time: 622.065 ms
Epoch 10
 * Avg loss: 0.677	Time: 609.768 ms

Training Complete!


In [48]:
train_results = predict(cnn_1, train_sub)
test_results = predict(cnn_1, test_sub)

### Custom Config of Class?

In [54]:
### Trying for custom input and fit/predict as methods
class CNN_cpcpff(nn.Module):
    """
    Pass input params as a dictionary where each item is a layer
    and each value is a list, following this convention:
    
    Convolutional: c1: [kernel, stride, channels_out]
    Max Pooling: p1: [kernel, stride]
    Fully Connected: f1: [channels_in, channels_out]
    
    For example:
    
        params = {
            'c1': [5,1,10],
            'p1': [2,2],
            'c2': [5,1,20],
            'p2': [2,2],
            'f1': [2600,50],
            'f2': [50,2]
        }
    
    All values must be integers.
    """
    
    def __init__(self, params, rs=23):
        super(CNN_cpcpff, self).__init__()
        self.p = params
        self.rs = rs
        self.seed_gen = torch.manual_seed(self.rs)
        # (in channels, out channels, kernel, stride=s)
        self.conv1 = nn.Conv2d(1, 
                               self.p['c1'][2], 
                               self.p['c1'][0], 
                               stride=self.p['c1'][1])
        # (2x2 kernel, stride=2 -- stride defaults to kernel)
        self.pool1 = nn.MaxPool2d(self.p['p1'][0], self.p['p1'][1])
        self.conv2 = nn.Conv2d(self.p['c1'][2], 
                               self.p['c2'][2], 
                               self.p['c2'][0], 
                               stride=self.p['c2'][1])
        self.pool2 = nn.MaxPool2d(self.p['p2'][0], self.p['p2'][1])
        self.fc1 = nn.Linear(self.p['f1'][0], self.p['f1'][1])
        self.fc2 = nn.Linear(self.p['f2'][0], self.p['f2'][1])
        # self.seed_gen = None
        
    def forward(self, x):
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # need to reshape for fully connected layer
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(x)
        return x

    def save_myself(self, fname, dir_out='../data'):
        """
        Saves current object as a .pkl file.
        ---
        fname: filename of choice (str)
        dir_out: path to save directory (str)
        """
        
        fpath = os.path.join(dir_out, fname + '.p')
        with open(fpath, 'wb') as pf:
            pickle.dump(self, pf)

In [55]:
params_init = {
    'c1': [5,1,10],
    'p1': [2,2],
    'c2': [5,1,20],
    'p2': [2,2],
    'f1': [2600,50],
    'f2': [50,2]
}

In [72]:
cnn_test = CNN_cpcpff(params_init, rs=42)

In [73]:
print(cnn_test)

CNN_cpcpff (
  (conv1): Conv2d(1, 10, kernel_size=(5, 5), stride=(1, 1))
  (pool1): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (conv2): Conv2d(10, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
  (fc1): Linear (2600 -> 50)
  (fc2): Linear (50 -> 2)
)


In [74]:
fit(cnn_test, 
    train_sub, 
    optim.SGD(cnn_test.parameters(), lr=0.01), 
    nn.CrossEntropyLoss(), 
    10)

Epoch 1
 * Avg loss: 0.695	Time: 794.006 ms
Epoch 2
 * Avg loss: 0.693	Time: 646.161 ms
Epoch 3
 * Avg loss: 0.692	Time: 677.797 ms
Epoch 4
 * Avg loss: 0.691	Time: 642.974 ms
Epoch 5
 * Avg loss: 0.691	Time: 618.255 ms
Epoch 6
 * Avg loss: 0.690	Time: 706.024 ms
Epoch 7
 * Avg loss: 0.688	Time: 670.740 ms
Epoch 8
 * Avg loss: 0.687	Time: 684.706 ms
Epoch 9
 * Avg loss: 0.687	Time: 709.745 ms
Epoch 10
 * Avg loss: 0.686	Time: 685.959 ms

Training Complete!


In [62]:
res_train_df = predict(cnn_test, train_sub)
res_test_df = predict(cnn_test, test_sub)

In [161]:
# cnn_test.save_myself('test_model')

It works!

### Metrics

In [152]:
def get_scores(train_df, test_df, verbose=True):
    """
    Calculates accuracy, recall, and specificity for train and test
    predictions.
    ### add precision?
    ---
    IN
    train_df: predict results df of train set
    test_df: predict results df of test set
    OUT
    scores: scores bundle
    """
    
    scores = defaultdict(list)
    score_types = ['acc', 'rec', 'spec']
    
    for df in [train_df, test_df]:
        df_scores = []
        df_scores.append(
            metrics.accuracy_score(df.actual, df.pred))
        df_scores.append(
            metrics.recall_score(df.actual, df.pred))
        df_scores.append(
            metrics.recall_score(df.actual, df.pred, pos_label=0))
#         df_scores.append(df[df.actual == df.pred].shape[0] / df.shape[0])
#         df_scores.append(df[(df.actual == 1) & (df.pred == 1)].shape[0] /
#                          df[df.actual == 1].shape[0])
#         df_scores.append(df[(df.actual == 0) & (df.pred == 0)].shape[0] /
#                          df[df.actual == 0].shape[0])
        for n, s in zip(score_types, df_scores):
            scores[n].append(s)
        
    if verbose:
        print("MODEL SCORES")
        print("Score\tTrain\tTest")
        print("-" * 24)
        for score in score_types:
            print("{}\t{:.3f}\t{:.3f}".format(
                score.capitalize(), 
                scores[score][0],
                scores[score][1])
            )
        
    return scores    

In [153]:
scores = get_scores(res_train_df, res_test_df, verbose=True)

MODEL SCORES
Score	Train	Test
------------------------
Acc	0.512	0.444
Rec	0.024	0.000
Spec	1.000	1.000


Into the module with it all and onto the real stuff...