This notebook takes melspectrogram and tempogram as inputs and uses Resnet model . The model overfitted. Train to avoid overfitting. [Work in Progress]

In [1]:
# Resnet 

from __future__ import print_function, division
import os
import torch
import pandas as pd
import skimage
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from skimage.transform import resize
import torch.nn as nn

In [2]:
## Load Data
labels = pd.read_csv('./input_label.csv')
#class MelspecDataset(Dataset):
class Dataset(Dataset):
    def __init__(self, csv_file, root_dir, tempo_dir, transform=None):
        self.labels = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.tempo_dir = tempo_dir
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, self.labels.iloc[idx, 0]+'.png')
        image = io.imread(img_name)
        
        l = self.labels.iloc[idx, 1:]       
        
        tempo_img_name = os.path.join(self.tempo_dir, self.labels.iloc[idx, 0]+'.png')
        tempo_image = io.imread(tempo_img_name)
        
        sample = {'image': image, 'label': l, 'tempo_image': tempo_image}

        if self.transform:
            sample = self.transform(sample)

        return sample

class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample):
        image, label, tempo_image = sample['image'], sample['label'], sample['tempo_image']
        

        # The input is a pandas Series
        if isinstance(label, pd.core.series.Series):
            label = label.values[0]
            
        label_id = ["Happy", "Angry", "Tender", "Scary", "Sad", "Funny"].index(label)
        
        #print (label, type(label))
        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W
        
        #TODO: Convert image to RGB
        from PIL import Image
        im = Image.fromarray(image)
        image = np.array(im.convert('RGB'))
        image = image.transpose((2, 0, 1))
        #print (type(image), image.shape)
        image = resize(image, (3, 224, 224))
        #print("After resize",type(image), image.shape)
        

        
        # tempogram 
        im = Image.fromarray(tempo_image)
        tempo_image = np.array(im.convert('RGB'))
        tempo_image = tempo_image.transpose((2, 0, 1))
        tempo_image = resize(tempo_image, (3, 224, 224))
        
        return torch.from_numpy(image), torch.from_numpy(tempo_image), torch.from_numpy(np.array([label_id]))
        
transformed_dataset = Dataset(csv_file='./input_label.csv',
                              root_dir='./MelSpectrogram/',
                              tempo_dir='./tempogram/',
                              transform=ToTensor())

In [3]:
valid_size = 0.2
num_train = len(transformed_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))

In [4]:
# Shuffle the indices
np.random.seed(123)
np.random.shuffle(indices)

# params
batch_size = 16
num_workers = 1

train_idx, valid_idx = indices[split:], indices[:split]

from torch.utils.data.sampler import SubsetRandomSampler
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)


train_loader = torch.utils.data.DataLoader(
        transformed_dataset, batch_size=batch_size, sampler=train_sampler,
        num_workers=num_workers)

valid_loader = torch.utils.data.DataLoader(
        transformed_dataset, batch_size=batch_size, sampler=valid_sampler,
        num_workers=num_workers)

In [5]:
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 25
num_classes = 6
batch_size = 64
learning_rate = 0.001

In [6]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [7]:
#https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

import torch.nn as nn
import torch.utils.model_zoo as model_zoo


__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)


class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.dropout1 = nn.Dropout(0.5)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dropout2 = nn.Dropout(0.5)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.dpo1 = nn.Dropout(0.5)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.dpo2 = nn.Droput(0.5)
        self.conv3 = conv1x1(planes, planes * self.expansion)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.dpo3 = nn.Dropout(0.5)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.dpo1(0.5)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.dpo2(0.5)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)
        out = self.dpo3(0.5)

        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False):
        super(ResNet, self).__init__()
        self.inplanes = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.dropout = nn.Dropout(0.5)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc_inp1 = nn.Linear(512 * block.expansion, 256)  
        
        # 2nd input - replicate all the layers needed for the Resnet architecture
        self.conv1_inp2 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1_inp2 = nn.BatchNorm2d(64)
        self.relu_inp2 = nn.ReLU(inplace=True)
        self.maxpool_inp2 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.dropout_inp2 = nn.Dropout(0.5)
        self.layer1_inp2 = self._make_layer(block, 64, layers[0])
        self.layer2_inp2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3_inp2 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4_inp2 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool_inp2 = nn.AdaptiveAvgPool2d((1, 1))
        
        self.fc_inp2 = nn.Linear(512 * block.expansion, 256)
        
        self.fc_out = nn.Linear(256 + 256, num_classes)
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        inp1 = x[0]
        inp2 = x[1]
        
        x = self.conv1(inp1)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc_inp1(x)
        
        inp2 = self.conv1(inp2)
        inp2 = self.bn1(inp2)
        inp2 = self.relu(inp2)
        inp2 = self.dropout(inp2)
        inp2 = self.maxpool(inp2)

        inp2 = self.layer1(inp2)
        inp2 = self.layer2(inp2)
        inp2 = self.layer3(inp2)
        inp2 = self.layer4(inp2)

        inp2 = self.avgpool(inp2)
        inp2 = inp2.view(inp2.size(0), -1)
        
        inp2 = self.fc_inp2(inp2)
        
        class_out = self.fc_out(torch.cat((x, inp2), 1))
        return class_out


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
    return model



def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.
    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
    return model


In [8]:
model = resnet18(num_classes=6)
model = model.to(device, dtype=torch.float)

In [10]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

num_epochs = 25
loss_tracker = []
valid_cfn_matrix = []

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    total = 0
    correct = 0
    for i, (mels_images, tempo_images, labels) in enumerate(train_loader):

        mels_images = mels_images.float()
        tempo_images = tempo_images.float()
        
        mels_images = mels_images.to(device, dtype=torch.float)
        tempo_images = tempo_images.to(device, dtype=torch.float)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model([mels_images, tempo_images])
        
        loss = criterion(outputs, labels.squeeze())
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        _x, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (labels.squeeze() == predicted).sum().item()
        loss_tracker.append(loss.item())
        if (i+1) % 100 == 0:
            #print (correct, total)
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
            
          
    all_predictions = []
    all_labels = []

    with torch.no_grad():
        correct = 0
        total = 0
        for i, (mels_images, tempo_images, labels) in enumerate(valid_loader):
            mels_images = mels_images.float()
            tempo_images = tempo_images.float()
            mels_images = mels_images.to(device)
            tempo_images = tempo_images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model([mels_images, tempo_images])

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (labels.squeeze() == predicted).sum().item()

            all_predictions.extend(list(predicted.cpu().numpy()))
            all_labels.extend(list(labels.squeeze().cpu().numpy()))

        print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))
        
        import sklearn
        from sklearn.metrics import confusion_matrix
        cf_matrix = sklearn.metrics.confusion_matrix(all_predictions, all_labels)
        valid_cfn_matrix.append(cf_matrix)
        print (cf_matrix)

  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [1/25], Step [100/508], Loss: 1.5082
Epoch [1/25], Step [200/508], Loss: 1.3345
Epoch [1/25], Step [300/508], Loss: 1.2063
Epoch [1/25], Step [400/508], Loss: 0.7352
Epoch [1/25], Step [500/508], Loss: 0.9971


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.994578610152786 %
[[ 92   3  59   1   9  53]
 [ 18 174  22   8   5  12]
 [ 95   7 491  34 180  64]
 [  5  13  25 241  12   5]
 [  5   1  91  32 129   0]
 [ 38   3  29   1   2  70]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [2/25], Step [100/508], Loss: 1.0050
Epoch [2/25], Step [200/508], Loss: 0.9154
Epoch [2/25], Step [300/508], Loss: 1.3082
Epoch [2/25], Step [400/508], Loss: 1.3678
Epoch [2/25], Step [500/508], Loss: 1.1815


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.30458353868901 %
[[102   4  82   1  11  74]
 [ 29 171  21   2   4  21]
 [ 99   8 523  39 212  69]
 [  2  17  24 261  21   3]
 [  5   1  63  13  89   0]
 [ 16   0   4   1   0  37]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [3/25], Step [100/508], Loss: 1.2378
Epoch [3/25], Step [200/508], Loss: 0.9852
Epoch [3/25], Step [300/508], Loss: 0.9145
Epoch [3/25], Step [400/508], Loss: 0.9194
Epoch [3/25], Step [500/508], Loss: 1.4399


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.15672745194677 %
[[ 17   1  11   0   0  16]
 [ 19 173  18   6   2   9]
 [178  11 602  39 255 103]
 [  7  11  34 260  20   6]
 [  2   1  36  12  58   0]
 [ 30   4  16   0   2  70]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [4/25], Step [100/508], Loss: 0.9037
Epoch [4/25], Step [200/508], Loss: 0.8283
Epoch [4/25], Step [300/508], Loss: 0.8148
Epoch [4/25], Step [400/508], Loss: 1.1941
Epoch [4/25], Step [500/508], Loss: 1.0028


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 60.177427304090685 %
[[123   5  81   2  12  82]
 [ 18 180  19  11   4  18]
 [ 91   9 527  33 204  60]
 [  3   7  26 258  23   4]
 [  5   0  51  12  93   0]
 [ 13   0  13   1   1  40]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [5/25], Step [100/508], Loss: 0.7577
Epoch [5/25], Step [200/508], Loss: 0.8144
Epoch [5/25], Step [300/508], Loss: 0.6037
Epoch [5/25], Step [400/508], Loss: 0.6433
Epoch [5/25], Step [500/508], Loss: 0.8236


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.551010349926074 %
[[ 59   2  26   0   1  54]
 [ 10 162   8   5   3   9]
 [155  14 550  27 192  99]
 [  7  18  35 266  26   5]
 [  7   1  86  18 114   0]
 [ 15   4  12   1   1  37]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [6/25], Step [100/508], Loss: 0.7743
Epoch [6/25], Step [200/508], Loss: 1.0636
Epoch [6/25], Step [300/508], Loss: 0.9157
Epoch [6/25], Step [400/508], Loss: 0.6521
Epoch [6/25], Step [500/508], Loss: 0.5838


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 59.4381468703795 %
[[ 61   3  23   1   2  33]
 [ 22 160   9   3   3  15]
 [124   4 549  25 201  87]
 [ 10  26  42 271  28   7]
 [  3   1  68  16 103   0]
 [ 33   7  26   1   0  62]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [7/25], Step [100/508], Loss: 0.8836
Epoch [7/25], Step [200/508], Loss: 0.8607
Epoch [7/25], Step [300/508], Loss: 1.0104
Epoch [7/25], Step [400/508], Loss: 1.0230
Epoch [7/25], Step [500/508], Loss: 0.7263


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 61.409561360276 %
[[ 74   3  37   1   5  35]
 [ 14 179  15   9   3  12]
 [102   7 541  28 218  55]
 [  3   5  21 270  29   1]
 [  3   0  57   7  81   0]
 [ 57   7  46   2   1 101]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [8/25], Step [100/508], Loss: 0.9924
Epoch [8/25], Step [200/508], Loss: 0.7837
Epoch [8/25], Step [300/508], Loss: 1.3640
Epoch [8/25], Step [400/508], Loss: 1.0731
Epoch [8/25], Step [500/508], Loss: 1.1052


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 62.24741251848201 %
[[126   4  79   1  10  59]
 [ 10 170   8   2   3   6]
 [ 62   3 458  20 158  39]
 [  6  18  40 277  23   7]
 [  5   1  95  15 139   0]
 [ 44   5  37   2   4  93]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [9/25], Step [100/508], Loss: 1.0381
Epoch [9/25], Step [200/508], Loss: 1.1413
Epoch [9/25], Step [300/508], Loss: 0.8889
Epoch [9/25], Step [400/508], Loss: 0.9971
Epoch [9/25], Step [500/508], Loss: 1.0886


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 59.635288319369145 %
[[ 93   2  78   1  14  40]
 [  7 178  14   7   3   8]
 [ 56   5 497  28 234  31]
 [  3   6  36 277  36   3]
 [  2   0  22   2  43   0]
 [ 92  10  70   2   7 122]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [10/25], Step [100/508], Loss: 0.8995
Epoch [10/25], Step [200/508], Loss: 0.9923
Epoch [10/25], Step [300/508], Loss: 1.0915
Epoch [10/25], Step [400/508], Loss: 0.6806
Epoch [10/25], Step [500/508], Loss: 0.9172


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 61.705273533760476 %
[[110   4  63   1   7  59]
 [ 12 178  13   5   3   7]
 [ 93   6 491  21 178  63]
 [  1  10  23 266  14   3]
 [  6   1 103  23 135   0]
 [ 31   2  24   1   0  72]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [11/25], Step [100/508], Loss: 1.0881
Epoch [11/25], Step [200/508], Loss: 1.1510
Epoch [11/25], Step [300/508], Loss: 0.7209
Epoch [11/25], Step [400/508], Loss: 1.0517
Epoch [11/25], Step [500/508], Loss: 0.8896


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 60.91670773780187 %
[[ 93   7  54   1   5  57]
 [  7 170   8   5   2   6]
 [118  14 591  29 261  74]
 [  5   7  28 277  26   4]
 [  1   0  18   2  42   0]
 [ 29   3  18   3   1  63]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [12/25], Step [100/508], Loss: 0.6244
Epoch [12/25], Step [200/508], Loss: 0.6785
Epoch [12/25], Step [300/508], Loss: 0.8939
Epoch [12/25], Step [400/508], Loss: 0.9069
Epoch [12/25], Step [500/508], Loss: 0.9085


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 61.113849186791526 %
[[ 78   1  42   1   6  27]
 [  9 176   9   7   2   9]
 [ 85   7 520  38 212  45]
 [  1   7  26 260  29   0]
 [  4   1  57   5  83   0]
 [ 76   9  63   6   5 123]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [13/25], Step [100/508], Loss: 0.8532
Epoch [13/25], Step [200/508], Loss: 1.0428
Epoch [13/25], Step [300/508], Loss: 0.5554
Epoch [13/25], Step [400/508], Loss: 0.9179
Epoch [13/25], Step [500/508], Loss: 0.7006


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 61.50813208477082 %
[[ 86   2  35   2   3  38]
 [ 10 175  13  19   4   8]
 [109  11 541  43 197  58]
 [  2   6  19 236  19   3]
 [  3   1  80  16 113   0]
 [ 43   6  29   1   1  97]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [14/25], Step [100/508], Loss: 0.6496
Epoch [14/25], Step [200/508], Loss: 0.8041
Epoch [14/25], Step [300/508], Loss: 0.7096
Epoch [14/25], Step [400/508], Loss: 0.7082
Epoch [14/25], Step [500/508], Loss: 0.4399


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 61.113849186791526 %
[[ 95   4  44   1   3  63]
 [ 12 178  16  13   3   6]
 [111   9 581  41 241  69]
 [  0   4  15 241   8   2]
 [  3   1  43  19  81   0]
 [ 32   5  18   2   1  64]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [15/25], Step [100/508], Loss: 1.0922
Epoch [15/25], Step [200/508], Loss: 0.7232
Epoch [15/25], Step [300/508], Loss: 0.8131
Epoch [15/25], Step [400/508], Loss: 1.0983
Epoch [15/25], Step [500/508], Loss: 0.7293


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 60.37456875308033 %
[[122   6  91   3   5  62]
 [ 12 174  14  12   4   6]
 [ 66   7 484  26 212  41]
 [  1   7  25 262  20   2]
 [  5   1  71  11  90   0]
 [ 47   6  32   3   6  93]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [16/25], Step [100/508], Loss: 0.5527
Epoch [16/25], Step [200/508], Loss: 1.1898
Epoch [16/25], Step [300/508], Loss: 0.5377
Epoch [16/25], Step [400/508], Loss: 0.6982
Epoch [16/25], Step [500/508], Loss: 0.6071


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 62.24741251848201 %
[[ 90   2  50   0   5  43]
 [  6 174   9   5   1   4]
 [116  14 593  37 248  79]
 [  1   8  18 267  17   3]
 [  2   1  32   8  64   0]
 [ 38   2  15   0   2  75]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [17/25], Step [100/508], Loss: 0.2821
Epoch [17/25], Step [200/508], Loss: 0.9562
Epoch [17/25], Step [300/508], Loss: 0.7836
Epoch [17/25], Step [400/508], Loss: 0.6032
Epoch [17/25], Step [500/508], Loss: 0.8594


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 61.21241991128635 %
[[122  12  76   0   9  50]
 [  5 151   3   2   2   2]
 [ 79  11 491  20 184  56]
 [  3  22  41 281  30   9]
 [  5   1  83  12 110   0]
 [ 39   4  23   2   2  87]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [18/25], Step [100/508], Loss: 0.5045
Epoch [18/25], Step [200/508], Loss: 0.5139
Epoch [18/25], Step [300/508], Loss: 0.4586
Epoch [18/25], Step [400/508], Loss: 1.0340
Epoch [18/25], Step [500/508], Loss: 0.3557


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 60.6209955643174 %
[[ 99   2  71   0   8  56]
 [ 12 170   7   7   2   4]
 [ 92  10 533  28 215  64]
 [  3  14  23 258  13   7]
 [  3   1  61  23  97   0]
 [ 44   4  22   1   2  73]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [19/25], Step [100/508], Loss: 0.5472
Epoch [19/25], Step [200/508], Loss: 1.1497
Epoch [19/25], Step [300/508], Loss: 0.6954
Epoch [19/25], Step [400/508], Loss: 0.2894
Epoch [19/25], Step [500/508], Loss: 0.5102


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.551010349926074 %
[[ 94  17  61   2   8  45]
 [  6 153   5   5   3   4]
 [ 97  22 444  39 147  57]
 [  0   4  17 235   7   1]
 [  9   1 163  33 169   4]
 [ 47   4  27   3   3  93]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [20/25], Step [100/508], Loss: 0.2399
Epoch [20/25], Step [200/508], Loss: 0.6856
Epoch [20/25], Step [300/508], Loss: 0.5883
Epoch [20/25], Step [400/508], Loss: 0.2945
Epoch [20/25], Step [500/508], Loss: 0.7273


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.94529324790537 %
[[136   6 116   1  28  56]
 [  7 172  12   7   1   6]
 [ 57  10 376  16 140  29]
 [  3   8  27 269  22   9]
 [  5   2 152  21 142   3]
 [ 45   3  34   3   4 101]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [21/25], Step [100/508], Loss: 0.5265
Epoch [21/25], Step [200/508], Loss: 0.3711
Epoch [21/25], Step [300/508], Loss: 0.2861
Epoch [21/25], Step [400/508], Loss: 0.4056
Epoch [21/25], Step [500/508], Loss: 0.4661


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.649581074420894 %
[[112  13  93   1  11  53]
 [  8 159   5   6   2   3]
 [ 71   9 438  26 177  35]
 [  3  15  32 268  29   6]
 [  8   1 101  13 109   3]
 [ 51   4  48   3   9 104]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [22/25], Step [100/508], Loss: 0.5863
Epoch [22/25], Step [200/508], Loss: 0.2693
Epoch [22/25], Step [300/508], Loss: 0.3446
Epoch [22/25], Step [400/508], Loss: 0.6249
Epoch [22/25], Step [500/508], Loss: 0.0888


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 58.600295712173484 %
[[129   7  92   0  16  54]
 [  6 162   8   9   1   4]
 [ 78  12 430  30 163  63]
 [  1  14  18 248  12   2]
 [  8   0 142  24 143   4]
 [ 31   6  27   6   2  77]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [23/25], Step [100/508], Loss: 0.2862
Epoch [23/25], Step [200/508], Loss: 0.2375
Epoch [23/25], Step [300/508], Loss: 0.2142
Epoch [23/25], Step [400/508], Loss: 0.5695
Epoch [23/25], Step [500/508], Loss: 0.2709


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 59.33957614588467 %
[[106   3  79   0   9  39]
 [  4 168   5   9   3   5]
 [ 83  15 486  40 206  54]
 [  3  10   9 243  12   3]
 [  8   1  98  18 102   4]
 [ 49   4  40   7   5  99]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [24/25], Step [100/508], Loss: 0.1058
Epoch [24/25], Step [200/508], Loss: 0.1713
Epoch [24/25], Step [300/508], Loss: 0.2646
Epoch [24/25], Step [400/508], Loss: 0.1421
Epoch [24/25], Step [500/508], Loss: 0.1290


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 57.91030064070971 %
[[ 96   9  66   3   7  50]
 [ 10 166  14   9   5   7]
 [107  16 479  37 200  55]
 [  2   5  24 249  15   6]
 [  4   0 109  10 103   4]
 [ 34   5  25   9   7  82]]


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Epoch [25/25], Step [100/508], Loss: 0.0658
Epoch [25/25], Step [200/508], Loss: 0.0556
Epoch [25/25], Step [300/508], Loss: 0.5709
Epoch [25/25], Step [400/508], Loss: 0.0634
Epoch [25/25], Step [500/508], Loss: 0.0413


  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 57.71315919172006 %
[[120   8 108   0  16  58]
 [  6 152   6   4   2   3]
 [ 84  23 444  33 177  57]
 [  2  12  29 264  25   4]
 [  9   1 107  12 110   1]
 [ 32   5  23   4   7  81]]


In [13]:
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
all_predictions = []
all_labels = []
with torch.no_grad():
    correct = 0
    total = 0
    for i, (mels_images, tempo_images, labels) in enumerate(valid_loader):
        mels_images = mels_images.float()
        tempo_images = tempo_images.float()
        
        mels_images = mels_images.to(device)
        tempo_images = tempo_images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model([mels_images, tempo_images])
 
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (labels.squeeze() == predicted).sum().item()

        all_predictions.extend(list(predicted.cpu().numpy()))
        all_labels.extend(list(labels.squeeze().cpu().numpy()))

    print('Test Accuracy of the model on the test images: {} %'.format(100 * correct / total))

  warn("The default mode, 'constant', will be changed to 'reflect' in "


Test Accuracy of the model on the test images: 48.74322326269098 %


In [14]:
import sklearn
from sklearn.metrics import confusion_matrix
print (sklearn.metrics.confusion_matrix(all_predictions, all_labels))

[[146  39 158   7  44  72]
 [  2  80   1   3   1   0]
 [ 51  21 381  77 159  43]
 [  4   6  13 176   5   0]
 [  5   1 119  43 122   5]
 [ 45  54  45  11   6  84]]


``` Previous run 
Test Accuracy of the model on the test images: 60.177427304090685 %
[[ 94   2  68   0   5  41]
 [ 13 178  23  20   4  14]
 [ 83   7 540  36 244  50]
 [  1   5  21 249  18   0]
 [  3   0  33   9  61   0]
 [ 59   9  32   3   5  99]]
```

Note --

model.eval - acc =48.74%
with torch.no_grad(): - acc = 62.25%