In [1]:
import os
import librosa
import librosa.display
import IPython.display
import matplotlib.pyplot as plt
import pandas as pd
import time
import numpy as np
%matplotlib inline

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.nn import functional as F
from torch.utils.data import Dataset,DataLoader

In [3]:
class MyDataset(Dataset):
    def __init__(self,train=True):
        self.train = train
        self.root = 'aug_data/'
        if train:
            self.wave_list = os.listdir(self.root+'train/')
            self.train_targets = pd.read_csv('data/train_answer.csv').drop(['id'],axis=1).values
        else:
            self.wave_list = os.listdir(self.root+'test/')

    def __getitem__(self,index):
        if self.train:
            x_root = self.wave_list[index]
            target = self.train_targets[int(x_root.split('_')[0])]

            x = torch.load(self.root+'train/'+x_root)
            y = torch.tensor(target,dtype=torch.float32)
            y[y>0.1]=1.
            return x,y
        else:
            x_root = self.wave_list[index]
            x = torch.load(self.root+'test/'+x_root)
            return x
            
    def __len__(self):
        return len(self.wave_list)

In [4]:
dataset = MyDataset()

train_ratio = 0.8
train_size = int(len(dataset)*train_ratio)
val_size = len(dataset)-train_size

train_dataset,val_dataset = torch.utils.data.random_split(dataset,[train_size,val_size])

batch_size = 2**8
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
val_loader = DataLoader(dataset=train_dataset, batch_size=100)

In [5]:
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, 
                     stride=stride, padding=1, bias=False)

# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.elu = nn.ELU(inplace=True)
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        
    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.elu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.elu(out)
        return out

# ResNet
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(1, 16)
        self.bn = nn.BatchNorm2d(16)
        self.elu = nn.ReLU(inplace=True)
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(44800, 30)
        
    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)
    
    def forward(self, x):
        x = x.view(x.size(0),1,80,138)
        out = self.conv(x)
        out = self.bn(out)
        out = self.elu(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1)
        out = F.softmax(self.fc(out))
        return out

In [6]:
torch.cuda.init()
torch.cuda.is_initialized()
torch.cuda.empty_cache()

cuda_use = True
resnet = ResNet(ResidualBlock, [3, 3, 3])
print('# of parameters = ',sum(p.numel() for p in resnet.parameters()))

if cuda_use and torch.cuda.is_available():
    resnet.cuda()

    criterion = nn.KLDivLoss(reduction='batchmean')
optimizer = optim.Adam(params=resnet.parameters(),lr=1e-4)

# of parameters =  1636046


In [7]:
len(dataset)

400000

In [None]:
plot_list = {'train':[],'val':[]}
time_list = [time.time()]

for epoch in range(100):
    if epoch>30:
        optimizer = optim.Adam(params=resnet.parameters(),lr=1e-4/2)
    elif epoch>60:
        optimizer = optim.Adam(params=resnet.parameters(),lr=1e-4/4)
    resnet.train()
    train_losses = []
    for i, data in enumerate(train_loader):
        # get the inputs; data is a list of [inputs, labels]
        inputs, targets = data
        inputs, targets = inputs.cuda(),targets.cuda()
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = resnet(inputs)
        train_loss = criterion(outputs.log(), targets)
        train_loss.backward()
        optimizer.step()
        train_losses.append(train_loss.detach())
        del data
        del inputs
        del targets
        
    resnet.eval()
    val_losses = []
    for data in val_loader:
        inputs, targets = data
        inputs, targets = inputs.cuda(),targets.cuda()

        outputs = resnet(inputs)
        val_loss = criterion(outputs.log(), targets)
        val_losses.append(val_loss.detach())
        del data
        del inputs
        del targets
        
    time_list.append(time.time())
    print(f"{epoch+1} time = {time_list[-1]-time_list[-2]} seconds")
    print(f"{epoch+1} epoch val loss = {sum(val_losses)/len(val_losses)}")
    print(f"{epoch+1} epoch train loss = {sum(train_losses)/len(train_losses)}")
    print('--------------------------------------------------')
    plot_list['train'].append(sum(train_losses)/len(train_losses))
    plot_list['val'].append(sum(val_losses)/len(val_losses))
    
print('Finished Training')



1 time = 450.49830961227417 seconds
1 epoch val loss = 10.98992919921875
1 epoch train loss = 11.689608573913574
--------------------------------------------------
2 time = 451.81155157089233 seconds
2 epoch val loss = 10.902934074401855
2 epoch train loss = 10.751411437988281
--------------------------------------------------
3 time = 451.85200905799866 seconds
3 epoch val loss = 10.453518867492676
3 epoch train loss = 10.346803665161133
--------------------------------------------------
4 time = 451.8627653121948 seconds
4 epoch val loss = 9.885323524475098
4 epoch train loss = 10.056208610534668
--------------------------------------------------
5 time = 451.8719573020935 seconds
5 epoch val loss = 10.271794319152832
5 epoch train loss = 9.818068504333496
--------------------------------------------------
6 time = 451.7564227581024 seconds
6 epoch val loss = 9.635022163391113
6 epoch train loss = 9.617191314697266
--------------------------------------------------
