In [1]:
from torch.utils import data as data_utils
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class DataSet(data_utils.Dataset):
    """Map-style dataset that reads one ``.npy`` file from disk per item.

    Attributes:
        data_paths: Indexable collection of paths accepted by ``np.load``.
        labels: Indexable collection, parallel to ``data_paths``, whose
            items can be converted with ``int()``.
    """

    def __init__(self, paths, labels):
        """Store the sample paths and their labels; nothing is loaded yet."""
        self.data_paths, self.labels = paths, labels

    def __len__(self):
        """Return the number of sample paths."""
        return len(self.data_paths)

    def __getitem__(self, indice):
        """Load the array stored at position ``indice`` and pair it with its label.

        Returns:
            A ``(array, label)`` tuple where ``array`` is the result of
            ``np.load`` and ``label`` is a Python ``int``.
        """
        sample = np.load(self.data_paths[indice])
        target = int(self.labels[indice])
        return sample, target

In [3]:
def get_data_loader(batch_size, n_workers, is_train):
    """Build a ``DataLoader`` over the processed train or test ``.npy`` files.

    Samples are read from ``../data/processed/<split>`` and their labels from
    ``../references/dict_<split>.txt`` (both relative to the current working
    directory), where ``<split>`` is ``train`` or ``test``. The label file is
    a whitespace-separated list of ``name:label`` entries; ``name`` is the
    sample's file name without the ``.npy`` extension.

    Side effect: any ``.npy`` file in the split directory that has no entry
    in the label file is deleted from disk and left out of the dataset.

    Args:
        batch_size: Batch size passed to the ``DataLoader``.
        n_workers: Number of worker processes passed to the ``DataLoader``.
        is_train: ``True`` selects the train split and enables shuffling;
            ``False`` selects the test split without shuffling.

    Returns:
        A ``torch.utils.data.DataLoader`` wrapping a ``DataSet`` whose paths
        and labels are aligned one-to-one.
    """
    split = 'train' if is_train else 'test'
    base = Path()/'..'
    data_dir = base/'data'/'processed'/split
    dict_file = base/'references'/'dict_{}.txt'.format(split)

    # Parse "name:label" entries once into a mapping so each lookup is O(1)
    # and only the name column is ever matched against a file name.
    label_by_name = {}
    with open(str(dict_file.absolute()), mode='r') as f:
        for entry in f.read().split():
            fields = entry.split(':')
            if len(fields) < 2:
                # Malformed entry without a label; nothing usable in it.
                continue
            # setdefault keeps the first occurrence of a duplicated name.
            label_by_name.setdefault(fields[0], fields[1])

    str_paths = []
    labels = []

    # Sorted so the (unshuffled) test order is reproducible across runs.
    for sample_path in sorted(data_dir.glob('*.npy')):
        label = label_by_name.get(sample_path.stem)
        if label is None:
            # NOTE(review): unlabeled samples are removed from disk, as the
            # original code intended. The directory path itself is never
            # modified, so later iterations keep working.
            sample_path.unlink()
            continue
        # Appending both together keeps paths and labels aligned.
        str_paths.append(str(sample_path))
        labels.append(label)

    dataset = DataSet(str_paths, labels)
    data_loader = data_utils.DataLoader(
        dataset,
        batch_size=batch_size,
        num_workers=n_workers,
        shuffle=is_train,
    )

    return data_loader

In [4]:
class CNN_stride2(nn.Module):
    """Two-stage convolutional network using 5x5 kernels with stride 2.

    Each stage is convolution -> batch norm -> ReLU -> 2x2 max pooling. The
    flattened features go through a 1024-unit hidden layer with dropout and
    end in 128 output values per sample.

    ``linear1`` expects ``7*30*64`` input features, which is what the two
    stages produce for a single-channel 128x501 input.
    """

    def __init__(self):
        super().__init__()

        # First stage: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=2)
        self.norm1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()

        # Second stage: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=2)
        self.norm2 = nn.BatchNorm2d(64)

        # Fully connected head.
        self.linear1 = nn.Linear(7*30*64, 1024)
        self.dropout = nn.Dropout(p=0.3)
        self.linear2 = nn.Linear(1024, 128)

    def forward(self, x):
        """Run the network on a batch ``x`` and return a ``(batch, 128)`` tensor."""
        stages = ((self.conv1, self.norm1), (self.conv2, self.norm2))
        for conv, norm in stages:
            x = F.max_pool2d(self.relu(norm(conv(x))), (2, 2))

        # Collapse everything except the batch dimension.
        flat = x.view(x.size(0), -1)

        hidden = self.dropout(self.relu(self.linear1(flat)))
        return self.linear2(hidden)

In [5]:
class CNN_stride1(nn.Module):
    """Two-stage convolutional network using 3x3 kernels with stride 1.

    Each stage is convolution -> batch norm -> ReLU -> 2x2 max pooling. The
    flattened features go through a 1024-unit hidden layer with dropout and
    end in 128 output values per sample.

    ``linear1`` expects ``30*123*64`` input features, which is what the two
    stages produce for a single-channel 128x501 input.
    """

    def __init__(self):
        super().__init__()

        # First stage: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1)
        self.norm1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU()

        # Second stage: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1)
        self.norm2 = nn.BatchNorm2d(64)

        # Fully connected head.
        self.linear1 = nn.Linear(30*123*64, 1024)
        self.dropout = nn.Dropout(p=0.3)
        self.linear2 = nn.Linear(1024, 128)

    def forward(self, x):
        """Run the network on a batch ``x`` and return a ``(batch, 128)`` tensor."""
        stages = ((self.conv1, self.norm1), (self.conv2, self.norm2))
        for conv, norm in stages:
            x = F.max_pool2d(self.relu(norm(conv(x))), (2, 2))

        # Collapse everything except the batch dimension.
        flat = x.view(x.size(0), -1)

        hidden = self.dropout(self.relu(self.linear1(flat)))
        return self.linear2(hidden)

In [6]:
def evaluation(batch_size, n_workers):
    """Print the top-1 accuracy of the saved ``CNN_stride1`` model on the test split.

    Loads weights from ``../models/model_end.pt`` (relative to the current
    working directory), runs the model over the loader returned by
    ``get_data_loader(batch_size, n_workers, False)`` and prints the
    percentage of samples whose arg-max output equals the label.

    Args:
        batch_size: Batch size used for the test ``DataLoader``.
        n_workers: Number of ``DataLoader`` worker processes.

    Raises:
        RuntimeError: If the test loader yields no samples, so no accuracy
            can be computed.
    """
    weights_path = str((Path()/'..'/'models').absolute()) + '/model_end.pt'

    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    model = CNN_stride1()
    model.to(device)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.eval()

    data_loader = get_data_loader(batch_size, n_workers, False)

    correct = 0
    total = 0

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for data, label in data_loader:
            data = data.to(device)
            label = label.to(device)

            # Add the single channel dimension expected by the first conv layer.
            scores = model(data.view([-1, 1, 128, 501]))

            _, predicted = torch.max(scores, 1)

            total += label.size(0)
            correct += (predicted == label).sum().item()

    if total == 0:
        raise RuntimeError('No test samples were loaded; cannot compute accuracy.')

    accuracy = 100.0 * correct / total
    # '%' needs no escaping in str.format; '%%' would print two percent signs.
    print('Accuracy: {:.2f} %'.format(accuracy))

In [7]:
# Score the saved model on the test split: batches of 32, 4 loader workers.
evaluation(batch_size=32, n_workers=4)

Accuracy: 91.78 %%
