In [1]:
from torch.utils import data as data_utils
from pathlib import Path
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import datetime

In [2]:
class DataSet(data_utils.Dataset):
    """Dataset that pairs ``.npy`` file paths with their labels.

    Arrays are not held in memory; each one is read from disk with
    ``np.load`` at the moment its index is requested.
    """

    def __init__(self, paths, labels):
        """Keep the path and label sequences exactly as they were passed in."""
        self.labels = labels
        self.data_paths = paths

    def __len__(self):
        """Return how many paths (and therefore samples) are stored."""
        return len(self.data_paths)

    def __getitem__(self, indice):
        """Return ``(array, label)`` for position ``indice``.

        The array is loaded from the stored path and the label is converted
        to a built-in ``int``.
        """
        array = np.load(self.data_paths[indice])
        target = int(self.labels[indice])
        return array, target

In [3]:
def _read_label_map(dict_file):
    """Parse a whitespace-separated ``name:label`` file into ``{name: label}``."""
    with open(str(dict_file), mode='r') as f:
        tokens = f.read().split()

    label_map = {}
    for token in tokens:
        fields = token.split(':')
        if len(fields) < 2:
            raise ValueError('Malformed entry {!r} in {}'.format(token, dict_file))
        # The first occurrence of a name wins, and the label is the second field.
        label_map.setdefault(fields[0], fields[1])
    return label_map


def get_data_loader(batch_size, n_workers, is_train):
    """Build a ``DataLoader`` over the processed train or test ``.npy`` files.

    Files are read from ``../data/processed/<split>`` and labels from
    ``../references/dict_<split>.txt`` (both relative to the current working
    directory), where ``<split>`` is ``train`` or ``test``.

    Args:
        batch_size: Batch size passed to the ``DataLoader``.
        n_workers: Number of ``DataLoader`` worker processes.
        is_train: Selects the ``train`` split when true, ``test`` otherwise;
            it is also used as the ``shuffle`` flag.

    Returns:
        A ``DataLoader`` wrapping a ``DataSet`` of ``(path, label)`` pairs.

    Raises:
        ValueError: If the label file contains a token without a ``:``.

    Warning:
        Any ``.npy`` file whose stem has no entry in the label file is
        DELETED from disk, as in the original implementation.
    """
    split = 'train' if is_train else 'test'
    ref_dir = (Path()/'..'/'references').absolute()
    data_dir = Path()/'..'/'data'/'processed'/split

    label_map = _read_label_map(ref_dir/('dict_' + split + '.txt'))

    str_paths = []
    labels = []

    # Sorted so the path/label order is reproducible across runs.
    for npy_path in sorted(data_dir.glob('*.npy')):
        # Look the stem up among file names only, never among label values.
        label = label_map.get(npy_path.stem)
        if label is None:
            # Unlabelled file: remove it, and keep it out of the dataset so
            # paths and labels stay aligned one-to-one.
            npy_path.unlink()
            continue
        str_paths.append(str(npy_path))
        labels.append(label)

    dataset = DataSet(str_paths, labels)
    data_loader = data_utils.DataLoader(dataset, batch_size=batch_size, num_workers=n_workers, shuffle=is_train)

    return data_loader

In [4]:
class CNN_stride2(nn.Module):
    """Two-stage CNN using 5x5 convolutions with stride 2.

    Each stage is conv -> batch norm -> ReLU -> 2x2 max-pool. The flattened
    features go through a 1024-unit hidden layer with dropout and a final
    128-way linear layer. ``linear1`` expects ``7*30*64`` inputs, which is
    the flattened size produced by a 1x128x501 input.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=2)
        self.norm1 = nn.BatchNorm2d(num_features=32)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=2)
        self.norm2 = nn.BatchNorm2d(num_features=64)

        # Fully connected head.
        self.linear1 = nn.Linear(in_features=7 * 30 * 64, out_features=1024)
        self.dropout = nn.Dropout(p=0.3)
        self.linear2 = nn.Linear(in_features=1024, out_features=128)

    def forward(self, x):
        """Return the 128-dimensional output (raw scores) for a batch ``x``."""
        stage1 = F.max_pool2d(self.relu(self.norm1(self.conv1(x))), kernel_size=(2, 2))
        stage2 = F.max_pool2d(self.relu(self.norm2(self.conv2(stage1))), kernel_size=(2, 2))

        # Collapse everything but the batch dimension.
        flat = stage2.view(stage2.size(0), -1)

        hidden = self.dropout(self.relu(self.linear1(flat)))
        return self.linear2(hidden)

In [5]:
class CNN_stride1(nn.Module):
    """Two-stage CNN using 3x3 convolutions with stride 1.

    Each stage is conv -> batch norm -> ReLU -> 2x2 max-pool. The flattened
    features go through a 1024-unit hidden layer with dropout and a final
    128-way linear layer. ``linear1`` expects ``30*123*64`` inputs, which is
    the flattened size produced by a 1x128x501 input.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: 1 -> 32 channels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.norm1 = nn.BatchNorm2d(num_features=32)
        self.relu = nn.ReLU()
        # Stage 2: 32 -> 64 channels.
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.norm2 = nn.BatchNorm2d(num_features=64)
        # Head: flattened features -> 1024 -> 128.
        self.linear1 = nn.Linear(in_features=30 * 123 * 64, out_features=1024)
        self.dropout = nn.Dropout(p=0.3)
        self.linear2 = nn.Linear(in_features=1024, out_features=128)

    def forward(self, x):
        """Return the 128-dimensional output (raw scores) for a batch ``x``."""
        pool_window = (2, 2)

        out = self.relu(self.norm1(self.conv1(x)))
        out = F.max_pool2d(out, pool_window)

        out = self.relu(self.norm2(self.conv2(out)))
        out = F.max_pool2d(out, pool_window)

        # Keep the batch dimension and flatten the rest.
        batch = out.size(0)
        out = out.view(batch, -1)

        out = self.dropout(self.relu(self.linear1(out)))
        return self.linear2(out)

In [6]:
def train_step(optimizer, model, data, label, criterion=None, input_shape=(1, 128, 501)):
    """Run one optimisation step on a single batch and return its loss.

    Args:
        optimizer: Optimizer that owns ``model``'s parameters.
        model: Module to train; it is called on the reshaped batch.
        data: Batch tensor; reshaped to ``(-1, *input_shape)`` before the
            forward pass.
        label: Target tensor passed to the criterion.
        criterion: Loss callable ``criterion(output, label)``. Defaults to a
            fresh ``nn.CrossEntropyLoss()``.
        input_shape: Per-sample shape expected by ``model``; defaults to
            ``(1, 128, 501)``.

    Returns:
        The batch loss as a detached 0-dim tensor (``.item()`` still works).
    """
    if criterion is None:
        criterion = nn.CrossEntropyLoss()

    optimizer.zero_grad()
    # Call the module itself rather than ``model.forward`` so that any
    # registered forward/backward hooks are executed.
    res = model(torch.reshape(data, (-1, *input_shape)))
    loss = criterion(res, label)
    loss.backward()
    optimizer.step()

    # Detach so callers holding the value do not keep the autograd graph alive.
    return loss.detach()

In [7]:
def train(epoch, batch_size, n_workers, check_interval):
    """Train a ``CNN_stride1`` model on the training split and save its weights.

    Uses Adam (lr=0.001) with a ``StepLR`` schedule that halves the learning
    rate every 20 epochs, and logs the mean batch loss of each epoch to
    TensorBoard under the ``Loss`` tag.

    Args:
        epoch: Number of epochs to run.
        batch_size: Batch size handed to ``get_data_loader``.
        n_workers: Number of ``DataLoader`` worker processes.
        check_interval: Save a checkpoint every this many epochs. A falsy
            value (``0`` or ``None``) disables intermediate checkpoints.

    Side effects:
        * Writes ``model<epoch>.pt`` checkpoints and ``model_end.pt`` (all
          ``state_dict`` files) to ``../models``, creating it if needed.
        * Writes TensorBoard event files to ``../reports/figures/<MMDDHHMM>``.
        * Prints one line with the average loss per epoch.

    Raises:
        ValueError: If the training data loader yields no batches.
    """
    now = datetime.datetime.now()

    model_dir = (Path()/'..'/'models').absolute()
    model_dir.mkdir(parents=True, exist_ok=True)
    out_path = str(model_dir)

    figures_dir = (Path()/'..'/'reports'/'figures').absolute()
    # Zero-padded timestamp: unpadded concatenation is ambiguous
    # (e.g. Jan 11 and Nov 1 would both start with "111").
    tensor_out_path = str(figures_dir) + '/' + now.strftime('%m%d%H%M')

    device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
    model = CNN_stride1()
    model.to(device)

    data_loader = get_data_loader(batch_size, n_workers, True)
    n_batches = len(data_loader)
    if n_batches == 0:
        raise ValueError('training data loader is empty; nothing to train on')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.5)

    writer = SummaryWriter(tensor_out_path)

    try:
        # Separate loop variable so the ``epoch`` argument is not shadowed.
        for current_epoch in range(1, epoch + 1):
            model.train()
            running_loss = 0

            for data, label in data_loader:
                data = data.to(device)
                label = label.to(device)
                loss = train_step(optimizer, model, data, label)
                running_loss += loss.item()

            ave_loss = running_loss / n_batches
            writer.add_scalar('Loss', ave_loss, current_epoch)

            lr_scheduler.step()

            if check_interval and current_epoch % check_interval == 0:
                # Call state_dict(): saving the bound method would pickle the
                # method object (and the model behind it), not the weights dict.
                torch.save(model.state_dict(), out_path+'/model'+str(current_epoch)+'.pt')

            print('Epoch: {:d} Train Loss: {:.3f}'.format(current_epoch, ave_loss))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    torch.save(model.state_dict(), out_path+'/model_end.pt')

In [8]:
# Run training: 20 epochs, batches of 32, 4 loader workers, checkpoint every 10 epochs.
train(epoch=20, batch_size=32, n_workers=4, check_interval=10)

Epoch: 1 Train Loss: 3.986
Epoch: 2 Train Loss: 2.327
Epoch: 3 Train Loss: 1.617
Epoch: 4 Train Loss: 0.671
Epoch: 5 Train Loss: 0.354
Epoch: 6 Train Loss: 0.234
Epoch: 7 Train Loss: 0.166
Epoch: 8 Train Loss: 0.128
Epoch: 9 Train Loss: 0.109
Epoch: 10 Train Loss: 0.091
Epoch: 11 Train Loss: 0.080
Epoch: 12 Train Loss: 0.070
Epoch: 13 Train Loss: 0.062
Epoch: 14 Train Loss: 0.058
Epoch: 15 Train Loss: 0.053
Epoch: 16 Train Loss: 0.046
Epoch: 17 Train Loss: 0.046
Epoch: 18 Train Loss: 0.044
Epoch: 19 Train Loss: 0.041
Epoch: 20 Train Loss: 0.034
