In [1]:
import os
import time
import shutil
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils as utils
import torch.nn.init as init
from torchvision import datasets

In [2]:
# ---- Experiment configuration -------------------------------------------------
data_type = 'mem'       # sub-folder / file prefix used when loading the .npy data
model_num = 2           # used in the training banner and in result/checkpoint paths
time_len =  20  ## ms
point_len = int(time_len*10)        # presumably samples per window (10 per ms) -- TODO confirm
div_num = int(100*(20/time_len))    # stride between groups when slicing test predictions
vow_num = int(div_num/5)            # width of one vowel's run inside a group of div_num


def conv_out_len(length, n_layers=5, kernel=5, stride=2, padding=2):
    """Return the time-axis length left after a stack of identical Conv1d layers.

    Applies the Conv1d output-length formula
    ``floor((L + 2*padding - kernel) / stride) + 1`` once per layer.  The
    defaults mirror the five ``kernel_size=5, stride=2, padding=2`` stages of
    ``Model``, for which the result equals ``ceil(length / 32)``.

    Args:
        length: input length along the time axis.
        n_layers: number of stacked convolutions.
        kernel, stride, padding: Conv1d hyper-parameters shared by every layer.

    Returns:
        int: length of the time axis after the last layer.
    """
    for _ in range(n_layers):
        length = (length + 2*padding - kernel)//stride + 1
    return length


# The previous `int(point_len/32)+1` over-counted by one whenever point_len is a
# multiple of 32 (e.g. 160 samples -> 5 steps, not 6), which would make the first
# Linear layer larger than the flattened conv output.
fc_len = conv_out_len(point_len)

In [6]:
class Model(nn.Module):
    """1-D CNN classifier whose forward pass also evaluates the loss.

    Architecture: five stages of ``Conv1d(kernel_size=5, stride=2, padding=2)
    -> LeakyReLU(0.1) -> BatchNorm1d`` with widths ``channel, 2*channel,
    2*channel, 4*channel, 4*channel``; the result is flattened and passed
    through ``Linear -> LeakyReLU(0.1) -> BatchNorm1d(1024)`` and finally
    ``Linear(1024, 5) -> Softmax``.

    Args:
        loss: callable invoked as ``loss(prediction, target)`` inside ``forward``.
        input_c: number of channels of the input tensor (axis 1).  Must match
            the data; the run recorded in this notebook failed because the
            input had 9 channels while the first convolution was built for 10.
        channel: base number of convolution filters.
        flat_len: length of the time axis after the fifth convolution.  When
            ``None`` the module-level ``fc_len`` is used, as before.
    """

    def __init__(self, loss, input_c=10, channel=32, flat_len=None):
        super(Model, self).__init__()
        if flat_len is None:
            flat_len = fc_len
        # Stages are created in the original order so that parameter
        # initialisation (and therefore seeded runs) and state_dict keys
        # are unchanged.
        self.conv1 = self._conv_stage(input_c, channel)
        self.conv2 = self._conv_stage(channel, channel*2)
        self.conv3 = self._conv_stage(channel*2, channel*2)
        self.conv4 = self._conv_stage(channel*2, channel*4)
        self.conv5 = self._conv_stage(channel*4, channel*4)
        self.fc1 = nn.Sequential(
            nn.Linear(channel*4*flat_len, 1024),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm1d(1024))
        self.fc2 = nn.Sequential(
            nn.Linear(1024, 5),
            nn.Softmax(dim=1))
        self.loss = loss

    @staticmethod
    def _conv_stage(c_in, c_out):
        """Build one Conv1d -> LeakyReLU -> BatchNorm1d stage that halves the time axis."""
        return nn.Sequential(
            nn.Conv1d(c_in, c_out, kernel_size=5, stride=2, padding=2),
            nn.LeakyReLU(negative_slope=0.1),
            nn.BatchNorm1d(c_out))

    def forward(self, data, target):
        """Run the network and evaluate the loss.

        Args:
            data: input tensor indexed as (batch, channel, time).
            target: tensor handed unchanged to ``self.loss`` and returned.

        Returns:
            tuple: ``(loss_value, class_probabilities, target)``.
        """
        x = self.conv1(data)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        # The conv output is (batch, channels, time); nn.Linear needs one
        # feature vector per sample, so collapse everything but the batch axis.
        # Without this the first Linear layer raises a shape mismatch.
        x = x.flatten(1)
        x = self.fc1(x)
        h = self.fc2(x)

        l = self.loss(h, target)
        return l, h, target

In [7]:
def train(mod_tr, trX, trY, bat_tr, dev, opt_tr, tr_loss, tr_acc):
    """Run one shuffled training epoch and record its mean loss and accuracy.

    Args:
        mod_tr: model called as ``mod_tr(x, y)`` and returning
            ``(loss, output, target)``.
        trX, trY: training inputs and targets, indexed along axis 0.
        bat_tr: ``[number_of_batches, batch_size]``.
        dev: unused here; kept for signature compatibility.
        opt_tr: optimizer stepped once per batch.
        tr_loss, tr_acc: lists that receive this epoch's mean batch loss and
            the fraction of correct predictions; they are appended to in place.

    Returns:
        tuple: ``(tr_loss, tr_acc)`` -- the same list objects that were passed in.
    """
    n_batches = bat_tr[0]
    batch_size = bat_tr[1]
    mod_tr.train()

    # Draw a fresh sample order every epoch.
    order = torch.randperm(trX.size()[0])
    inputs = trX[order]
    labels = trY[order]

    loss_total = 0
    n_correct = 0
    for batch_idx in range(n_batches):
        lo = batch_idx*batch_size
        hi = lo + batch_size

        opt_tr.zero_grad()
        batch_loss, scores, batch_target = mod_tr(inputs[lo:hi], labels[lo:hi])
        # .sum() collapses a possibly multi-element loss into a scalar.
        batch_loss = batch_loss.sum()
        batch_loss.backward()
        opt_tr.step()

        # Compare the arg-max class of the prediction with that of the target.
        predicted = torch.max(scores, 1)[1]
        expected = torch.max(batch_target, 1)[1]

        loss_total += batch_loss.cpu().item()
        n_correct += (predicted==expected).sum().item()

    tr_loss.append(loss_total/n_batches)
    tr_acc.append(n_correct/(n_batches*batch_size))
    return tr_loss, tr_acc

def test(mod_te, teX, teY, bat_te, dev, te_loss, te_acc):
    mod_te.eval()
    dloss_te = 0
    dacc_te = 0
    
    for i in range(bat_te[0]):
        loss_te, output_te, target_te = mod_te(teX[i*bat_te[1]:(i+1)*bat_te[1]], 
                                              teY[i*bat_te[1]:(i+1)*bat_te[1]])
        loss_te = loss_te.sum()
        
        _, output_te = torch.max(output_te, 1)
        _, target_te = torch.max(target_te, 1)

        dloss_te += loss_te.cpu().item()
        dacc_te += (output_te==target_te).sum().item()
        
    te_loss.append(dloss_te/bat_te[0])
    te_acc.append(dacc_te/(bat_te[0]*bat_te[1]))
    return te_loss, te_acc, output_te, target_te

In [9]:
# Training driver: loads the pre-computed .npy splits for `data_type`/`time_len`,
# builds the model, trains for 77 epochs and prints progress every 10 epochs.
if __name__=='__main__':
    print('[Training]%s_%.1f_model%d'%(data_type, time_len, model_num))
    # Fix the CPU and CUDA seeds and ask cuDNN for deterministic kernels so
    # repeated runs are comparable.
    torch.manual_seed(37)
    torch.cuda.manual_seed_all(37)
    torch.backends.cudnn.deterministic = True
    
    # Inputs (X) and targets (Y) for both splits, stored under npy_data/<data_type>/.
    trainX = np.load('npy_data/%s/%s_%.1fms_trainX.npy'%(data_type, data_type, time_len))
    trainY = np.load('npy_data/%s/%s_%.1fms_trainY.npy'%(data_type, data_type, time_len))
    testX = np.load('npy_data/%s/%s_%.1fms_testX.npy'%(data_type, data_type, time_len))
    testY = np.load('npy_data/%s/%s_%.1fms_testY.npy'%(data_type, data_type, time_len))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    
    # `batch_num` is the batch SIZE (one third of the test set); each batch_*
    # list is [number of batches, batch size] as expected by train()/test().
    # Samples beyond number_of_batches * batch_size are not visited.
    batch_num = int(len(testX)/3)
    batch_tr = [int(len(trainX)/batch_num), batch_num]
    batch_te = [int(len(testX)/batch_num), batch_num]
    
    # Whole datasets are moved to the device up front; batches are sliced from them.
    trainX = torch.Tensor(trainX).to(device)
    trainY = torch.Tensor(trainY).to(device)
    testX = torch.Tensor(testX).to(device)
    testY = torch.Tensor(testY).to(device)
    print('mem data shape  -  %.1f ms'%time_len)
    print('train set :', np.shape(trainX) , np.shape(trainY))
    print('test set :', np.shape(testX) ,np.shape(testY))
    
    
    learning_rate = 0.00005
    loss_func=nn.BCELoss()
    # NOTE(review): the run recorded below this cell loaded inputs with 9
    # channels and failed in the first convolution, which expected 10 --
    # confirm the model's input channel count matches the selected data_type.
    model = nn.DataParallel(Model(loss_func)).to(device)
#     model = Model(loss_func).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # NOTE(review): T_max is set to the batch size, while scheduler.step() is
    # called once per epoch and only 77 epochs are run -- confirm this is the
    # intended annealing period.
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=batch_num, eta_min = 3e-6)

#     model.load_state_dict(torch.load('ckpt/model%d/%s/%.1f_ckpt_100.pt'%(model_num, data_type, time_len)))
    a = time.time()
    # Per-epoch histories; train()/test() append one entry per call.
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    for epoch in range(77):
        train_loss, train_acc = train(model, trainX, trainY, batch_tr, device, 
                                      optimizer, train_loss, train_acc)
        test_loss, test_acc, output, target = test(model, testX, testY, batch_te, device, 
                                   test_loss, test_acc)
        scheduler.step()
        
        if epoch%10==0:
            # Per-vowel accuracy.  Presumably the test set is laid out as 15
            # groups of `div_num` samples, each made of 5 consecutive runs of
            # `vow_num` samples (one run per vowel) -- TODO confirm.
            # NOTE(review): this indexing needs `output`/`target` to span
            # 15*div_num entries; confirm test() returns predictions for the
            # whole test set and not just one batch.
            vowel_check = (output==target).cpu().detach().numpy()
            vowel_acc = np.zeros(5)
            for k1 in range(15):
                for k2 in range(5):
                    vowel_acc[k2] += np.sum(vowel_check[div_num*k1 + vow_num*k2 : div_num*k1 + vow_num*(k2+1)])
            print(vowel_acc/(vow_num*15))
            print('epoch %d - train loss : %.7f  /  test loss : %.7f'%(epoch, train_loss[-1], test_loss[-1]))
            print('           train acc : %.7f  /  test acc : %.7f'%(train_acc[-1], test_acc[-1]))
        # Disabled: periodic checkpointing and dumping of the loss/accuracy histories.
#         if epoch%50==0:
#             print('@@@@@@@ save model : epoch %d'% epoch)
#             torch.save(model.state_dict(),'ckpt/model%d_%s/%.1f_ckpt_%d.pt'%(model_num, data_type, time_len, epoch))
#             np.savetxt('result/model%d_%s/%.1f_loss_tr.txt'%(model_num, data_type, time_len), train_loss)
#             np.savetxt('result/model%d_%s/%.1f_loss_te.txt'%(model_num, data_type, time_len), test_loss)
#             np.savetxt('result/model%d_%s/%.1f_acc_tr.txt'%(model_num, data_type, time_len), train_acc)
#             np.savetxt('result/model%d_%s/%.1f_acc_te.txt'%(model_num, data_type, time_len), test_acc)

    # Disabled: final per-vowel report and total wall-clock time (uses `a` from above).
#     vowel_check = (output==target).cpu().detach().numpy()
#     vowel_acc = np.zeros(5)
#     for k1 in range(15):
#         for k2 in range(5):
#             vowel_acc[k2] += np.sum(vowel_check[div_num*k1 + vow_num*k2 : div_num*k1 + vow_num*(k2+1)])
#     print(vowel_acc/(vow_num*15))
#     print('epoch %d - train loss : %.7f  /  test loss : %.7f'%(epoch, train_loss[-1], test_loss[-1]))
#     print('           train acc : %.7f  /  test acc : %.7f'%(train_acc[-1], test_acc[-1]))
#     print("training complete! - calculation time :", time.time()-a, '  seconds')

[Training]mem_20.0_model2
mem data shape  -  20.0 ms
train set : torch.Size([5000, 9, 200]) torch.Size([5000, 5])
test set : torch.Size([1500, 9, 200]) torch.Size([1500, 5])


RuntimeError: Given groups=1, weight of size [64, 10, 5], expected input[250, 9, 200] to have 10 channels, but got 9 channels instead

In [8]:
# Saved test-accuracy histories for model 2 at 20.0 ms, one file per input type.
acc_files = (
    'result/model2/mfcc/20.0_acc_te.txt',
    'result/model2/raw/20.0_acc_te.txt',
    'result/model2/fft/20.0_acc_te.txt',
    'result/model2/mem/20.0_acc_te.txt',
)

# First the best accuracy of every history, then the index at which it occurred.
for summarize in (np.max, np.argmax):
    for acc_file in acc_files:
        print(summarize(np.loadtxt(acc_file)))

0.846
0.8066666666666666
0.8266666666666667
0.8613333333333333
677
785
269
76


In [11]:
# Per-vowel accuracy over the predictions left in `output` / `target`.
# Presumably the test set is 15 groups of `div_num` samples, each made of five
# consecutive runs of `vow_num` samples (one run per vowel) -- TODO confirm.
vowel_check = (output==target).cpu().detach().numpy()
vowel_acc = np.zeros(5)
for group in range(15):
    for vowel in range(5):
        run_start = div_num*group + vow_num*vowel
        vowel_acc[vowel] += np.sum(vowel_check[run_start : run_start + vow_num])
print('whole accuracy :', vowel_acc/(vow_num*15))

# Confusion breakdown for the vowel at position 3: print the (1-based) predicted
# class of every sample in its run, and collect the predictions with correct
# ones zeroed out so that 0 stands for "right" and k > 0 for "mistaken for class k".
vowel_check = (output==target).cpu().detach().numpy()
mistakes_per_group = [np.array([])]
for group in range(15):
    run_start = div_num*group + vow_num*3
    run_stop = run_start + vow_num
    predicted_1based = output[run_start : run_stop].cpu().detach().numpy()+1
    print(predicted_1based)
    is_wrong = 1 - vowel_check[run_start : run_stop].astype(np.int32)
    mistakes_per_group.append(predicted_1based*is_wrong)
whole_count = np.concatenate(mistakes_per_group)
unique, counts = np.unique(whole_count, return_counts=True)
print('\npart accuracy \n', dict(zip(unique, counts/(len(output)/5))))

whole accuracy : [0.99333333 0.66666667 1.         0.67666667 0.97      ]
[4 4 4 4 4 4 2 4 4 4 4 4 4 4 2 4 4 4 4 4]
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 2 4 4 4 2]
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
[4 2 2 2 4 4 2 2 2 4 4 4 2 4 2 4 2 2 4 2]
[4 4 4 4 2 2 2 4 4 4 2 2 4 2 2 4 4 2 4 2]
[4 4 4 4 2 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
[2 2 2 2 2 2 2 2 2 4 4 2 2 4 2 2 2 4 2 4]
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
[4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4]
[4 4 4 4 4 4 2 4 2 2 4 4 4 4 4 4 4 4 4 4]
[4 4 4 4 2 4 4 4 4 2 4 4 2 4 2 4 4 2 2 4]
[4 2 2 2 2 4 2 4 4 2 2 2 2 2 2 2 2 2 4 2]
[4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]

part accuracy 
 {0.0: 0.6766666666666666, 1.0: 0.06333333333333334, 2.0: 0.26}
