In [26]:
%matplotlib inline
import matplotlib.pyplot as plt
import os
from scipy.io import wavfile
from collections import defaultdict, Counter
from scipy import signal
import numpy as np
import librosa
import random as rn

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset
import torchvision
from torchvision import datasets, transforms

def pad1d(a, i):
    """Force a 1-D array to length ``i``.

    Arrays longer than ``i`` are cut to their first ``i`` entries; all others
    are extended on the right with zeros via ``np.hstack``.
    """
    length = a.shape[0]
    if length > i:
        return a[0:i]
    return np.hstack((a, np.zeros(i - length)))


def pad2d(a, i):
    """Force a 2-D array to ``i`` columns, leaving the row count untouched.

    Arrays wider than ``i`` keep only their first ``i`` columns; all others
    get zero-filled columns appended on the right via ``np.hstack``.
    """
    rows, cols = a.shape[0], a.shape[1]
    if cols > i:
        return a[:, 0:i]
    return np.hstack((a, np.zeros((rows, i - cols))))

In [27]:
# True when PyTorch reports a usable CUDA device.
# NOTE(review): no visible cell reads this flag -- the model and input tensor
# are moved to the GPU unconditionally further down.
use_cuda = torch.cuda.is_available()

class CNNClassifier(nn.Module):
    """Three-stage convolutional network followed by a three-layer MLP head.

    The shape comments below follow a single-channel 20x40 input
    (written channels@height*width). The network emits 10 values per sample,
    normalised with softmax so each row sums to 1.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: (conv -> [batch norm] -> ReLU -> 2x2 max-pool) x 3.
        self.conv_module = nn.Sequential(
            nn.Conv2d(1, 32, 2),     # 1@20*40  -> 32@19*39
            nn.ReLU(),
            nn.MaxPool2d(2, 2),      # 32@19*39 -> 32@9*19
            nn.Conv2d(32, 64, 2),    # 32@9*19  -> 64@8*18
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),      # 64@8*18  -> 64@4*9
            nn.Conv2d(64, 128, 2),   # 64@4*9   -> 128@3*8
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),      # 128@3*8  -> 128@1*4
        )

        # Classifier head: 512 -> 64 -> 32 -> 10.
        self.fc_module = nn.Sequential(
            nn.Linear(128 * 1 * 4, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Linear(32, 10),
        )

    def forward(self, x):
        """Return softmax scores of shape (N, 10) for the input batch ``x``."""
        features = self.conv_module(x)  # 128@1*4 per sample for a 1@20*40 input

        # Collapse every non-batch axis into one feature axis.
        flat_width = 1
        for extent in features.size()[1:]:
            flat_width *= extent

        scores = self.fc_module(features.view(-1, flat_width))
        return F.softmax(scores, dim=1)

In [28]:
# Instantiate the classifier and place it on the GPU only when one is
# available (`use_cuda` is computed alongside the class definition); an
# unconditional .cuda() raises on CPU-only hosts.
cnn = CNNClassifier().to("cuda" if use_cuda else "cpu")

In [34]:
# Saved state_dict for `cnn`.
# NOTE(review): absolute, machine-specific path -- consider a configurable location.
MODEL_PATH = "G:/2020/KIST/2. code/Python/Audio/Deep Learning/Model_1/cnn.pt"

# Remap the stored tensors onto whichever device the model currently lives on,
# so a checkpoint written from a GPU session still loads on another device.
# torch.load unpickles the file: only load checkpoints from a trusted source.
model_device = next(cnn.parameters()).device
cnn.load_state_dict(torch.load(MODEL_PATH, map_location=model_device))

# Switch to inference mode; as the last expression this also displays the
# module summary.
cnn.eval()

CNNClassifier(
  (conv_module): Sequential(
    (0): Conv2d(1, 32, kernel_size=(2, 2), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(2, 2), stride=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(2, 2), stride=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU()
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_module): Sequential(
    (0): Linear(in_features=512, out_features=64, bias=True)
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Linear(in_features=64, out_features=32, bias=True)
    (4): BatchNorm1d(32, eps=1e-05, moment

In [None]:
import serial
# Open the serial link used to send the prediction out:
# port COM5 at 115200 baud, every other setting left at the pyserial default.
ser = serial.Serial(port='COM5', baudrate=115200)

In [35]:
# Load the clip to classify (librosa returns the samples and their sample rate).
# NOTE(review): the folder name in this path looks mis-encoded (mojibake). It is
# a runtime string, so it is kept as-is -- confirm it against the real folder.
wav, sr = librosa.load("C:/Users/ADmin/Desktop/ë‹­/four_2.wav")

# Pass the audio and sample rate by keyword: recent librosa releases declare
# these arguments keyword-only and raise TypeError on a positional call.
mfcc_features = librosa.feature.mfcc(y=wav, sr=sr)

# Fix the time axis at 40 frames (truncate or zero-pad), then build a float32
# tensor on the same device as the model instead of hard-requiring CUDA.
padded_mfcc = torch.as_tensor(
    pad2d(mfcc_features, 40),
    dtype=torch.float32,
    device=next(cnn.parameters()).device,
)

# Add two leading singleton axes so the matrix becomes a 4-D model input.
mfcc = padded_mfcc.unsqueeze(0).unsqueeze(0)

In [36]:
# Sanity check on the model input: two leading singleton axes (from the two
# unsqueeze(0) calls) followed by the MFCC matrix padded to 40 columns.
mfcc.shape

torch.Size([1, 1, 20, 40])

In [37]:
# Pure inference: turn autograd off so no computation graph is recorded for
# this forward pass.
with torch.no_grad():
    class_scores = cnn(mfcc)

# Index of the largest score over the flattened output; with a single input
# sample this is the predicted class index.
pred_label = torch.argmax(class_scores).item()

# String form of the class index, ready to be encoded and sent elsewhere.
pred = str(pred_label)

In [38]:
# Display the predicted class index (already converted to a string).
pred

'4'

In [None]:
# Send the predicted class to the serial device, then stop.
while True:
    if ser.readable():
        # The original assigned from `a`, a name no visible cell defines, so
        # this raised NameError. `pred` is the string form of the predicted
        # class index and is what `.encode()` below expects.
        # NOTE(review): confirm `pred` is the value the receiving device expects.
        shape_num = pred
        ser.write(shape_num.encode())
        break