In [1]:
from __future__ import division
from __future__ import print_function

import argparse

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

In [2]:
def load(path_list):
    """Read every audio file in ``path_list`` with ``librosa.load``.

    Only the first element of each ``librosa.load`` result (the waveform)
    is kept; the second element is discarded.

    Args:
        path_list: iterable of file paths, each passed to ``librosa.load``.
    Returns:
        list holding one loaded waveform per path, in input order.
    """
    return [librosa.load(audio_path)[0] for audio_path in path_list]

In [3]:
def feature_extraction(data, n_mfcc=20):
    """Compute one averaged MFCC vector per waveform.

    For every waveform ``librosa.feature.mfcc`` is evaluated with ``n_mfcc``
    coefficients and the result is averaged along axis 1 (the frame axis of
    librosa's ``(n_mfcc, t)`` output). All coefficients, including the 0th,
    are kept.

    Args:
        data: iterable of already-loaded audio waveforms (e.g. the list
            returned by ``load``) -- not file paths.
        n_mfcc: number of MFCC coefficients requested from librosa.
            Defaults to 20, the value that used to be hard-coded here.
    Returns:
        features: ``np.ndarray`` with one row per waveform; for mono
            waveforms each row has ``n_mfcc`` entries.
    """
    features = np.array(
        [np.mean(librosa.feature.mfcc(y=y, n_mfcc=n_mfcc), axis=1) for y in data]
    )

    return features

In [4]:
# Metadata tables for the training and test sets, read from the parent directory.
training = pd.read_csv(filepath_or_buffer="../training.csv")
test = pd.read_csv(filepath_or_buffer="../test.csv")

In [5]:
# Load the raw waveforms; the "path" column entries are resolved relative to
# the parent directory by prefixing "../".
train_data = load(path_list="../" + training["path"].values)
test_data = load(path_list="../" + test["path"].values)

In [6]:
# Feature extraction: labels come from the "label" column of the training
# table, features from the loaded waveforms.
Y_train = np.array(training["label"])
X_train = feature_extraction(data=train_data)
X_test = feature_extraction(data=test_data)

In [7]:
# Split the training data into training and validation sets (20% held out for
# validation in this example); the fixed random_state makes the split repeatable.
# NOTE: X_train / Y_train are overwritten, so re-running this cell alone
# splits the already-split data again.
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X_train, Y_train, random_state=20200616, test_size=0.2
)

In [8]:
# Standardise the features using statistics fitted on the training split only,
# then apply the same transform to the validation split.
scaler = StandardScaler().fit(X_train)

# Features become float tensors, labels become long (integer) tensors.
x_train, x_valid = (
    torch.from_numpy(scaler.transform(split)).float()
    for split in (X_train, X_valid)
)
y_train, y_valid = (
    torch.from_numpy(np.array(targets)).long()
    for targets in (Y_train, Y_valid)
)

# Pair features with labels so they can be batched together.
train_dataset = TensorDataset(x_train, y_train)
valid_dataset = TensorDataset(x_valid, y_valid)

In [9]:
# Mini-batch loaders: only the training batches are shuffled.
batch_size = 16
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(dataset=valid_dataset, shuffle=False, batch_size=batch_size)

In [10]:
class Net(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    Args:
        in_features: length of each input feature vector (default 20).
        hidden_features: width of both hidden layers (default 255).
        num_classes: number of output classes (default 10).
    """

    def __init__(self, in_features=20, hidden_features=255, num_classes=10):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, hidden_features)
        self.fc3 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return raw class scores (logits), one row per input sample.

        No softmax is applied here: ``nn.CrossEntropyLoss`` expects
        unnormalised logits and applies log-softmax itself, so normalising
        in the model would apply softmax twice and flatten the gradients.
        The arg-max over dim 1 is unaffected; apply ``F.softmax(out, dim=1)``
        to the result when probabilities are needed.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [11]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the model, move its parameters to the chosen device and show its layers.
net = Net()
net = net.to(device)
print(net)

Net(
  (fc1): Linear(in_features=25, out_features=255, bias=True)
  (fc2): Linear(in_features=255, out_features=255, bias=True)
  (fc3): Linear(in_features=255, out_features=10, bias=True)
)


In [12]:
# Training setup: cross-entropy loss minimised with plain SGD for a fixed
# number of epochs.
epochs = 200
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=0.01)

In [13]:
# Per-epoch history; one entry is appended to each list at the end of every epoch.
train_loss_list, train_acc_list, val_loss_list, val_acc_list = [], [], [], []

# Sample counts used to turn per-epoch sums into per-sample averages.
n_train = len(train_loader.dataset)
n_valid = len(valid_loader.dataset)

for epoch in range(epochs):
    train_loss, train_acc, val_loss, val_acc = 0, 0, 0, 0

    # ---- training pass: parameters are updated after every mini-batch ----
    net.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        # criterion returns the mean over the batch; weight it by the batch
        # size so that dividing by the sample count below yields a true
        # per-sample mean (the last batch may be smaller than the others).
        train_loss += loss.item() * inputs.size(0)
        # Predicted class = index of the largest score along dim 1.
        train_acc += (outputs.max(1)[1] == labels).sum().item()
        loss.backward()
        optimizer.step()
    avg_train_loss = train_loss / n_train
    avg_train_acc = train_acc / n_train

    # ---- validation pass: no gradients, no parameter updates ----
    net.eval()
    with torch.no_grad():
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            val_loss += loss.item() * inputs.size(0)
            val_acc += (outputs.max(1)[1] == labels).sum().item()
    avg_val_loss = val_loss / n_valid
    avg_val_acc = val_acc / n_valid

    print('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}'
          .format(epoch + 1, epochs, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc))

    train_loss_list.append(avg_train_loss)
    train_acc_list.append(avg_train_acc)
    val_loss_list.append(avg_val_loss)
    val_acc_list.append(avg_val_acc)

RuntimeError: mat1 and mat2 shapes cannot be multiplied (16x20 and 25x255)

In [None]:
# Accuracy curves. The epoch axis is derived from the recorded history instead
# of a hard-coded 200, so the plot keeps working when `epochs` changes or
# training is interrupted early.
x = list(range(1, len(train_acc_list) + 1))
fig, ax = plt.subplots()
ax.plot(x, train_acc_list, label="train")
ax.plot(x, val_acc_list, label="validation")
ax.set(title="Accuracy per epoch", xlabel="epoch", ylabel="accuracy")
ax.legend()
plt.show()

In [None]:
# Loss curves. The epoch axis is built here from the recorded history so this
# cell does not depend on a variable left over from another cell.
loss_epochs = list(range(1, len(train_loss_list) + 1))
fig, ax = plt.subplots()
ax.plot(loss_epochs, train_loss_list, label="train")
ax.plot(loss_epochs, val_loss_list, label="validation")
ax.set(title="Loss per epoch", xlabel="epoch", ylabel="loss")
ax.legend()
plt.show()