In [3]:
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

from scipy.io import savemat

from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
# import warnings
# warnings.filterwarnings('ignore')

In [4]:
class Net(nn.Module):
    """LeNet-style convolutional classifier producing 10 output scores.

    Architecture: two 5x5 convolutions (1 -> 6 -> 16 channels), each followed
    by ReLU and 2x2 max pooling, then three fully connected layers
    (400 -> 120 -> 84 -> 10).

    ``fc1`` is sized for 16 * 5 * 5 = 400 flattened features, which is what
    the conv/pool stack produces for e.g. 32x32 single-channel input. Inputs
    whose spatial size does not reduce to 5x5 will raise at ``fc1``.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: tensor of shape (batch, 1, height, width).

        Returns:
            Tensor of shape (batch, 10) with the raw (un-normalised) outputs
            of the last linear layer.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # For a square window a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension. The batch size is
        # passed explicitly instead of -1: with -1 the shape cannot be
        # inferred for a batch of zero samples and view() raises. reshape()
        # additionally accepts non-contiguous activations, where view() fails.
        x = x.reshape(x.size(0), self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of every dimension except the first (batch) one;
        a 1-D tensor therefore yields 1.
        """
        # torch.Size.numel() is the product of the listed dimensions.
        return x.size()[1:].numel()


In [6]:
# Load the per-track feature table; each row is keyed by its audio file name.
data = pd.read_csv(
    filepath_or_buffer='../data/data.csv',
    index_col='filename',
)
# Preview the first five rows.
data.head(5)

Unnamed: 0_level_0,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
blues.00043.au,0.399025,0.127311,2155.654923,2372.403604,5012.019693,0.087165,-109.165355,100.6215,-8.614721,47.358475,...,6.585774,-8.642621,4.912259,-15.442804,1.53875,-6.732474,1.417774,-3.96175,3.28746,blues
blues.00012.au,0.26932,0.119072,1361.045467,1567.804596,2739.625101,0.069124,-207.20808,132.799175,-15.438986,60.986727,...,-0.613248,0.384877,2.605128,-5.188924,-9.527455,-9.244394,-2.848274,-1.418707,-5.932607,blues
blues.00026.au,0.278484,0.07697,1198.607665,1573.308974,2478.37668,0.051988,-284.819504,108.785628,9.131956,51.25903,...,-3.303735,1.601561,2.660517,3.323455,3.25892,-4.551106,0.493845,5.937066,3.231544,blues
blues.00077.au,0.408876,0.243217,2206.771246,2191.473506,4657.388504,0.111526,-29.01099,104.532914,-30.974207,38.156392,...,10.786454,-10.558812,6.877709,-10.294858,6.967845,-10.2561,0.705014,-6.000722,1.348955,blues
blues.00084.au,0.396258,0.235238,2061.150735,2085.159448,4221.149475,0.113397,-38.965941,112.039843,-31.817035,38.240835,...,13.327049,-10.921602,9.795615,-5.031277,7.200982,-6.754969,2.663612,-4.38043,0.414055,blues


In [7]:
# Encode the genre names as integer class ids.
# The target is selected by its column name ('label', the last column in the
# preview above) rather than by position, so that adding or reordering feature
# columns cannot silently change which column gets encoded.
genre_list = data['label']
encoder = LabelEncoder()
# y holds one integer id per row; encoder.classes_ maps ids back to names.
y = encoder.fit_transform(genre_list)

In [9]:
# Number of rows (tracks) in the feature table.
data.shape[0]

1000

In [5]:
# Instantiate the network and print its module structure (one line per layer).
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
