In [1]:
import os
import numpy as np
import time
import copy
from glob import glob

import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from PIL import Image
from torch.autograd import Variable
import random

import librosa
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
import pathlib
import csv


In [None]:
from sklearn.preprocessing import scale
mfccs = sklearn.preprocessing.scale(mfccs, axis=1)

In [6]:
cmap = plt.get_cmap('inferno')
data=[]
plt.figure(figsize=(10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'./genres/{g}'):
        songname = f'./genres/{g}/{filename}'
        y,sr = librosa.load(songname, mono=True, duration=5)
        S=librosa.feature.mfcc(y=y,sr=sr,n_mfcc=50)
        #data.append(y)
        #D= np.abs(librosa.stft(y))**2
        #S = librosa.feature.melspectrogram(S=D)
        data.append(S)
        #S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128,fmax=8000)
        
        

<Figure size 720x720 with 0 Axes>

In [7]:
data1=np.array(data)
data1.shape

(1000, 50, 216)

In [8]:
data2= pd.read_csv('data.csv')
data2.head()



# Dropping unneccesary columns
data2 = data2.drop(['filename'],axis=1)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genre=[x for x in genres.split(' ')]
for i in range(len(genre)):
    data2=data2.replace(genre[i],i)
    
data2['label'].values.shape


(1000,)

In [9]:
trainX, testX, trainY, testY = train_test_split(data1,data2['label'].values , test_size=0.2)
valX=testX[100:]
valY=testY[100:]
testX=testX[:100]
testY=testY[:100]



In [10]:
print (trainX.shape)
print (trainY.shape)
print (valX.shape)
print (valY.shape)
print (testX.shape)
print(testY.shape)

(800, 50, 216)
(800,)
(100, 50, 216)
(100,)
(100, 50, 216)
(100,)


In [11]:
from torch.utils.data import TensorDataset, DataLoader
batch_size=80

train_data = TensorDataset(torch.from_numpy(trainX), torch.from_numpy(trainY))
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_data = TensorDataset(torch.from_numpy(valX), torch.from_numpy(valY))
valid_loader = DataLoader(val_data, shuffle=True, batch_size=batch_size)
test_data = TensorDataset(torch.from_numpy(testX), torch.from_numpy(testY))
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)


# First try

In [2]:
os.getcwd()

'C:\\Users\\Administrator\\Desktop\\CS DATA PROJECT'

In [174]:
data= pd.read_csv('data.csv')
data.head()



# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'
genre=[x for x in genres.split(' ')]
for i in range(len(genre)):
  data=data.replace(genre[i],i)


In [176]:
data=data.drop(['chroma_stft','rmse','spectral_centroid','spectral_bandwidth','rolloff','zero_crossing_rate'],axis=1)
data[:1]

Unnamed: 0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,-113.596742,121.557302,-19.158825,42.351029,-6.376457,18.618875,-13.697911,15.34463,-12.285266,10.980491,...,8.810668,-3.667367,5.75169,-5.162761,0.750947,-1.691937,-0.409954,-2.300208,1.219928,0


In [177]:

data2=data.sample(n=800, random_state=1)
for i in range(len(data)-1,-1,-1):
  if data.values.tolist()[i] in data2.values.tolist():
    data=data.drop([i])
n_classes = 10
data1=data[list(data.keys())[:-1]].values.reshape(-1,len(list(data.keys()))-1)


valX, testX, valY, testY = train_test_split(data[list(data.keys())[:-1]].values.reshape(-1,len(list(data.keys()))-1),data['label'].values , test_size=0.5)
trainX,trainY=data2[list(data.keys())[:-1]].values.reshape(-1,len(list(data.keys()))-1),data2['label'].values
#trainX, trainY, valX, valY, testX = data1[:800],data['label'].values[:800],data1[900:],data['label'].values[900:],data1[800:900]
#df_test_set= data[list(data.keys())].values[800:900].reshape(-1,len(list(data.keys())))

#print (df_test_set.shape)

In [178]:
print (trainX.shape)
print (trainY.shape)
print (valX.shape)
print (valY.shape)
print (testX.shape)
print(testY.shape)

(800, 20)
(800,)
(98, 20)
(98,)
(98, 20)
(98,)


In [13]:
import logging
handler=logging.basicConfig(level=logging.INFO)
lgr = logging.getLogger(__name__)
# check if CUDA is available
use_cuda = torch.cuda.is_available()
# move tensors to GPU if CUDA is available
FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
Tensor = FloatTensor

lgr.info("USE CUDA=" + str (use_cuda))

INFO:__main__:USE CUDA=True


In [14]:
def XnumpyToTensor(x_data_np):
    x_data_np = np.array(x_data_np, dtype=np.float32)        
    print(x_data_np.shape)
    print(type(x_data_np))

    if use_cuda:
        lgr.info ("Using the GPU")    
        X_tensor = Variable(torch.from_numpy(x_data_np).cuda()) # Note the conversion for pytorch    
    else:
        lgr.info ("Using the CPU")
        X_tensor = Variable(torch.from_numpy(x_data_np)) # Note the conversion for pytorch
    
    print(type(X_tensor.data)) # should be 'torch.cuda.FloatTensor'            
    print((X_tensor.data.shape)) # torch.Size([108405, 29])
    return X_tensor


# Convert the np arrays into the correct dimention and type
# Note that BCEloss requires Float in X as well as in y
def YnumpyToTensor(y_data_np):    
    y_data_np=y_data_np.reshape((y_data_np.shape[0],1)) # Must be reshaped for PyTorch!
    print(y_data_np.shape)
    print(type(y_data_np))

    if use_cuda:
        lgr.info ("Using the GPU")            
    #     Y = Variable(torch.from_numpy(y_data_np).type(torch.LongTensor).cuda())
        Y_tensor = Variable(torch.from_numpy(y_data_np)).type(torch.LongTensor).cuda()  # BCEloss requires Float        
    else:
        lgr.info ("Using the CPU")        
    #     Y = Variable(torch.squeeze (torch.from_numpy(y_data_np).type(torch.LongTensor)))  #         
        Y_tensor = Variable(torch.from_numpy(y_data_np)).type(torch.LongTensor)  # BCEloss requires Float        

    print(type(Y_tensor.data)) # should be 'torch.cuda.FloatTensor'
    print(y_data_np.shape)
    print(type(y_data_np))    
    return Y_tensor

In [16]:
# use_cuda=False
X_tensor_train= XnumpyToTensor(trainX) # default order is NBC for a 3d tensor, but we have a 2d tensor
X_shape=X_tensor_train.data.size()

# Dimensions
# Number of features for the input layer
N_FEATURES=trainX.shape[1]
# Number of rows
NUM_ROWS_TRAINNING=trainX.shape[0]
# this number has no meaning except for being divisable by 2
N_MULT_FACTOR=4 # min should be 4
# Size of first linear layer
N_HIDDEN=N_FEATURES*2
# CNN kernel size
N_CNN_KERNEL=2
MAX_POOL_KERNEL=4

DEBUG_ON=False

def debug(x):
    if DEBUG_ON:
        print ('(x.size():' + str (x.size()))
    
class Net2(nn.Module):    
    def __init__(self, n_feature, n_hidden, n_output, n_cnn_kernel, n_mult_factor=N_MULT_FACTOR,n=8):
        super(Net2, self).__init__()
        self.n_feature=n_feature
        self.n_hidden=int(n_hidden)
        self.n_output= n_output 
        self.n_cnn_kernel=n_cnn_kernel
        self.n_mult_factor=n_mult_factor
        #self.n_l2_hidden=self.n_hidden * (self.n_mult_factor - self.n_cnn_kernel + 3)
        self.n_l2_hidden=10400
#         self.n_out_hidden=int (self.n_l2_hidden/2)
                        
     
        self.c1= nn.Sequential(            
            torch.nn.Conv1d(self.n_feature, self.n_hidden, 
                            kernel_size=(self.n_cnn_kernel,), stride=(1,), padding=(1,),dilation=4),
            torch.nn.AvgPool1d(self.n_cnn_kernel),
            torch.nn.Dropout(p=1 -.75),            
            torch.nn.LeakyReLU (0.1),
            torch.nn.BatchNorm1d(self.n_hidden, eps=1e-05, momentum=0.1, affine=True)        
        )         
        self.c2= nn.Sequential(            
            torch.nn.Conv1d(self.n_hidden, 2*self.n_hidden, 
                            kernel_size=(self.n_cnn_kernel,), stride=(1,),padding=(1,), dilation=4),
            torch.nn.AvgPool1d(self.n_cnn_kernel),
            torch.nn.Dropout(p=1 -.75),            
            torch.nn.LeakyReLU (0.1),
            torch.nn.BatchNorm1d(2*self.n_hidden, eps=1e-05, momentum=0.1, affine=True)        
        )    
        self.out = nn.Sequential(
            torch.nn.Linear(self.n_l2_hidden,
                            self.n_output),  
        )                
        self.sof=nn.Softmax()

        
    def forward(self, x):
        #debug(x)
        #print('l1 in: ',x.shape)
        varSize=x.data.shape[0] # must be calculated here in forward() since its is a dynamic size        
        #x=self.l1(x) 
        #print('l1 out: ',x.shape)
        #debug(x)
        # for CNN        
        #x = x.view(varSize,1,-1)
        #print('c1 input: ',x.shape)
        debug(x)
        x=self.c1(x)
        print('c1 output: ',x.shape)
        debug(x)
        #x = x.view(varSize,self.n_feature,-1)
        print('c2 input: ',x.shape)
        x=self.c2(x)
        print('c2 output: ',x.shape)
        #debug(x)
 
        # for Linear layer
        #x = x.view(varSize, self.n_hidden * (self.n_mult_factor -self.n_cnn_kernel + 3))
        x = x.view(varSize, -1)
        print('l2 input: ',x.shape)
        debug(x)
#         x=self.l2(x) 

        x=self.out(x)   
        print('l2 out: ',x.shape)
        debug(x)
        x=self.sof(x)
        print('sof out: ',x.shape)
        return x
    

net = Net2(n_feature=N_FEATURES, n_hidden=N_HIDDEN, n_output=10, n_cnn_kernel=N_CNN_KERNEL)   # define the network    
if use_cuda:
    net=net.cuda() # very important !!!
lgr.info(net)
b = net(X_tensor_train)
print ('(b.size():' + str (b.size())) # torch.Size([108405, 928])

(800, 50, 216)
<class 'numpy.ndarray'>


INFO:__main__:Using the GPU


<class 'torch.Tensor'>
torch.Size([800, 50, 216])


INFO:__main__:Net2(
  (c1): Sequential(
    (0): Conv1d(50, 100, kernel_size=(2,), stride=(1,), padding=(1,))
    (1): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
    (2): Dropout(p=0.25)
    (3): LeakyReLU(negative_slope=0.1)
    (4): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (c2): Sequential(
    (0): Conv1d(100, 200, kernel_size=(2,), stride=(1,), padding=(1,), dilation=(4,))
    (1): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
    (2): Dropout(p=0.25)
    (3): LeakyReLU(negative_slope=0.1)
    (4): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (out): Sequential(
    (0): Linear(in_features=520, out_features=10, bias=True)
  )
  (sof): Sigmoid()
)


c1 input:  torch.Size([800, 50, 216])
c1 output:  torch.Size([800, 100, 108])
c2 input:  torch.Size([800, 100, 108])
c2 output:  torch.Size([800, 200, 53])
l2 input:  torch.Size([800, 10600])


RuntimeError: size mismatch, m1: [800 x 10600], m2: [520 x 10] at c:\programdata\miniconda3\conda-bld\pytorch_1533090623466\work\aten\src\thc\generic/THCTensorMathBlas.cu:249

In [42]:
# use_cuda=False
X_tensor_train= XnumpyToTensor(trainX) # default order is NBC for a 3d tensor, but we have a 2d tensor
X_shape=X_tensor_train.data.size()

# Dimensions
# Number of features for the input layer
N_FEATURES=trainX.shape[1]
# Number of rows
NUM_ROWS_TRAINNING=trainX.shape[0]
# this number has no meaning except for being divisable by 2
N_MULT_FACTOR=4 # min should be 4
# Size of first linear layer
N_HIDDEN=N_FEATURES*2
# CNN kernel size
N_CNN_KERNEL=2
MAX_POOL_KERNEL=4

DEBUG_ON=False

def debug(x):
    if DEBUG_ON:
        print ('(x.size():' + str (x.size()))
    
class Net2(nn.Module):    
    def __init__(self, n_feature, n_hidden, n_output, n_cnn_kernel, n_mult_factor=N_MULT_FACTOR,n=8):
        super(Net2, self).__init__()
        self.n_feature=n_feature
        self.n_hidden=int(n_hidden)
        self.n_output= n_output 
        self.n_cnn_kernel=n_cnn_kernel
        self.n_mult_factor=n_mult_factor
        #self.n_l2_hidden=self.n_hidden * (self.n_mult_factor - self.n_cnn_kernel + 3)
        self.n_l2_hidden=10400
#         self.n_out_hidden=int (self.n_l2_hidden/2)
                        
     
        self.c1= nn.Sequential(            
            torch.nn.Conv1d(self.n_feature, self.n_hidden, 
                            kernel_size=(self.n_cnn_kernel,), stride=(1,), padding=(1,),dilation=4),
            torch.nn.AvgPool1d(self.n_cnn_kernel),
            torch.nn.Dropout(p=1 -.75),            
            torch.nn.LeakyReLU (0.1),
            torch.nn.BatchNorm1d(self.n_hidden, eps=1e-05, momentum=0.1, affine=True)        
        )         
        self.c2= nn.Sequential(            
            torch.nn.Conv1d(self.n_hidden, 2*self.n_hidden, 
                            kernel_size=(self.n_cnn_kernel,), stride=(1,),padding=(1,), dilation=4),
            torch.nn.AvgPool1d(self.n_cnn_kernel),
            torch.nn.Dropout(p=1 -.75),            
            torch.nn.LeakyReLU (0.1),
            torch.nn.BatchNorm1d(2*self.n_hidden, eps=1e-05, momentum=0.1, affine=True)        
        )    
        self.out = nn.Sequential(
            torch.nn.Linear(self.n_l2_hidden,
                            self.n_output),  
        )                
        self.sof=nn.Softmax()

        
    def forward(self, x):
        #debug(x)
        #print('l1 in: ',x.shape)
        varSize=x.data.shape[0] # must be calculated here in forward() since its is a dynamic size        
        #x=self.l1(x) 
        #print('l1 out: ',x.shape)
        #debug(x)
        # for CNN        
        #x = x.view(varSize,1,-1)
        #print('c1 input: ',x.shape)
        debug(x)
        x=self.c1(x)
        #print('c1 output: ',x.shape)
        debug(x)
        #x = x.view(varSize,self.n_feature,-1)
        #print('c2 input: ',x.shape)
        x=self.c2(x)
        #print('c2 output: ',x.shape)
        #debug(x)
 
        # for Linear layer
        #x = x.view(varSize, self.n_hidden * (self.n_mult_factor -self.n_cnn_kernel + 3))
        x = x.view(varSize, -1)
        #print('l2 input: ',x.shape)
        debug(x)
#         x=self.l2(x) 

        x=self.out(x)   
        #print('l2 out: ',x.shape)
        debug(x)
        x=self.sof(x)
        #print('sof out: ',x.shape)
        return x
    

net = Net2(n_feature=N_FEATURES, n_hidden=N_HIDDEN, n_output=10, n_cnn_kernel=N_CNN_KERNEL)   # define the network    
if use_cuda:
    net=net.cuda() # very important !!!
lgr.info(net)
#b = net(X_tensor_train)
#print ('(b.size():' + str (b.size())) # torch.Size([108405, 928])

(800, 50, 216)
<class 'numpy.ndarray'>


INFO:__main__:Using the GPU


<class 'torch.Tensor'>
torch.Size([800, 50, 216])


INFO:__main__:Net2(
  (c1): Sequential(
    (0): Conv1d(50, 100, kernel_size=(2,), stride=(1,), padding=(1,), dilation=(4,))
    (1): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
    (2): Dropout(p=0.25)
    (3): LeakyReLU(negative_slope=0.1)
    (4): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (c2): Sequential(
    (0): Conv1d(100, 200, kernel_size=(2,), stride=(1,), padding=(1,), dilation=(4,))
    (1): AvgPool1d(kernel_size=(2,), stride=(2,), padding=(0,))
    (2): Dropout(p=0.25)
    (3): LeakyReLU(negative_slope=0.1)
    (4): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (out): Sequential(
    (0): Linear(in_features=10400, out_features=10, bias=True)
  )
  (sof): Softmax()
)


In [25]:
trainX.shape

(800, 50, 216)

In [36]:
loss_func = nn.CrossEntropyLoss()

# specify optimizer
optimizer = optim.Adam(net.parameters(), lr=0.0000025)
if use_cuda:
    lgr.info ("Using the GPU")    
    net.cuda()
    loss_func.cuda()

lgr.info (optimizer)
lgr.info (loss_func)

INFO:__main__:Using the GPU
INFO:__main__:Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 5e-06
    weight_decay: 0
)
INFO:__main__:CrossEntropyLoss()


In [27]:
from torch.utils.data import TensorDataset, DataLoader
batch_size=80

train_data = TensorDataset(torch.from_numpy(trainX), torch.from_numpy(trainY))
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_data = TensorDataset(torch.from_numpy(valX), torch.from_numpy(valY))
valid_loader = DataLoader(val_data, shuffle=True, batch_size=batch_size)
test_data = TensorDataset(torch.from_numpy(testX), torch.from_numpy(testY))
test_loader = DataLoader(test_data, shuffle=True, batch_size=batch_size)


In [41]:
import time
train_on_gpu=torch.cuda.is_available()
start_time = time.time()    
epochs=600
all_losses = []
#batch_size=80
X_tensor_train= XnumpyToTensor(trainX)
#Y_tensor_train= YnumpyToTensor(trainY)
counter=0
#print(type(X_tensor_train.data), type(Y_tensor_train.data)) # should be 'torch.cuda.FloatTensor'
net.train()
# From here onwards, we must only use PyTorch Tensors
for step in range(epochs):
  train_loss=0.0

  for X_t_train, Y_t_train in train_loader:   
    counter+=1
    if (train_on_gpu):
        X_t_train,Y_t_train=X_t_train.to('cuda'),Y_t_train.to('cuda')
    
    out = net(X_t_train.float())                 # input x and predict based on x
    loss = loss_func(out, Y_t_train.squeeze_())     # must be (1. nn output, 2. target), the target label is NOT one-hotted
    #loss = loss_func(out, Y_t_train.to(dtype=torch.float))   
    optimizer.zero_grad()   # clear gradients for next train
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients

    #train_loss+=cost.item()

    if counter % 100 == 0:
        # Get validation loss

        val_losses = []
        net.eval()
        for inputs, labels in valid_loader:
            
            # Creating new variables for the hidden state, otherwise
            # we'd backprop through the entire training history


            if(train_on_gpu):
                inputs, labels = inputs.cuda(), labels.cuda()

            output= net(inputs.float())
            val_loss = loss_func(output, labels)

            val_losses.append(val_loss.item())

        
        print("Epoch: {}/{}...".format(step+1, epochs),
              "Step: {}...".format(counter),
              "Loss: {:.6f}...".format(loss.item()),
              "Val Loss: {:.6f}".format(np.mean(val_losses)))      

        #prediction = out.data # probabilities             
        #_,pred_y = torch.max(out.data, 1)
        #pred_y=pred_y.numpy()
        #target_y = Y_t_train.cpu().data.numpy()
        #target_y=torch.from_numpy(target_y)
       #print('pred_y: ',pred_y)
        #print('target: ',target_y)      
        #accuracy=np.sum(pred_y==target_y)/float(len(target_y))

        #print('Accuracy: ',accuracy)


(800, 50, 216)
<class 'numpy.ndarray'>


INFO:__main__:Using the GPU


<class 'torch.Tensor'>
torch.Size([800, 50, 216])




Epoch: 10/600... Step: 100... Loss: 1.536524... Val Loss: 1.956185


KeyboardInterrupt: 

In [47]:
test_losses = [] # track loss
num_correct = 0


net.eval()
# iterate over test data
for inputs, labels in test_loader:


    if(train_on_gpu):
        inputs, labels = inputs.cuda(), labels.cuda()
    
    # get predicted outputs
    output= net(inputs.float())
    
    # calculate loss
    test_loss = loss_func(output, labels)
    test_losses.append(test_loss.item())
    
    # convert output probabilities to predicted class (0 or 1)
    _,pred = torch.max(torch.round(output.squeeze()).data,1)  # rounds to the nearest integer
    #print('pred: ',pred)
    #print('labels: ',labels.shape)
    # compare predictions to true label
    correct_tensor = pred.eq(labels.view_as(pred))
    correct = np.squeeze(correct_tensor.numpy()) if not train_on_gpu else np.squeeze(correct_tensor.cpu().numpy())
    num_correct += np.sum(correct)


# -- stats! -- ##
# avg test loss
print("Test loss: {:.3f}".format(np.mean(test_losses)))

# accuracy over all test data
test_acc = num_correct/len(test_loader.dataset)
print("Test accuracy: {:.3f}".format(test_acc))    
    



Test loss: 2.297
Test accuracy: 0.100


In [48]:
pred

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0')

In [49]:
labels

tensor([8, 7, 1, 5, 2, 4, 6, 8, 5, 9, 2, 2, 6, 6, 9, 6, 5, 3, 0, 3], device='cuda:0')

0.2222222222222222