* Baixar todo o dataset
* Conseguir importar o dataset
* Inicialmente trabalhar com o dataset inteiro e utilizar as diferentes extrações de features
* Depois ir pelo segundo caminho

In [123]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import librosa
import librosa.display
from os import listdir
from os.path import isfile, join

import glob
from sklearn import svm

# Dataset

In [73]:
def import_signal(path):
    files = glob.glob(path + '*.au')
    
    audios = []
    
    for file in files:  
        s, sr = librosa.core.load(file, sr=22050)
        audios.append(s)

    return audios, sr

## pop 0

In [259]:
pop_audios, pop_sr = import_signal('dataset/pop/')

In [274]:
pop_audios_df = pd.DataFrame([[x] for x in pop_audios], columns=['x'])
pop_audios_df['y'] = 0
pop_audios_df['y2'] = 0

pop_audios_df.head()

Unnamed: 0,x,y,y2
0,"[-0.0340271, -0.043304443, -0.0463562, -0.0431...",0,0
1,"[-0.058288574, 0.01071167, 0.09915161, 0.07913...",0,0
2,"[-0.26919556, -0.24667358, -0.18579102, -0.136...",0,0
3,"[-0.2119751, -0.0675354, -0.18411255, 0.024597...",0,0
4,"[-0.02709961, -0.023712158, 0.052215576, 0.084...",0,0


## classical 1

In [261]:
classical_audios, classical_sr = import_signal('dataset/classical/')

In [275]:
classical_audios_df = pd.DataFrame([[x] for x in classical_audios], columns=['x'])
classical_audios_df['y'] = 1
classical_audios_df['y2'] = 0

classical_audios_df.head()

Unnamed: 0,x,y,y2
0,"[-0.06484985, -0.10720825, -0.109436035, -0.09...",1,0
1,"[0.007537842, 0.011444092, 0.010345459, 0.0132...",1,0
2,"[0.023345947, 0.028686523, 0.027038574, 0.0279...",1,0
3,"[0.007843018, 0.0072631836, 0.0058288574, 0.00...",1,0
4,"[-0.010772705, -0.020019531, -0.024261475, -0....",1,0


## jazz 2

In [263]:
jazz_audios, jazz_sr = import_signal('dataset/jazz/')

In [276]:
jazz_audios_df = pd.DataFrame([[x] for x in jazz_audios], columns=['x'])
jazz_audios_df['y'] = 2
jazz_audios_df['y2'] = 1

jazz_audios_df.head()

Unnamed: 0,x,y,y2
0,"[-0.031066895, -0.05078125, -0.04537964, -0.04...",2,1
1,"[0.0019836426, 0.0014343262, -0.0017089844, -0...",2,1
2,"[-0.03414917, -0.04660034, -0.027648926, -0.01...",2,1
3,"[-0.021911621, -0.03604126, -0.039001465, -0.0...",2,1
4,"[0.030456543, 0.010772705, -0.008544922, -0.02...",2,1


## rock 3

In [265]:
rock_audios, rock_sr = import_signal('dataset/rock/')

In [277]:
rock_audios_df = pd.DataFrame([[x] for x in rock_audios], columns=['x'])
rock_audios_df['y'] = 3
rock_audios_df['y2'] = 1

rock_audios_df.head()

Unnamed: 0,x,y,y2
0,"[0.035339355, 0.053375244, -0.0047912598, 0.00...",3,1
1,"[-0.07858276, -0.1638794, -0.11288452, 0.00088...",3,1
2,"[-0.026824951, -0.038757324, -0.029693604, -0....",3,1
3,"[-0.023529053, -0.03363037, -0.03527832, -0.04...",3,1
4,"[-0.026641846, -0.051208496, -0.05618286, -0.0...",3,1


In [278]:
pop_sr == classical_sr == jazz_sr == rock_sr

True

In [279]:
sr = pop_sr

# get audios df

In [280]:
audios = (
    pd.concat([
        pop_audios_df, classical_audios_df, 
        jazz_audios_df, rock_audios_df])
    .sample(frac=1).reset_index(drop=True) #shuffle
)

print(f'Rows: {len(audios)}')
audios.head()

Rows: 400


Unnamed: 0,x,y,y2
0,"[0.06652832, 0.16467285, 0.2319336, 0.27746582...",2,1
1,"[-0.0034484863, -0.0024414062, -0.0015869141, ...",2,1
2,"[-0.10986328, -0.076049805, -0.07336426, -0.08...",3,1
3,"[0.012420654, 0.07501221, 0.05819702, 0.014953...",3,1
4,"[-0.035217285, -0.03540039, 0.018859863, 0.061...",2,1


# split into train and test

In [281]:
msk = np.random.rand(len(audios)) < 0.8

train = audios[msk]
test = audios[~msk]

print(f'Rows train:{len(train)}')
print(f'Rows test:{len(test)}')

Rows train:332
Rows test:68


In [282]:
train.groupby('y').count()

Unnamed: 0_level_0,x,y2
y,Unnamed: 1_level_1,Unnamed: 2_level_1
0,77,77
1,89,89
2,80,80
3,86,86


In [283]:
test.groupby('y').count()

Unnamed: 0_level_0,x,y2
y,Unnamed: 1_level_1,Unnamed: 2_level_1
0,23,23
1,11,11
2,20,20
3,14,14


In [284]:
train.groupby('y2').count()

Unnamed: 0_level_0,x,y
y2,Unnamed: 1_level_1,Unnamed: 2_level_1
0,166,166
1,166,166


In [285]:
test.groupby('y2').count()

Unnamed: 0_level_0,x,y
y2,Unnamed: 1_level_1,Unnamed: 2_level_1
0,34,34
1,34,34


# Feature extraction

## MFCC

In [286]:
mfcc_list_train = []

for x in train['x']:
      
    mfcc = np.array(librosa.feature.mfcc(y=x, sr=sr)).flatten()
    zeros = np.zeros((26280 - len(mfcc)))
    mfcc_list_train.append(np.concatenate((mfcc, zeros), axis=0))
    
train['mfcc'] = mfcc_list_train
train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,x,y,y2,mfcc
0,"[0.06652832, 0.16467285, 0.2319336, 0.27746582...",2,1,"[-108.22129821777344, -117.17276000976562, -10..."
1,"[-0.0034484863, -0.0024414062, -0.0015869141, ...",2,1,"[-369.7628173828125, -354.6846618652344, -351...."
2,"[-0.10986328, -0.076049805, -0.07336426, -0.08...",3,1,"[-105.29022216796875, -103.15363311767578, -11..."
3,"[0.012420654, 0.07501221, 0.05819702, 0.014953...",3,1,"[-132.9656219482422, -119.15519714355469, -87...."
4,"[-0.035217285, -0.03540039, 0.018859863, 0.061...",2,1,"[-121.95355224609375, -135.16209411621094, -15..."


In [287]:
mfcc_list_test = []

for x in test['x']:
    
    mfcc = np.array(librosa.feature.mfcc(y=x, sr=sr)).flatten()
    zeros = np.zeros((26280 - len(mfcc)))
    mfcc_list_test.append(np.concatenate((mfcc, zeros), axis=0))
    
test['mfcc'] = mfcc_list_test
test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


Unnamed: 0,x,y,y2,mfcc
11,"[0.0074768066, -0.009674072, -0.011138916, -0....",2,1,"[-175.67343139648438, -142.95144653320312, -11..."
24,"[0.058685303, 0.043151855, 0.06188965, 0.08190...",3,1,"[-71.61068725585938, -80.95719909667969, -98.1..."
25,"[-0.016723633, 0.0024719238, -0.0020751953, 0....",0,0,"[-104.91387939453125, -112.64622497558594, -13..."
28,"[-0.024993896, -0.018157959, -0.020233154, 0.0...",0,0,"[-117.60298919677734, -112.92239379882812, -13..."
36,"[0.05722046, 0.06097412, 0.06286621, 0.0684204...",0,0,"[-283.2794189453125, -298.8136901855469, -313...."


## LPC

In [288]:
lpc_list = []

for x in train['x']:
    
    lpc = librosa.lpc(x, 6)
    lpc_list.append(lpc)
    
train['lpc'] = lpc_list
train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,x,y,y2,mfcc,lpc
0,"[0.06652832, 0.16467285, 0.2319336, 0.27746582...",2,1,"[-108.22129821777344, -117.17276000976562, -10...","[1.0, -2.2199454, 2.1710544, -1.6874472, 1.325..."
1,"[-0.0034484863, -0.0024414062, -0.0015869141, ...",2,1,"[-369.7628173828125, -354.6846618652344, -351....","[1.0, -0.6939274, 0.24118969, -0.18175718, 0.0..."
2,"[-0.10986328, -0.076049805, -0.07336426, -0.08...",3,1,"[-105.29022216796875, -103.15363311767578, -11...","[1.0, -0.58800584, -0.2648997, -0.13331528, 0...."
3,"[0.012420654, 0.07501221, 0.05819702, 0.014953...",3,1,"[-132.9656219482422, -119.15519714355469, -87....","[1.0, -1.5170932, 1.2510097, -1.3537242, 0.940..."
4,"[-0.035217285, -0.03540039, 0.018859863, 0.061...",2,1,"[-121.95355224609375, -135.16209411621094, -15...","[1.0, -2.7162657, 3.5172086, -3.239585, 2.3423..."


In [289]:
lpc_list = []

for x in test['x']:
    
    lpc = librosa.lpc(x, 6)
    lpc_list.append(lpc)
    
test['lpc'] = lpc_list
test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,x,y,y2,mfcc,lpc
11,"[0.0074768066, -0.009674072, -0.011138916, -0....",2,1,"[-175.67343139648438, -142.95144653320312, -11...","[1.0, -0.44715104, -0.35266197, 0.030646903, -..."
24,"[0.058685303, 0.043151855, 0.06188965, 0.08190...",3,1,"[-71.61068725585938, -80.95719909667969, -98.1...","[1.0, -1.2091802, 0.45889673, -0.061906826, -0..."
25,"[-0.016723633, 0.0024719238, -0.0020751953, 0....",0,0,"[-104.91387939453125, -112.64622497558594, -13...","[1.0, -0.6648732, -0.28418458, 0.19508383, 0.0..."
28,"[-0.024993896, -0.018157959, -0.020233154, 0.0...",0,0,"[-117.60298919677734, -112.92239379882812, -13...","[1.0, -1.2219975, -0.020330312, 0.52670175, -0..."
36,"[0.05722046, 0.06097412, 0.06286621, 0.0684204...",0,0,"[-283.2794189453125, -298.8136901855469, -313....","[1.0, -0.6768958, -0.041079897, -0.01663456, -..."


# First Scenario

## Plot functions

In [None]:
# pesquisar

## Using MFCC

In [220]:
model_mfcc = svm.SVC()
model_mfcc.fit(train['mfcc'].to_list(), train['y'].to_list())



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [221]:
print('Train accuracy: {:.1%}'.format(model_mfcc.score(train['mfcc'].to_list(), train['y'].to_list())))
print('Test accuracy: {:.1%}'.format(model_mfcc.score(test['mfcc'].to_list(), test['y'].to_list())))

Train accuracy: 100.0%
Test accuracy: 24.4%


## Using LPC

In [248]:
model_lpc = svm.SVC()
model_lpc.fit(train['lpc'].to_list(), train['y'].to_list())



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [249]:
print('Train accuracy: {:.1%}'.format(model_lpc.score(train['lpc'].to_list(), train['y'].to_list())))
print('Test accuracy: {:.1%}'.format(model_lpc.score(test['lpc'].to_list(), test['y'].to_list())))

Train accuracy: 70.1%
Test accuracy: 69.8%


## Using MFCC and LPC

# Second Scenario

In [290]:
model_lpc = svm.SVC()
model_lpc.fit(train['lpc'].to_list(), train['y2'].to_list())



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=None,
  shrinking=True, tol=0.001, verbose=False)

In [291]:
print('Train accuracy: {:.1%}'.format(model_lpc.score(train['lpc'].to_list(), train['y2'].to_list())))
print('Test accuracy: {:.1%}'.format(model_lpc.score(test['lpc'].to_list(), test['y2'].to_list())))

Train accuracy: 78.6%
Test accuracy: 73.5%


## Second Scenario