## Projekt 1

Klasyfikacja instrumentów za pomocą wektorów nośnych

Problem badawczy, baza danych oraz sposób postępowania zgodne z tymi przedstawionymi w dokumentacji projektu

In [34]:
# Importing libraries

import scipy.stats
import os
import librosa
import pickle
import optuna
import pandas as pd

import numpy as np
import lightgbm as lgb
import matplotlib.pyplot as plt

from collections import Counter
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import roc_auc_score, roc_curve, auc, precision_recall_curve, accuracy_score, recall_score, precision_score, f1_score, make_scorer, confusion_matrix, log_loss
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate, StratifiedKFold
from pathlib import Path
from sklearn.model_selection import train_test_split

## 1. Preprocessing

Wczytanie plików do data frame, przypisanie labeli odpowienim plikom reprezentującym dany instrument (klasę), wykestrahowanie 13 współczynników MFCC, zweryfikowanie czy wszystkie nagrania są zgodne co do długości, częstotliwości próbkowania, przeprowadzenie standaryzacji za pomocą Standard Scaler

Oznaczenia klas:
Cello (cel) - 0,
Clarinet (cla) - 1,
Flute (flu) - 2,
Acoustic guitar (gac) - 3,
Electric guitar (gel) - 4,
Organ (org) - 5,
Piano (pia) - 6,
Saxophone (sax) - 7,
Trumpet (tru) - 8,
Violin (vio) - 9,
Human singing voice (voi) - 10





In [8]:
# Loading and labeling data

file_names = []
file_data = []
labels = []
sampling_freqs = []

for directory in os.listdir("irmas"):
    for file in os.listdir(os.path.join(Path.cwd(),"irmas",directory)):
        file_names.append(file)
        y, sr = librosa.load(os.path.join(Path.cwd(),"irmas",directory,file), sr=44100)
        file_data.append(y)
        sampling_freqs.append(sr)
        if file.startswith('[cel]'):
            labels.append(0)
        if file.startswith('[cla]'):
            labels.append(1)
        if file.startswith('[flu]'):
            labels.append(2)
        if file.startswith('[gac]'):
            labels.append(3)            
        if file.startswith('[gel]'):
            labels.append(4)
        if file.startswith('[org]'):
            labels.append(5)            
        if file.startswith('[pia]'):
            labels.append(6)            
        if file.startswith('[sax]'):
            labels.append(7)            
        if file.startswith('[tru]'):
            labels.append(8)            
        if file.startswith('[vio]'):
            labels.append(9)            
        if file.startswith('[voi]'):
            labels.append(10)            
            
            
# Adding data to a dataframe
df = pd.DataFrame(columns=['file_name', 'file_data', 'sampling_frequency', 'label'])
df['file_name'] = file_names
df['file_data'] = file_data
df['sampling_frequency'] = sampling_freqs
df['label'] = labels

In [9]:
display(df)

Unnamed: 0,file_name,file_data,sampling_frequency,label
0,[cel][cla]0001__1.wav,"[0.022232056, 0.02468872, 0.02645874, 0.027236...",44100,0
1,[cel][cla]0001__2.wav,"[-0.0033721924, -0.003967285, -0.003479004, -0...",44100,0
2,[cel][cla]0001__3.wav,"[0.00061035156, 0.0001373291, -0.0005950928, -...",44100,0
3,[cel][cla]0002__1.wav,"[-0.00024414062, -0.0013885498, -0.0026397705,...",44100,0
4,[cel][cla]0002__2.wav,"[-0.00018310547, 0.00076293945, 0.0011901855, ...",44100,0
...,...,...,...,...
1555,[voi][pop_roc]2546__3.wav,"[0.0059509277, 0.015350342, 0.023391724, 0.032...",44100,10
1556,[voi][pop_roc]2547__1.wav,"[-0.11528015, -0.15473938, -0.15000916, -0.113...",44100,10
1557,[voi][pop_roc]2547__2.wav,"[0.1661377, 0.15690613, 0.12338257, 0.07142639...",44100,10
1558,[voi][pop_roc]2547__3.wav,"[0.14439392, 0.12376404, 0.10206604, 0.0853881...",44100,10


Wszystkie dane mają tą samą długość oraz częstotliwość próbkowania (sprawdzane przy drugim modelu)

In [45]:
# Extracting MFCC Coefficients

mfccs = []
mfccs_delta = []
mfccs_deltasq = []

for data in df["file_data"]:
    mfcc = librosa.feature.mfcc(y=data, sr=22050, n_mfcc=13).flatten()
    mfccs.append(mfcc)
    mfccs_delta.append(librosa.feature.delta(mfcc))
    mfccs_deltasq.append(librosa.feature.delta(mfcc, order=2))
                 
               
df['mfcc'] = mfccs
df['mfcc_delta'] = mfccs_delta
df['mfcc_delta_sq'] = mfccs_deltasq

In [46]:
display(df)

Unnamed: 0,file_name,file_data,sampling_frequency,label,mfcc,mfcc_delta,mfcc_delta_sq
0,[cel][cla]0001__1.wav,"[0.022232056, 0.02468872, 0.02645874, 0.027236...",44100,0,"[-452.01215, -463.70468, -473.78613, -472.0447...","[-1.9220424, -1.9220424, -1.9220424, -1.922042...","[0.7014014, 0.7014014, 0.7014014, 0.7014014, 0..."
1,[cel][cla]0001__2.wav,"[-0.0033721924, -0.003967285, -0.003479004, -0...",44100,0,"[-353.5875, -345.70627, -341.9458, -340.2455, ...","[1.5634588, 1.5634588, 1.5634588, 1.5634588, 1...","[-1.1336648, -1.1336648, -1.1336648, -1.133664..."
2,[cel][cla]0001__3.wav,"[0.00061035156, 0.0001373291, -0.0005950928, -...",44100,0,"[-426.93094, -434.9382, -442.07135, -440.38293...","[0.46890718, 0.46890718, 0.46890718, 0.4689071...","[1.7306954, 1.7306954, 1.7306954, 1.7306954, 1..."
3,[cel][cla]0002__1.wav,"[-0.00024414062, -0.0013885498, -0.0026397705,...",44100,0,"[-465.59283, -472.84494, -483.94357, -485.5826...","[-0.92114764, -0.92114764, -0.92114764, -0.921...","[1.5679516, 1.5679516, 1.5679516, 1.5679516, 1..."
4,[cel][cla]0002__2.wav,"[-0.00018310547, 0.00076293945, 0.0011901855, ...",44100,0,"[-464.96738, -462.2898, -464.4726, -464.46667,...","[-0.2734192, -0.2734192, -0.2734192, -0.273419...","[-0.3988473, -0.3988473, -0.3988473, -0.398847..."
...,...,...,...,...,...,...,...
1555,[voi][pop_roc]2546__3.wav,"[0.0059509277, 0.015350342, 0.023391724, 0.032...",44100,10,"[-178.58676, -172.2699, -171.96695, -177.94272...","[-3.9553516, -3.9553516, -3.9553516, -3.955351...","[-1.0631608, -1.0631608, -1.0631608, -1.063160..."
1556,[voi][pop_roc]2547__1.wav,"[-0.11528015, -0.15473938, -0.15000916, -0.113...",44100,10,"[-127.25734, -122.60771, -121.361145, -122.555...","[-1.1543529, -1.1543529, -1.1543529, -1.154352...","[-0.72465014, -0.72465014, -0.72465014, -0.724..."
1557,[voi][pop_roc]2547__2.wav,"[0.1661377, 0.15690613, 0.12338257, 0.07142639...",44100,10,"[-134.16048, -133.197, -144.1307, -150.18326, ...","[11.054637, 11.054637, 11.054637, 11.054637, 1...","[6.5335608, 6.5335608, 6.5335608, 6.5335608, 6..."
1558,[voi][pop_roc]2547__3.wav,"[0.14439392, 0.12376404, 0.10206604, 0.0853881...",44100,10,"[-119.69623, -61.883232, -53.847103, -72.93917...","[-2.1233065, -2.1233065, -2.1233065, -2.123306...","[-3.1225982, -3.1225982, -3.1225982, -3.122598..."


In [47]:
mfcc_parameters = []
for iteration, value in enumerate(df["mfcc"]):
    mfcc_stack = []
    for i in range(0,12):
        data_stack = np.hstack((np.mean(df["mfcc"][iteration][i]), 
                    np.std(df["mfcc"][iteration][i]), 
                    np.median(df["mfcc"][iteration][i]), 
                    np.percentile(df["mfcc"][iteration][i], 25), 
                    np.percentile(df["mfcc"][iteration][i], 75), 
                    scipy.stats.iqr(df["mfcc"][iteration][i], rng=(10, 90)),
                    scipy.stats.kurtosis(df["mfcc"][iteration][i]),
                    scipy.stats.skew(df["mfcc"][iteration][i]),
                    np.min(df["mfcc"][iteration][i]),
                    np.max(df["mfcc"][iteration][i])
                    ))
        mfcc_stack = np.hstack((mfcc_stack, data_stack))
    mfcc_parameters.append(mfcc_stack)

df["mfcc_parameters"] = mfcc_parameters

In [48]:
display(df)

Unnamed: 0,file_name,file_data,sampling_frequency,label,mfcc,mfcc_delta,mfcc_delta_sq,mfcc_parameters
0,[cel][cla]0001__1.wav,"[0.022232056, 0.02468872, 0.02645874, 0.027236...",44100,0,"[-452.01215, -463.70468, -473.78613, -472.0447...","[-1.9220424, -1.9220424, -1.9220424, -1.922042...","[0.7014014, 0.7014014, 0.7014014, 0.7014014, 0...","[-452.01214599609375, 0.0, -452.01214599609375..."
1,[cel][cla]0001__2.wav,"[-0.0033721924, -0.003967285, -0.003479004, -0...",44100,0,"[-353.5875, -345.70627, -341.9458, -340.2455, ...","[1.5634588, 1.5634588, 1.5634588, 1.5634588, 1...","[-1.1336648, -1.1336648, -1.1336648, -1.133664...","[-353.5874938964844, 0.0, -353.5874938964844, ..."
2,[cel][cla]0001__3.wav,"[0.00061035156, 0.0001373291, -0.0005950928, -...",44100,0,"[-426.93094, -434.9382, -442.07135, -440.38293...","[0.46890718, 0.46890718, 0.46890718, 0.4689071...","[1.7306954, 1.7306954, 1.7306954, 1.7306954, 1...","[-426.9309387207031, 0.0, -426.9309387207031, ..."
3,[cel][cla]0002__1.wav,"[-0.00024414062, -0.0013885498, -0.0026397705,...",44100,0,"[-465.59283, -472.84494, -483.94357, -485.5826...","[-0.92114764, -0.92114764, -0.92114764, -0.921...","[1.5679516, 1.5679516, 1.5679516, 1.5679516, 1...","[-465.59283447265625, 0.0, -465.59283447265625..."
4,[cel][cla]0002__2.wav,"[-0.00018310547, 0.00076293945, 0.0011901855, ...",44100,0,"[-464.96738, -462.2898, -464.4726, -464.46667,...","[-0.2734192, -0.2734192, -0.2734192, -0.273419...","[-0.3988473, -0.3988473, -0.3988473, -0.398847...","[-464.9673767089844, 0.0, -464.9673767089844, ..."
...,...,...,...,...,...,...,...,...
1555,[voi][pop_roc]2546__3.wav,"[0.0059509277, 0.015350342, 0.023391724, 0.032...",44100,10,"[-178.58676, -172.2699, -171.96695, -177.94272...","[-3.9553516, -3.9553516, -3.9553516, -3.955351...","[-1.0631608, -1.0631608, -1.0631608, -1.063160...","[-178.58676147460938, 0.0, -178.58676147460938..."
1556,[voi][pop_roc]2547__1.wav,"[-0.11528015, -0.15473938, -0.15000916, -0.113...",44100,10,"[-127.25734, -122.60771, -121.361145, -122.555...","[-1.1543529, -1.1543529, -1.1543529, -1.154352...","[-0.72465014, -0.72465014, -0.72465014, -0.724...","[-127.25733947753906, 0.0, -127.25733947753906..."
1557,[voi][pop_roc]2547__2.wav,"[0.1661377, 0.15690613, 0.12338257, 0.07142639...",44100,10,"[-134.16048, -133.197, -144.1307, -150.18326, ...","[11.054637, 11.054637, 11.054637, 11.054637, 1...","[6.5335608, 6.5335608, 6.5335608, 6.5335608, 6...","[-134.1604766845703, 0.0, -134.1604766845703, ..."
1558,[voi][pop_roc]2547__3.wav,"[0.14439392, 0.12376404, 0.10206604, 0.0853881...",44100,10,"[-119.69623, -61.883232, -53.847103, -72.93917...","[-2.1233065, -2.1233065, -2.1233065, -2.123306...","[-3.1225982, -3.1225982, -3.1225982, -3.122598...","[-119.69622802734375, 0.0, -119.69622802734375..."


In [55]:
# Standardizing Data

X_train, X_test, y_train, y_test = train_test_split(df['mfcc'].to_list(), df['label'].to_list(), test_size=0.2, random_state=42, stratify=df['label'].to_list())

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## 2. Klasyfikacja

Przeprowadzenie klasyfikacji za pomocą wektorów nośnych (Support Vector Machine) i optymalizacja hiperparametrów za pomocą Optuna

In [65]:
# Initial classification without optimizing hyperparameters

SVM = SVC(random_state=42)

SVM.fit(X_train_scaled, y_train)

SVM_test_preds = SVM.predict(X_test_scaled)
print('test recall = ', recall_score(y_test, SVM_test_preds, average='macro'))
print('test precision = ', precision_score(y_test, SVM_test_preds, average='macro'))
print('test accuracy = ', accuracy_score(y_test, SVM_test_preds))
print('test F1 = ', f1_score(y_test, SVM_test_preds, average='macro'))

print(confusion_matrix(y_test, SVM_test_preds))

test recall =  0.4213453213453214
test precision =  0.4636847760224384
test accuracy =  0.42948717948717946
test F1 =  0.4239136661036161
[[14  1  3  3  2  1  2  0  1  1  2]
 [ 2  4  6  3  2  1  1  0  2  3  2]
 [ 3  1 18  1  1  2  3  0  0  0  1]
 [ 3  1  0 15  1  2  4  0  1  0  3]
 [ 0  0  0  3  9  4  0  0  0  6  6]
 [ 4  2  0  2  4 12  1  0  1  0  4]
 [ 1  0  1  4  2  2 11  1  0  1  5]
 [ 1  0  4  1  2  0  3  6  1  2  0]
 [ 4  1  0  0  0  3  1  1 15  1  4]
 [ 2  0  1  1  5  0  1  0  0 14  6]
 [ 2  0  0  2  5  3  0  0  0  2 16]]


In [57]:
# Optimizing hyperparameters

scoring = {'f1_macro': make_scorer(f1_score, average='macro')}

In [58]:
def objective(trial, model, get_space, X, y):
    model_space = get_space(trial)

    mdl = model(**model_space)
    scores = cross_validate(mdl, X, y, scoring=scoring, cv=StratifiedKFold(n_splits=5), return_train_score=True)

    return np.mean(scores['test_f1_macro'])

In [59]:
model = SVC

def get_space(trial): 
    space = {"C": trial.suggest_uniform("C", 0, 25), 
           'kernel': trial.suggest_categorical("kernel",['linear', 'poly','sigmoid','rbf']),
           "degree": trial.suggest_int('degree', 2, 10),
            'gamma': trial.suggest_categorical('gamma', ['scale','auto']),
            'coef0': trial.suggest_uniform('coef0',-10,10),
            'shrinking': trial.suggest_categorical('shrinking',[True,False])}
    return space

trials = 50

In [60]:
study = optuna.create_study(direction='maximize')
study.optimize(lambda x: objective(x, model, get_space, X_train_scaled, y_train), n_trials=trials)

print('params: ', study.best_params)

[32m[I 2023-01-09 23:54:08,658][0m A new study created in memory with name: no-name-318f6d87-940d-4279-9753-c20dbfa4ad44[0m
[32m[I 2023-01-09 23:54:36,178][0m Trial 0 finished with value: 0.07794697620711 and parameters: {'C': 23.33299644101683, 'kernel': 'sigmoid', 'degree': 3, 'gamma': 'auto', 'coef0': -5.003086872629923, 'shrinking': True}. Best is trial 0 with value: 0.07794697620711.[0m
[32m[I 2023-01-09 23:54:58,416][0m Trial 1 finished with value: 0.3504816984381666 and parameters: {'C': 0.25058562209055446, 'kernel': 'poly', 'degree': 2, 'gamma': 'auto', 'coef0': 6.738452321045582, 'shrinking': False}. Best is trial 1 with value: 0.3504816984381666.[0m
[32m[I 2023-01-09 23:55:21,284][0m Trial 2 finished with value: 0.4400861074712689 and parameters: {'C': 3.910946914014529, 'kernel': 'poly', 'degree': 9, 'gamma': 'auto', 'coef0': 6.489149050754882, 'shrinking': False}. Best is trial 2 with value: 0.4400861074712689.[0m
[32m[I 2023-01-09 23:55:46,137][0m Trial 3 fi

[32m[I 2023-01-10 00:08:23,361][0m Trial 29 finished with value: 0.1351212610136578 and parameters: {'C': 8.030846093190783, 'kernel': 'sigmoid', 'degree': 8, 'gamma': 'scale', 'coef0': 3.9943346008317295, 'shrinking': False}. Best is trial 7 with value: 0.48090848354176535.[0m
[32m[I 2023-01-10 00:08:46,924][0m Trial 30 finished with value: 0.435716544537543 and parameters: {'C': 22.155126567013923, 'kernel': 'poly', 'degree': 7, 'gamma': 'auto', 'coef0': 5.390026703846307, 'shrinking': True}. Best is trial 7 with value: 0.48090848354176535.[0m
[32m[I 2023-01-10 00:09:18,226][0m Trial 31 finished with value: 0.48090848354176535 and parameters: {'C': 14.980942642689843, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale', 'coef0': -2.081497858267171, 'shrinking': True}. Best is trial 7 with value: 0.48090848354176535.[0m
[32m[I 2023-01-10 00:09:49,022][0m Trial 32 finished with value: 0.48090848354176535 and parameters: {'C': 22.49713646147131, 'kernel': 'rbf', 'degree': 8, 'gamm

params:  {'C': 10.083767984933836, 'kernel': 'rbf', 'degree': 2, 'gamma': 'auto', 'coef0': -0.45106222891602954, 'shrinking': True}


In [61]:
# Classification with optimized hyperparameters

SVM = SVC(C=10.083767984933836, kernel='rbf',gamma='auto',random_state=42, coef0=-0.45106222891602954, shrinking=True)

SVM.fit(X_train_scaled, y_train)

SVM_test_preds = SVM.predict(X_test_scaled)


In [63]:
print('test recall = ', recall_score(y_test, SVM_test_preds, average='macro'))
print('test precision = ', precision_score(y_test, SVM_test_preds, average='macro'))
print('test accuracy = ', accuracy_score(y_test, SVM_test_preds))
print('test F1 = ', f1_score(y_test, SVM_test_preds, average='macro'))

print(confusion_matrix(y_test, SVM_test_preds))

test recall =  0.44027639027639026
test precision =  0.44703866318286384
test accuracy =  0.44551282051282054
test F1 =  0.43988739551637734
[[19  1  4  2  1  1  1  0  0  1  0]
 [ 2  7  5  1  2  2  0  1  3  3  0]
 [ 3  2 16  2  0  1  2  1  2  0  1]
 [ 1  2  2 16  2  0  3  0  2  0  2]
 [ 0  2  0  1  8  5  0  0  1  6  5]
 [ 3  2  1  1  4 13  1  0  1  1  3]
 [ 1  1  1  6  1  1  9  3  1  0  4]
 [ 2  0  2  1  1  0  3  8  1  2  0]
 [ 3  1  0  1  0  1  1  2 18  0  3]
 [ 3  0  1  1  3  0  2  0  0 14  6]
 [ 2  0  0  3  7  4  0  0  0  3 11]]
