<a href="https://colab.research.google.com/github/Ilhom-Utkirov/data_science/blob/main/librosa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# **Uploading dataset**


# Import and unzip dataset
# wget flags: -c resumes a partially downloaded archive instead of starting
# over, -q suppresses the progress output.
!wget -cq https://github.com/dbdmg/data-science-lab/raw/master/datasets/free-spoken-digit.zip
# Extract into the current working directory; the cells below read the
# `dev/` and `eval/` folders (presumably created by this archive — verify).
!unzip free-spoken-digit.zip

# **Features and labels creation**


import glob
import numpy as np
import librosa

'''Using librosa can be slower, but provides normalization of the data and a
new standard sample rate to each file
'''

# Load every labelled recording from the `dev` folder.
# X collects the raw waveforms, y the digit label parsed from each file name.
import os

# glob returns files in an unspecified, platform-dependent order; sorting
# makes the sample order (and everything derived from it) deterministic.
files = sorted(glob.glob(os.path.join('dev', '*.wav')))
if not files:
    raise FileNotFoundError("No .wav files found in the 'dev' folder")

X = []
y = []

for f in files:
    # librosa.load returns the waveform and the sample rate it was loaded at.
    data, sample_rate = librosa.load(f)
    X.append(data)
    # Parse the label from the file name only (extension stripped), so that
    # underscores or dots in a directory name cannot corrupt it. The label
    # is the second "_"-separated token of the name.
    stem = os.path.splitext(os.path.basename(f))[0]
    label = int(stem.split("_")[1])
    y.append(label)

# Number of samples in the first waveform, as a quick sanity check.
print(len(X[0]))

# **Preprocessing**

# For this step I used **Mel-Frequency Cepstral Coefficients (MFCC)**. Each signal, already normalized to the range (-1, 1), is Fourier-transformed; the resulting spectrum is then mapped onto the mel scale (a scale based on the non-linear way humans perceive sound) and compressed with a logarithmic transformation. Finally, we average each coefficient over time (the number of coefficients depends on how many we choose to keep: 20 in this case). This way all the recordings are preprocessed into feature vectors of the same length.

def preprocess(data, sample_rate, n_mfcc=20):
    """Turn raw waveforms into fixed-length MFCC feature vectors.

    For every signal the MFCC matrix is computed and then averaged over its
    second axis, so each recording ends up as a vector of ``n_mfcc`` values.

    Args:
        data: Iterable of 1-D audio signals.
        sample_rate: Sampling rate shared by all signals in ``data``.
        n_mfcc: Number of coefficients to keep per signal. Defaults to 20,
            which is also librosa's own default.

    Returns:
        A list with one 1-D NumPy array per input signal; an empty list when
        ``data`` is empty.
    """
    features = []
    for signal in data:
        # Pass the audio by keyword: librosa >= 0.10 no longer accepts the
        # signal and sample rate as positional arguments, while the keyword
        # form is accepted by older releases as well.
        mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
        # Collapse the second axis of the coefficient matrix by averaging.
        features.append(np.mean(mfcc.T, axis=0))
    return features

# Convert every raw waveform in X into its MFCC feature vector.
# `sample_rate` is the value left over from the last iteration of the
# loading loop above.
X_p = preprocess(X, sample_rate)

# Show the first five feature vectors as a quick sanity check.
print(X_p[:5])

# Some non-tuned classifiers are trained first, to decide which one to perform hyper-parameter tuning on later. The best performer will be chosen for this.

# Vanilla Classifiers

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support

# Hold out 20% of the labelled recordings for validation. Stratifying on y
# keeps the digit distribution the same in both splits, and the fixed seed
# makes the split (and every score computed on it below) reproducible.
X_train, X_val, y_train, y_val = train_test_split(
    X_p, y, test_size=0.2, stratify=y, random_state=42
)

# Baseline 1: random forest with default hyper-parameters, seeded so the
# baseline score does not change from run to run.
clf = RandomForestClassifier(random_state=42)

clf.fit(X_train, y_train)
y_val_pred = clf.predict(X_val)

# The metric helper returns one value per class; the mean of the per-class
# F1 scores is the macro-averaged F1.
p, r, f1, s = precision_recall_fscore_support(y_val, y_val_pred)
print("Vanilla RandomForest:", f1.mean())

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Baseline 2: support-vector classifier behind a standardisation step.
# Building and fitting the pipeline in one expression works because
# fit() returns the fitted pipeline itself.
clf = make_pipeline(StandardScaler(), SVC(gamma='auto')).fit(X_train, y_train)
y_val_pred = clf.predict(X_val)

# Per-class precision / recall / F1 / support on the validation split;
# the mean of the per-class F1 values is reported.
p, r, f1, s = precision_recall_fscore_support(y_true=y_val, y_pred=y_val_pred)
print("Vanilla SVC:", f1.mean())

# **Hyper-parameter Tuning**

# Grid-search-based hyper-parameter tuning on the SVC.

# Grid Search SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Set the parameters by cross-validation
# Hyper-parameter grid explored by cross-validation. The keys carry the
# "svc__" prefix because the estimator being tuned is a pipeline, and
# make_pipeline names the SVC step "svc".
tuned_parameters = [
    {
        "svc__kernel": ["rbf"],
        "svc__gamma": ["scale", "auto"],
        "svc__C": [1, 10, 100, 1000],
    },
    {"svc__kernel": ["sigmoid"], "svc__C": [1, 10, 100, 1000]},
]

print("# Tuning hyper-parameters for F1-macro")
print()

# Tune the same StandardScaler + SVC pipeline that is used for the baseline
# and for the final model. Searching over a bare SVC would pick C / gamma for
# unscaled features, which do not transfer to the scaled pipeline. Keeping
# the scaler inside the pipeline also means it is re-fitted on each
# cross-validation training fold only.
clf = GridSearchCV(
    make_pipeline(StandardScaler(), SVC()),
    tuned_parameters,
    scoring="f1_macro",
)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_["mean_test_score"]
stds = clf.cv_results_["std_test_score"]
for mean, std, params in zip(means, stds, clf.cv_results_["params"]):
    # Report the mean cross-validated score with a +/- two-std band.
    print(f"{mean:0.3f} (+/-{std * 2:0.03f}) for {params!r}")
print()

print("Detailed classification report:")
print()
# GridSearchCV refits the best configuration on all of X_train by default,
# and the report below is computed on the held-out validation split.
print("The model is refit on the full training split.")
print("The scores are computed on the held-out validation split.")
print()
y_true, y_pred = y_val, clf.predict(X_val)
print(classification_report(y_true, y_pred))
print()

# Best parameters implemented in the model

# Tuned Classifiers

# Hyper-parameters used for the final model (presumably copied from an
# earlier grid-search run — re-check them if the search is re-run).
best_params = {'C': 10, 'gamma': 'auto', 'kernel': 'rbf'}

# Build the classifier from best_params itself, so the dictionary is the
# single source of truth instead of being duplicated by hand.
svc = make_pipeline(StandardScaler(), SVC(**best_params))
svc.fit(X_train, y_train)

y_val_pred_svc = svc.predict(X_val)

# Macro F1 (mean of the per-class F1 scores) on the validation split.
p, r, f1, s = precision_recall_fscore_support(y_val, y_val_pred_svc)
# Labelled "Tuned" so it is not mistaken for the untuned baseline score.
print("Tuned SVC:", f1.mean())

# **Test Phase**. Here we test the trained classifier on never-seen-before data. As always we extract and preprocess the data.

# Load the unlabelled recordings from the `eval` folder, extract the same
# MFCC features used for training, and predict a digit for each of them.
import os

# Sort for a deterministic order; glob gives no ordering guarantee.
files = sorted(glob.glob(os.path.join('eval', '*.wav')))
if not files:
    raise FileNotFoundError("No .wav files found in the 'eval' folder")

# The numeric id is the file name without its extension. Using the base name
# keeps this independent of the path separator and of the folder depth.
file_ids = [int(os.path.splitext(os.path.basename(f))[0]) for f in files]
X_test = []

for f in files:
    data, sample_rate = librosa.load(f)
    X_test.append(data)

X_test = preprocess(X_test, sample_rate)

y_test = svc.predict(X_test)

# Quick look at the first predictions next to their files and ids.
print(y_test[:5])
print(files[:5])
print(file_ids[:5])

# Here we merge the file ids and the predictions into a single NumPy array, which we later convert into a DataFrame

# Pair each file id with its predicted digit: one row per recording, with
# the id in column 0 and the prediction in column 1.
f_w = np.column_stack((file_ids, y_test))

# Reorder the rows by ascending file id.
sorted_w = f_w[np.argsort(f_w[:, 0])]

print(f_w[:5])
print(f_w.shape)
print(sorted_w[:5])

# By converting the Numpy array into a Pandas' DataFrame we are able to easily write the .csv for testing.

import pandas as pd

# Wrap the sorted (id, prediction) rows in a DataFrame with named columns
# and write them to disk without the index column.
submission_columns = ['Id', 'Predicted']
df = pd.DataFrame(sorted_w, columns=submission_columns)
df.to_csv('results.csv', index=False)