# Part 1 
Build a machine learning pipeline that takes a Potter or StarWars audio segment as input and predicts its song label (either Potter or StarWars).

•	The validation accuracy of the pipeline matters less than how you justify that accuracy and the method you used.

Conciseness in your writing (10%).
Correctness in your methodology (30%).
Correctness in your analysis and conclusions (30%).
Completeness (10%).
Originality (20%).

# 2 Problem formulation

Describe the machine learning problem that you want to solve and explain what's interesting about it.

# 3 Machine Learning pipeline

Describe your ML pipeline. Clearly identify its input and output, any intermediate stages (for instance, transformation -> models), and intermediate data moving from one stage to the next. Note that your pipeline does not need to include all the stages. 

# 4 Transformation stage

Describe any transformations, such as feature extraction. Identify input and output.

In [22]:
import pandas as pd
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
%matplotlib inline

import os

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline

import xgboost
from xgboost import XGBClassifier

In [3]:
# Root folder that holds the four recording folders.
# NOTE(review): `dir` shadows the Python builtin of the same name; the name is
# kept unchanged so anything that already reads it keeps working.
dir = '/content/drive/MyDrive/Pytorch_Audio'

# Two folders per song, all located directly under the root.
potter1_dir, potter2_dir = (f'{dir}/Potter_{part}' for part in (1, 2))

star_wars1_dir, star_wars2_dir = (f'{dir}/StarWars_{part}' for part in (1, 2))

In [4]:
# File names found in each recording folder.
# NOTE(review): os.listdir returns entries in arbitrary order, and the hold-out
# split later in this notebook is positional, so the order matters downstream.
potter1_list, potter2_list = map(os.listdir, (potter1_dir, potter2_dir))

star_wars1_list, star_wars2_list = map(os.listdir, (star_wars1_dir, star_wars2_dir))

In [5]:
# One frame per Potter folder: file name, song label, source folder and the
# full path obtained by joining the folder and the file name with '/'.
potter1_df = pd.DataFrame({
    'Name': potter1_list,
    'Label': 'Potter',
    'Folder': 'Potter_1',
    'Path': [f'{potter1_dir}/{name}' for name in potter1_list],
})

potter2_df = pd.DataFrame({
    'Name': potter2_list,
    'Label': 'Potter',
    'Folder': 'Potter_2',
    'Path': [f'{potter2_dir}/{name}' for name in potter2_list],
})

In [6]:
# Stack both Potter folders into a single frame and renumber the rows 0..n-1,
# discarding the per-folder index instead of keeping it as a column.
potter_df = pd.concat([potter1_df, potter2_df], join='inner').reset_index(drop=True)

def mfcc_extractor(file):
  """Load an audio file and return its time-averaged MFCC vector.

  Args:
    file: Path of the audio file, passed straight to ``librosa.load``.

  Returns:
    1-D array holding the mean of each of the 64 MFCC coefficients across
    all frames of the clip.
  """
  audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
  # The signal must be passed as `y=`: librosa >= 0.10 makes this argument
  # keyword-only, so a positional call raises TypeError there.
  mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=64)
  # librosa returns coefficients x frames; after the transpose, axis 0 is time,
  # so the mean leaves one value per coefficient.
  mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

  return mfccs_scaled_features

# One MFCC feature vector per Potter clip; every file is loaded here, so this is the slow step.
potter_df['MFCC'] = potter_df['Path'].apply(mfcc_extractor)

In [7]:
# Positional hold-out: the final 20 rows become the test set, the rest is training data.
potter_df_train = potter_df.iloc[:-20]
potter_df_test = potter_df.iloc[-20:]

potter_df_train.shape, potter_df_test.shape

((397, 5), (20, 5))

In [8]:
# Preview the first rows of the combined Potter frame, including the MFCC column.
potter_df.head()

Unnamed: 0,Name,Label,Folder,Path,MFCC
0,S2_hum_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-661.8584, 108.642815, 45.156384, 38.118088, ..."
1,S3_hum_1_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-346.7682, 136.55504, -0.26411316, 28.942776,..."
2,S1_whistle_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-559.3039, 109.55505, -44.322174, 30.625387, ..."
3,S2_whistle_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-693.1078, 68.701996, -20.831884, 8.191821, 2..."
4,S1_hum_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-570.3254, 165.23584, 11.886826, 28.466051, -..."


In [9]:
# One frame per Star Wars folder: file name, song label, source folder and the
# full path obtained by joining the folder and the file name with '/'.
star_wars1_df = pd.DataFrame({
    'Name': star_wars1_list,
    'Label': 'Star Wars',
    'Folder': 'StarWars_1',
    'Path': [f'{star_wars1_dir}/{name}' for name in star_wars1_list],
})

star_wars2_df = pd.DataFrame({
    'Name': star_wars2_list,
    'Label': 'Star Wars',
    'Folder': 'StarWars_2',
    'Path': [f'{star_wars2_dir}/{name}' for name in star_wars2_list],
})

In [10]:
# Stack both Star Wars folders into a single frame and renumber the rows 0..n-1,
# discarding the per-folder index instead of keeping it as a column.
star_wars_df = pd.concat([star_wars1_df, star_wars2_df], join='inner').reset_index(drop=True)

def mfcc_extractor(file_):
  """Summarise an audio clip as the per-coefficient mean of its 64 MFCCs.

  Args:
    file_: Path of the audio file, handed to ``librosa.load`` unchanged.

  Returns:
    1-D array with one averaged value per MFCC coefficient.
  """
  signal, rate = librosa.load(file_, res_type='kaiser_fast')
  coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=64)
  # librosa returns coefficients x frames; the transpose puts frames on axis 0,
  # so this mean collapses the time dimension.
  return np.mean(coefficients.T, axis=0)

# One MFCC feature vector per Star Wars clip; every file is loaded here, so this is the slow step.
star_wars_df['MFCC'] = star_wars_df['Path'].apply(mfcc_extractor)

In [11]:
# Positional hold-out: the final 20 rows become the test set, the rest is training data.
star_wars_df_train = star_wars_df.iloc[:-20]
star_wars_df_test = star_wars_df.iloc[-20:]

star_wars_df_train.shape, star_wars_df_test.shape

((397, 5), (20, 5))

In [12]:
# Preview the first rows of the combined Star Wars frame, including the MFCC column.
star_wars_df.head()

Unnamed: 0,Name,Label,Folder,Path,MFCC
0,S1_hum_2_StarWars.wav,Star Wars,StarWars_1,/content/drive/MyDrive/Pytorch_Audio/StarWars_...,"[-508.2717, 166.03279, -4.6776953, 17.328432, ..."
1,S3_hum_4_StarWars.wav,Star Wars,StarWars_1,/content/drive/MyDrive/Pytorch_Audio/StarWars_...,"[-262.82635, 144.29163, -36.616604, -1.0275717..."
2,S4_hum_4_StarWars.wav,Star Wars,StarWars_1,/content/drive/MyDrive/Pytorch_Audio/StarWars_...,"[-395.11832, 96.01256, -5.4954414, -14.451133,..."
3,S3_hum_3_StarWars.wav,Star Wars,StarWars_1,/content/drive/MyDrive/Pytorch_Audio/StarWars_...,"[-277.7894, 140.88435, -17.97525, -4.7927794, ..."
4,S4_hum_3_StarWars.wav,Star Wars,StarWars_1,/content/drive/MyDrive/Pytorch_Audio/StarWars_...,"[-389.81763, 97.17954, -12.334502, -16.95927, ..."


In [13]:
# Training rows of both songs in one frame, renumbered from 0.
df_train = pd.concat([potter_df_train, star_wars_df_train], join='inner').reset_index(drop=True)

# Binary target: 1 for 'Potter', 0 for any other label (here 'Star Wars').
df_train['Encoded Label'] = df_train['Label'].eq('Potter').astype('int64')

In [14]:
# Count rows per (Label, Encoded Label) pair to check the encoding and the class balance.
df_train[['Label','Encoded Label']].value_counts()

Label      Encoded Label
Star Wars  0                397
Potter     1                397
dtype: int64

In [15]:
# Number of training rows contributed by each source folder.
df_train['Folder'].value_counts()

StarWars_1    210
Potter_1      210
Potter_2      187
StarWars_2    187
Name: Folder, dtype: int64

In [16]:
# Hold-out rows of both songs in one frame, renumbered from 0.
df_test = pd.concat([potter_df_test, star_wars_df_test], join='inner').reset_index(drop=True)

# Same binary target as the training frame: 1 for 'Potter', 0 otherwise.
df_test['Encoded Label'] = df_test['Label'].eq('Potter').astype('int64')

In [17]:
# Shapes of the combined training and hold-out frames.
df_train.shape, df_test.shape

((794, 6), (40, 6))

In [18]:
# Preview the combined training frame, now with the Encoded Label column.
df_train.head()

Unnamed: 0,Name,Label,Folder,Path,MFCC,Encoded Label
0,S2_hum_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-661.8584, 108.642815, 45.156384, 38.118088, ...",1
1,S3_hum_1_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-346.7682, 136.55504, -0.26411316, 28.942776,...",1
2,S1_whistle_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-559.3039, 109.55505, -44.322174, 30.625387, ...",1
3,S2_whistle_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-693.1078, 68.701996, -20.831884, 8.191821, 2...",1
4,S1_hum_2_Potter.wav,Potter,Potter_1,/content/drive/MyDrive/Pytorch_Audio/Potter_1/...,"[-570.3254, 165.23584, 11.886826, 28.466051, -...",1


# 5 Modelling

Describe the ML models that you will implement. 

In [26]:
# Feature matrix: the per-clip MFCC vectors stacked row by row.
X = np.array(list(df_train['MFCC']))
# Target vector: the 0/1 encoded song label of each row.
y = np.array(list(df_train['Encoded Label']))

In [29]:
# Set 20% of the training frame aside for validation; the fixed seed makes the split repeatable.
# NOTE(review): no `stratify=` is passed, so class proportions may differ slightly between the parts.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=47,
)

In [30]:
# Base estimator with library defaults; the grid search overrides the tuned parameters.
xgbc = XGBClassifier()

# Search space: 5 learning rates x 7 depths x 6 tree counts = 210 candidates.
parameters = dict(
    learning_rate=[0.01, 0.03, 0.05, 0.07, 0.001],
    max_depth=[3, 4, 5, 6, 12, 24, 36],
    # Number of boosted trees; the original note suggests trying larger values such as 1000.
    n_estimators=[5, 15, 25, 50, 100, 120],
)

# 10-fold stratified, shuffled cross-validation with a fixed seed; candidates are scored by accuracy.
xgb_gs = GridSearchCV(
    estimator=xgbc,
    param_grid=parameters,
    scoring='accuracy',
    n_jobs=-1,
    cv=StratifiedKFold(n_splits=10, shuffle=True, random_state=47),
    verbose=2,
)

In [31]:
# Run the cross-validated grid search on the training part only; X_val / y_val are not used here.
xgb_gs.fit(X_train, y_train)

Fitting 10 folds for each of 210 candidates, totalling 2100 fits


GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=47, shuffle=True),
             estimator=XGBClassifier(), n_jobs=-1,
             param_grid={'learning_rate': [0.01, 0.03, 0.05, 0.07, 0.001],
                         'max_depth': [3, 4, 5, 6, 12, 24, 36],
                         'n_estimators': [5, 15, 25, 50, 100, 120]},
             scoring='accuracy', verbose=2)

In [32]:
# Parameter combination the grid search ranked best under the 'accuracy' scoring.
xgb_gs.best_params_

{'learning_rate': 0.07, 'max_depth': 3, 'n_estimators': 100}

In [33]:
# Build the final classifier from the parameters the grid search selected rather
# than re-typing them by hand, so this cell cannot drift out of sync with the search.
new_xgb = XGBClassifier(**xgb_gs.best_params_)

new_xgb.fit(X_train, y_train)

# Predicted class ids for the validation split (1 = Potter, 0 = Star Wars).
preds = new_xgb.predict(X_val)

In [34]:
# Per-class precision, recall and F1 on the validation split, plus overall accuracy.
print(classification_report(y_val, preds))

              precision    recall  f1-score   support

           0       0.62      0.59      0.61        78
           1       0.62      0.65      0.64        81

    accuracy                           0.62       159
   macro avg       0.62      0.62      0.62       159
weighted avg       0.62      0.62      0.62       159



In [None]:
# new_xgb.save_model("XBG_model.pth")

In [39]:
# Position in this list is the encoded label: 0 -> Star Wars, 1 -> Potter.
class_mapping = [
    "Star Wars",
    "Potter"
]

# Row of the hold-out frame to classify.
index = 0

# The classifier expects a 2-D (n_samples, n_features) array, so the single
# feature vector becomes one row. reshape() is used instead of assigning to
# `.shape` (in-place shape mutation is discouraged by NumPy), and -1 avoids
# hard-coding the number of coefficients.
test_mfcc = mfcc_extractor(df_test.loc[index, 'Path']).reshape(1, -1)

# The array is passed as-is; a tolist()/np.array round-trip is not needed.
prediction_index = new_xgb.predict(test_mfcc)
prediction = class_mapping[int(prediction_index[0])]
expected = df_test.loc[index, 'Label']

print(f"Predicted: '{prediction}', expected: '{expected}'")

Predicted: 'Potter', expected: 'Potter'


# 6 Methodology

Describe how you will train and validate your models, and how model performance is assessed (e.g. accuracy, confusion matrix).

# 7 Dataset

Describe the dataset that you will use to create your models and validate them. If you need to preprocess it, do it here. Include visualisations too. You can visualise raw data samples or extracted features.

# 8 Results

Carry out your experiments here, explain your results.

# 9 Conclusions

Your conclusions, suggested improvements, etc. should go here.