# Musical Instrument Recognition in Solo-Instrument Recordings

### MIR Course, March 2018

#### A presentation by Venkatesh Shenoy Kadandale, 2017-18 SMC Master Student 

### Objective

To classify musical instrument sounds from solo recordings of 'bass guitar' and 'piccolo' categories. Focus will be on feature selection and classification.

### Dataset

Subset of Good-Sounds Dataset. Here's the [link](https://www.upf.edu/web/mtg/good-sounds?sid=395) to the complete dataset. The original dataset is provided with a [CC BY-NC 4.0 license](https://creativecommons.org/licenses/by-nc/4.0/). I have used only a subset of this dataset for this task: 

-  50 Bass Guitar sounds (0000.wav to 0049.wav from good-sounds/sound_files/bass_alejandro_recordings/neumann)

-  50 Piccolo sounds (0000.wav to 0049.wav from good-sounds/sound_files/piccolo_irene_recordings/neumann)

This subset is temporarily made available [here](https://drive.google.com/open?id=1xfkq7MYGM0otQOAuwmE8gNMl70drvQkq).


In [None]:
# all the imports
import os
import urllib
import zipfile
import json
import itertools
import numpy as np
import matplotlib.pyplot as plt


import essentia
import essentia.standard as es
import pandas as pd #python library for data manipulation and analysis
import seaborn as sns; # for visualizing data

from sklearn import svm #libraries for machine learning
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectKBest

#external .py files
import download_file_from_google_drive #for downloading big files from google drive
import confirm_prompt #for confirming user action 
import json_flattener #for flatenning jsons

In [None]:
# [NOTE] Please set the path to the dataset here (this is the directory that will hold the instrument folders {bass, piccolo})
path_to_dataset='../../../data/instruments/'

if not os.path.exists(path_to_dataset):
    # NOTE: os.umask() changes the mask of the whole process, so it also applies to
    # every file/directory created later in this session, not only to this folder.
    os.umask(0) #To mask the permission restrictions on new files/directories being created
    os.makedirs(path_to_dataset,0o777) # 0o777 gives us full permissions for the folder

# Prompt to know if you want to skip downloading the dataset
# (presumably confirm() returns a truthy value when the user answers 'y' -- verify in confirm_prompt.py)
skip_dataset_download=confirm_prompt.confirm(prompt='Would you like to skip downloading the data? \nEnter \'y\' if you already have dataset and \'n\' to download the dataset.\n [NOTE] : Downloading the dataset using this notebook can take up to 2 minutes.\n')
if not skip_dataset_download:
    # This block downloads the zipped dataset from google drive and unpacks it in place
    file_id='1xfkq7MYGM0otQOAuwmE8gNMl70drvQkq'
    # os.path.join keeps the archive inside path_to_dataset even without a trailing separator
    filename=os.path.join(path_to_dataset,"instruments.zip")
    print("Downloading the dataset...")
    download_file_from_google_drive.download_file_from_google_drive(file_id,filename)
    print("Unzipping the data file.")
    # The context manager closes the archive even if extraction fails
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(path_to_dataset)
    os.remove(filename) # Removing the zip file
    print('Data downloaded and unzipped to: ',path_to_dataset)

# [NOTE] If you already have the dataset, move the instrument folders {bass, piccolo} into path_to_dataset
# Only sub-directories count as instruments: a stray file (leftover archive, hidden
# system file, ...) would otherwise become a bogus class and break the later cells
# that list the contents of every instrument folder.
instruments = sorted(
    entry for entry in os.listdir(path_to_dataset)
    if os.path.isdir(os.path.join(path_to_dataset, entry))
)

In [None]:
# A sample from each category
# Importing the submodule explicitly guarantees that IPython.display is loaded.
import IPython.display

# Build the sample paths from path_to_dataset so this cell keeps working when the
# dataset location configured above is changed.
for sample_instrument in ('bass', 'piccolo'):
    sample_path = os.path.join(path_to_dataset, sample_instrument, '0000.wav')
    IPython.display.display(IPython.display.Audio(sample_path))

### Feature Extraction

We extract all the low level features using Essentia's Music Extractor.

In [None]:
# Prompt to know if you want to extract the features now or use the pre-extracted ones
skip_json_extraction=confirm_prompt.confirm(prompt='Would you like to skip extraction of feature jsons from the dataset? \nEnter \'y\' if you want to use the pre-extracted jsons and \'n\' to start extracting jsons.\n')

if skip_json_extraction:
    # `import urllib` alone does not guarantee that the `urllib.request` submodule
    # is loaded, so import it explicitly before using it.
    import urllib.request

    # Download the pre-extracted feature jsons
    file_id='1KjP1evRzJZMGiRUva8i-_cbRHnrQJTI6'
    filename=os.path.join(path_to_dataset,"instrument_jsons.zip")
    print("Downloading the pre-extracted jsons...")
    urllib.request.urlretrieve('http://docs.google.com/uc?id='+file_id,filename)
    # Unzip the file; the context manager closes the archive even if extraction fails
    with zipfile.ZipFile(filename, 'r') as zip_ref:
        zip_ref.extractall(path_to_dataset)
    os.remove(filename) # Removing the zip file
    print('Data downloaded and unzipped to the instrument specific folders.')
else:
    # Extract the features of every .wav file and store them next to it as .json
    for instrument in instruments:
        instrument_dir=os.path.join(path_to_dataset,instrument)
        if not os.path.isdir(instrument_dir):
            continue # not an instrument folder
        print("[Instrument] : " + instrument)
        # Only .wav files are analysed, so only those are counted
        wav_files=[name for name in sorted(os.listdir(instrument_dir)) if name.endswith('.wav')]
        print("Number of files : "+str(len(wav_files)))
        for wav_name in wav_files:
            filename=os.path.join(instrument_dir,wav_name)
            print("Analysing file : "+filename)
            # Run Essentia's MusicExtractor, aggregating the low-level frame features
            # with 'mean' and 'stdev' statistics only
            features, features_frames = es.MusicExtractor(lowlevelSilentFrames='drop',
                                                          lowlevelFrameSize=2048,
                                                          lowlevelHopSize=1024,
                                                          lowlevelStats=['mean', 'stdev'])(filename)
            # splitext swaps only the extension; str.replace would also rewrite any
            # '.wav' that happens to appear in a directory name
            json_path=os.path.splitext(filename)[0]+'.json'
            es.YamlOutput(filename = json_path, format='json')(features)
            # Drop the per-file results before analysing the next file
            del features, features_frames
    print("Feature Extraction Completed Successfully")

## Feature Selection 

We flatten each JSON file and keep only the features we are interested in. (To do: plot graphs of the selected features and their distributions.)

In [None]:
# Descriptors kept for classification (names as they appear in the flattened json)
sortedFeatures=sorted(['lowlevel_average_loudness', 'lowlevel_hfc_mean', 'lowlevel_dissonance_mean', 'lowlevel_pitch_salience_mean'])
features=['filename','instrument']
features.extend(sortedFeatures)

# Load data into a Pandas DataFrame: one row per json file with the file name,
# the 1-based position of its instrument in `instruments` and the selected descriptors
rows=[]
for label, instrument in enumerate(instruments, start=1):
    instrument_dir=os.path.join(path_to_dataset,instrument)
    if not os.path.isdir(instrument_dir):
        continue # not an instrument folder
    print("Fetching json files from [INSTRUMENT] : " + instrument)
    # Sorted listing: the row order (which the later positional train/test split
    # relies on) no longer depends on the filesystem's listing order
    for fileName in sorted(os.listdir(instrument_dir)):
        if not fileName.endswith('.json'):
            continue
        # `with` closes every json file instead of leaking one handle per file
        with open(os.path.join(instrument_dir,fileName),"r",encoding="utf-8") as jsonFile:
            jsonToPython = json.load(jsonFile, strict=False)
        flatJson = json_flattener.flatten_json(jsonToPython)
        row={features[0]: os.path.splitext(fileName)[0], features[1]: label}
        for feature in sortedFeatures:
            # .get() leaves a missing descriptor as None rather than raising
            row[feature]=flatJson.get(feature)
        rows.append(row)
# Build the frame once instead of growing it row by row
dfv=pd.DataFrame(rows, columns=features)
print("Features loaded into panda dataframe!")

## Standardize features to zero mean and unit variance

In [None]:
# Separate the filename and instrument columns from the feature columns
df1 = dfv.iloc[:, :2]
# Work on an explicit copy: assigning into a slice of dfv triggers pandas'
# SettingWithCopyWarning and leaves it unclear whether dfv itself is modified
df2 = dfv.iloc[:, 2:].copy()
scaler = StandardScaler() # To standardize the features to zero mean and unit variance
# NOTE(review): the scaler is fitted on every row, including the rows that are
# later held out for testing -- consider fitting it on the training rows only.
df2[df2.columns] = scaler.fit_transform(df2[df2.columns])

## Segregating Train and Test data

In [None]:
# Create train_df for training data (first 70% of each instrument's rows) and
# test_df for test data (remaining 30% of each instrument's rows).
df = pd.concat([df1, df2], axis=1)
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
X = df.iloc[:,2:].values # feature columns of every row (train and test together)
y = df.iloc[:,1].values.astype('int') # instrument label of every row

TRAIN_FRACTION = 0.7
train_parts = []
test_parts = []
# Split per instrument instead of by hard-coded row positions, so the split stays
# correct when an instrument does not contribute exactly 50 rows. With 50 rows per
# instrument this selects the same rows as [:35]/[50:85] and [35:50]/[85:100].
for label, class_rows in df.groupby(df.columns[1], sort=True):
    n_train = int(round(TRAIN_FRACTION * len(class_rows)))
    train_parts.append(class_rows.iloc[:n_train])
    test_parts.append(class_rows.iloc[n_train:])

train_df = pd.concat(train_parts, ignore_index=True)
test_df = pd.concat(test_parts, ignore_index=True)

## Training 

In [None]:
# Fit on the training rows only. X and y hold the whole dataset, so fitting on them
# lets the classifier see the held-out test rows and inflates the reported accuracy.
train_data = train_df.iloc[:,2:].values # feature columns
train_labels = train_df.iloc[:,1].values.astype('int') # instrument labels
clf = LinearSVC()
clf.fit(train_data, train_labels)

## Testing

In [None]:
# .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
test_data=test_df.iloc[:,2:].values # feature columns of the held-out rows
clf_output = clf.predict(test_data) # storing classifier output - predicted labels
gt = test_df.iloc[:,1].values.astype('int') # storing ground truth (instrument column)

## Evaluation

In [None]:
# Compute confusion matrix
class_names=instruments
# Instruments are labelled 1..len(instruments) when the features are loaded. Passing
# the labels explicitly keeps one row/column per instrument, in the same order as
# class_names, even if a class is absent from both gt and clf_output.
class_labels=list(range(1, len(class_names)+1))
cnf_matrix = confusion_matrix(gt, clf_output, labels=class_labels)
np.set_printoptions(precision=2)

## Results

In [None]:
# Render the confusion matrix as an annotated seaborn heatmap
sns.set()
df_cm = pd.DataFrame(cnf_matrix, index=class_names, columns=class_names)
fig = plt.figure(figsize=(10,7))
heatmap = sns.heatmap(df_cm, annot=True, fmt="d")
# Same tick-label styling on both axes; only the rotation differs (y: 0, x: 45 degrees)
for axis, angle in ((heatmap.yaxis, 0), (heatmap.xaxis, 45)):
    axis.set_ticklabels(axis.get_ticklabels(), rotation=angle, ha='right', fontsize=14)
plt.ylabel('True label')
plt.xlabel('Predicted label')

# Share of test samples whose predicted label equals the ground truth, as a percentage
accuracy_percent = 100*accuracy_score(gt,clf_output)
print("Classification accuracy : "+str(accuracy_percent)+" %")

## References

Romani Picas O., Dabiri D., Serra X. "A real-time system for measuring sound goodness in instrumental sounds", 138th Audio Engineering Society Convention, Warsaw, 2015