In [2]:
import librosa
import librosa.display
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy as np
import csv
import os
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')



OSError: sndfile library not found

In [None]:
# Dataset Info
# Load the metadata table (first CSV row is the header) and print it so the
# available columns can be inspected before feature extraction.
dataset_info_csv = './UrbanSound8K.csv'
dataset_info = read_csv(
    dataset_info_csv,
    header=0,
)
print(dataset_info)

In [None]:
# Feature Extraction
# For every clip whose class id is in CLASS_LABELS, load the audio, compute
# frame-level spectral features with librosa, summarise each feature by its
# mean and variance over the clip, and write one row per clip to
# 'urbansound8k_features.csv'.

# Class ids of the clips that are kept; all other rows are skipped.
CLASS_LABELS = [4,5]

# Number of MFCC coefficients; drives both the CSV header and the librosa call
# so the two cannot drift apart.
N_MFCC = 40

# CSV header: three identification columns, then mean/variance pairs for each
# frame-level feature, then the MFCC pairs, then the label.
names = ['filename'
          ,'fold','duration'
          ,'chroma_stft_mean'
          ,'chroma_stft_variance'
          , 'rmse_mean'
          , 'rmse_variance'
          , 'spectral_centroid_mean'
          , 'spectral_centroid_variance'
          , 'spectral_bandwidth_mean'
          , 'spectral_bandwidth_variance'
          , 'spectral_rolloff_mean'
          , 'spectral_rolloff_variance'
          , 'zero_crossing_rate_mean'
          , 'zero_crossing_rate_variance']
for i in range(1, N_MFCC + 1):
    names.append("MFCC_"+str(i)+"_mean")
    names.append("MFCC_"+str(i)+"_variance")

names.append('class_ID')
print(names)

# Read all WAV files and extract all features write into a new csv file

with open('urbansound8k_features.csv', 'w', newline='') as allFeatures:
    writer = csv.writer(allFeatures)
    writer.writerow(names)

    for file in dataset_info.values:
        # NOTE(review): the positional indices below assume the metadata CSV has
        # the wav file name in column 0, the duration in column 4, the fold in
        # column 6 and the numeric class id in column 7 -- confirm against the
        # actual './UrbanSound8K.csv' header.
        if int(file[7]) not in CLASS_LABELS:
            continue

        audio_file = "urbansound8k/" + "fold" + str(file[6]) + "/" + file[0]
        series, sample_rate = librosa.load(audio_file)

        # Frame-level features, listed in the same order as the header columns.
        chroma_stft = librosa.feature.chroma_stft(y=series, sr=sample_rate)
        rmse = librosa.feature.rms(y=series)
        spectral_centroid = librosa.feature.spectral_centroid(y=series, sr=sample_rate)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=series, sr=sample_rate)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=series, sr=sample_rate)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(series)
        MFCC = librosa.feature.mfcc(y=series, sr=sample_rate, n_mfcc=N_MFCC)

        sample = [file[0].replace(".wav",""), file[6], file[4]]

        # Each of these is summarised over the whole array (all rows, all frames).
        for feature in (chroma_stft, rmse, spectral_centroid,
                        spectral_bandwidth, spectral_rolloff,
                        zero_crossing_rate):
            sample.append(np.mean(feature))
            sample.append(np.var(feature))

        # MFCCs are summarised per coefficient (one row of MFCC per coefficient).
        for mfcc in MFCC:
            sample.append(np.mean(mfcc))
            # Fix: the variance column previously stored np.mean(mfcc) again.
            sample.append(np.var(mfcc))

        sample.append(file[7])

        # Guard against the header and the row layout drifting apart.
        assert len(sample) == len(names), "feature row does not match CSV header"

        writer.writerow(sample)
# The `with` block closes the file; no explicit close() is needed.

In [None]:
# SVM
# Grid-search an SVC on the pre-computed clip features, once per scoring
# metric, and print a classification report on the hold-out folds each time.

# Read Feature PreProcessed before
dataset_file = './urbansound8k_features.csv'
dataset = read_csv(dataset_file)
dataset = np.array(dataset)

# Set up the dataset for crossvalidation
# Each row is used as: sample[1] = fold, sample[-1] = label, and everything
# from column 2 up to (not including) the label = feature vector.
# Folds below 6 form the development set, folds above 7 the hold-out set;
# folds 6 and 7 are not used by this cell.
train = []
train_label = []
holdout = []
holdout_label = []

for sample in dataset:
    if sample[1] < 6:
        train.append(sample[2:-1])
        train_label.append(sample[-1])
    elif sample[1] > 7:
        holdout.append(sample[2:-1])
        holdout_label.append(sample[-1])

train = np.array(train)
train_label = np.array(train_label)
holdout = np.array(holdout)
holdout_label = np.array(holdout_label)

# Fit the scaler on the development set only and reuse it for the hold-out
# set. Fitting a second scaler on the hold-out data (as was done before) puts
# the two sets on different scales and leaks hold-out statistics into the
# evaluation.
scaler = StandardScaler()
transformed_train = scaler.fit_transform(train)
transformed_holdout = scaler.transform(holdout)

tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1,0.1,0.01,0.001,0.0001]},
                   {'kernel':['linear'], 'C':[1,10,100,1000]}]

scores = ['accuracy','f1_macro' ,'precision_macro', 'recall_macro', 'roc_auc']

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # 5-fold cross-validated search over the grid, selecting by `score`.
    clf = GridSearchCV(
        SVC(), tuned_parameters, scoring= score, cv = 5, n_jobs = 2
    )
    clf.fit(transformed_train, train_label)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        # Printed spread is two standard deviations of the CV scores.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = holdout_label, clf.predict(transformed_holdout)
    print(classification_report(y_true, y_pred))
    print()

In [None]:
# Random Forest
# Grid-search a RandomForestClassifier on the pre-computed clip features, once
# per scoring metric, and print a classification report on the hold-out folds
# each time.

# Read Feature PreProcessed before
dataset_file = './urbansound8k_features.csv'
dataset = read_csv(dataset_file)
dataset = np.array(dataset)

# Set up the dataset for crossvalidation
# Each row is used as: sample[1] = fold, sample[-1] = label, and everything
# from column 2 up to (not including) the label = feature vector.
# Folds below 6 form the development set, folds above 7 the hold-out set;
# folds 6 and 7 are not used by this cell.
train = []
train_label = []
holdout = []
holdout_label = []

for sample in dataset:
    if sample[1] < 6:
        train.append(sample[2:-1])
        train_label.append(sample[-1])
    elif sample[1] > 7:
        holdout.append(sample[2:-1])
        holdout_label.append(sample[-1])

train = np.array(train)
train_label = np.array(train_label)
holdout = np.array(holdout)
holdout_label = np.array(holdout_label)

# Fit the scaler on the development set only and reuse it for the hold-out
# set. Fitting a second scaler on the hold-out data (as was done before) puts
# the two sets on different scales and leaks hold-out statistics into the
# evaluation.
scaler = StandardScaler()
transformed_train = scaler.fit_transform(train)
transformed_holdout = scaler.transform(holdout)

tuned_parameters = [{"n_estimators":[100,200,300,400,500,600,700,800,900,1000],
                   "max_depth": [10,20,30,40,50]}]

scores = ['accuracy','f1_macro' ,'precision_macro', 'recall_macro', 'roc_auc']


for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # 5-fold cross-validated search over the grid, selecting by `score`;
    # random_state is fixed so repeated runs build the same forests.
    clf = GridSearchCV(
        estimator= RandomForestClassifier(random_state = 1), param_grid = tuned_parameters, scoring= score,cv = 5, n_jobs = 2
    )
    clf.fit(transformed_train, train_label)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        # Printed spread is two standard deviations of the CV scores.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = holdout_label, clf.predict(transformed_holdout)
    print(classification_report(y_true, y_pred))
    print()