In [None]:
import librosa
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy as np
import csv
import os
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import warnings
from sklearn.metrics import classification_report, roc_auc_score, roc_curve, confusion_matrix, precision_recall_curve, f1_score, auc
warnings.filterwarnings('ignore')

In [None]:
# Dataset Info
# Load the table stored in ./UrbanSound8K.csv; the first line of the file
# is used as the column names (header=0).
dataset_info_csv = './UrbanSound8K.csv'
dataset_info = read_csv(filepath_or_buffer=dataset_info_csv, header=0)

# Show the loaded table as plain text.
print(dataset_info)

In [None]:
# Feature Extraction
# Class IDs of the clips that are kept when the feature CSV is written.
CLASS_LABELS = [4,5]

# Header row of the feature CSV: three identifying columns first, then a
# mean/variance pair for every spectral descriptor, a mean/variance pair
# for each of the 40 MFCC coefficients, and finally the class label.
names = ['filename', 'fold', 'duration']

for descriptor in ('chroma_stft',
                   'rmse',
                   'spectral_centroid',
                   'spectral_bandwidth',
                   'spectral_rolloff',
                   'zero_crossing_rate'):
    names.extend([descriptor + '_mean', descriptor + '_variance'])

for i in range(1, 41):
    names.extend(["MFCC_{}_mean".format(i), "MFCC_{}_variance".format(i)])

names.append('class_ID')
print(names)

# Read every WAV clip whose class ID is in CLASS_LABELS, summarise its
# frame-level audio features as mean/variance pairs, and write one row per
# clip to a new CSV file whose header is `names`.
#
# NOTE(review): metadata rows are indexed by position. The code relies on
# column 0 being the file name, column 6 the fold number (it is used to build
# the "fold<N>" directory) and column 7 the numeric class ID; column 4 is
# written under the 'duration' header. Confirm this against the layout of
# ./UrbanSound8K.csv.

with open('urbansound8k_features.csv', 'w', newline='') as allFeatures:
    writer = csv.writer(allFeatures)
    writer.writerow(names)

    for file in dataset_info.values:
        # Skip clips of classes that are not part of this experiment.
        if int(file[7]) not in CLASS_LABELS:
            continue

        audio_file = os.path.join("urbansound8k", "fold" + str(file[6]), file[0])
        series, sample_rate = librosa.load(audio_file)

        # Frame-level descriptors, listed in the same order as their
        # "<name>_mean" / "<name>_variance" columns appear in the header.
        frame_features = [
            librosa.feature.chroma_stft(y=series, sr=sample_rate),
            librosa.feature.rms(y=series),
            librosa.feature.spectral_centroid(y=series, sr=sample_rate),
            librosa.feature.spectral_bandwidth(y=series, sr=sample_rate),
            librosa.feature.spectral_rolloff(y=series, sr=sample_rate),
            librosa.feature.zero_crossing_rate(series),
        ]
        MFCC = librosa.feature.mfcc(y=series, sr=sample_rate, n_mfcc=40)

        # Identifying columns: filename (without extension), fold, duration.
        sample = [file[0].replace(".wav", ""), file[6], file[4]]

        # Each descriptor is collapsed to two scalars over all its values.
        for feature in frame_features:
            sample.append(np.mean(feature))
            sample.append(np.var(feature))

        # One mean/variance pair per MFCC coefficient (one row of MFCC each).
        # The variance column previously repeated the mean; it now stores the
        # actual variance, matching its "MFCC_<i>_variance" header.
        for mfcc in MFCC:
            sample.append(np.mean(mfcc))
            sample.append(np.var(mfcc))

        # Class ID is the last column.
        sample.append(file[7])

        writer.writerow(sample)
    # No explicit close(): the `with` block closes the file on exit.

In [None]:
# SVM
# Load the per-clip feature table written by the feature-extraction step.
dataset_file = './urbansound8k_features.csv'
dataset = read_csv(dataset_file)
dataset = np.array(dataset)

# Column layout of the feature table: 0 = filename, 1 = fold,
# 2 .. -2 = model inputs (duration and the audio features), -1 = class ID.
fold = dataset[:, 1].astype(int)
features = dataset[:, 2:-1].astype(float)
labels = dataset[:, -1].astype(int)

# Folds below 6 are used for training / hyper-parameter search, folds above 7
# form the hold-out set; folds 6 and 7 are not used in this cell.
train_mask = fold < 6
holdout_mask = fold > 7

train = features[train_mask]
train_label = labels[train_mask]
holdout = features[holdout_mask]
holdout_label = labels[holdout_mask]

# Fit the scaler on the training data only and reuse its statistics for the
# hold-out set, so both sets live in the same feature space and no hold-out
# information leaks into preprocessing.
scaler = StandardScaler()
transformed_train = scaler.fit_transform(train)
transformed_holdout = scaler.transform(holdout)

tuned_parameters = [{'kernel': ['rbf'], 'gamma': [0.1,0.01,0.001,0.0001],
                     'C': [1,0.1,0.01,0.001,0.0001]},
                   {'kernel':['linear'], 'C':[1,10,100,1000]}]

scores = ['accuracy']

# Output directories for the saved figures.
os.makedirs("./PR", exist_ok=True)
os.makedirs("./ROC", exist_ok=True)

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # NOTE(review): cv=5 re-splits the pooled training rows and ignores the
    # dataset's own fold column; if related clips share a fold this may give
    # optimistic CV scores - confirm whether fold-grouped CV is wanted.
    clf = GridSearchCV(
        SVC(random_state = 1), tuned_parameters, scoring= score, cv = 5, n_jobs = 2
    )
    clf.fit(transformed_train, train_label)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = holdout_label, clf.predict(transformed_holdout)
    classification_metrics = classification_report(y_true, y_pred, target_names = ['drilling','engine_idling'],output_dict= True)
    accuracy = classification_metrics['accuracy']
    # engine_idling is treated as the positive class, drilling as the negative.
    sensitivity = classification_metrics['engine_idling']['recall']
    specificity = classification_metrics['drilling']['recall']

    # Signed distance to the decision boundary, used as the ranking score.
    probability = clf.decision_function(transformed_holdout)
    score_list = np.array(probability, dtype='float')

    # Binarise labels and predictions: class ID 4 -> 0, anything else -> 1.
    target_list = (holdout_label != 4).astype(int)
    y_pred = (y_pred != 4).astype(int)

    f1 = f1_score(target_list, y_pred)

    roc_score = roc_auc_score(target_list, score_list)
    conf_matrix = confusion_matrix(target_list, y_pred)
    precision, recall, _ = precision_recall_curve(target_list, score_list)
    pr_score = auc(recall, precision)

    # plot the pr curve
    fig, ax = plt.subplots(figsize=(4,4))
    ax.plot(recall, precision, label = "AUPRC = {:.4f}".format(pr_score))
    # Reference line for a precision-recall plot: a classifier with no skill
    # has precision equal to the share of positive samples at every recall.
    no_skill = target_list.mean()
    ax.plot([0, 1], [no_skill, no_skill], "k--")
    ax.legend(loc = 'best')
    ax.set_title("Precision-Recall Curve", fontsize=14)
    ax.set_xlabel('Recall', fontsize=12)
    ax.set_ylabel('Precision', fontsize=12)
    ax.tick_params(axis='both', labelsize=12)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig("./PR/SVM.png")
    # Close (not just clear) the figure so it does not stay in memory.
    plt.close(fig)

    # plot the roc curve
    fpr, tpr, _ = roc_curve(target_list, score_list)
    fig, ax = plt.subplots(figsize=(4,4))
    ax.plot(fpr, tpr, label = "AUROC = {:.4f}".format(roc_score))
    ax.set_title("ROC Curve", fontsize=14)
    # Chance diagonal from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], "r-")
    ax.legend(loc = 'best')
    ax.set_xlabel('False Positive Rate', fontsize=12)
    ax.set_ylabel('True Positive Rate', fontsize=12)
    ax.tick_params(axis='both', labelsize=12)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig("./ROC/SVM.png")
    plt.close(fig)

    print(conf_matrix)
    print("Sensitivity:", sensitivity)
    print("Specificity:", specificity)
    print("AUROC:", roc_score)
    print("AUPRC:", pr_score)
    print("F1:", f1)
    print("Holdout:", accuracy)

In [None]:
# Random Forest
# Load the per-clip feature table written by the feature-extraction step.
dataset_file = './urbansound8k_features.csv'
dataset = read_csv(dataset_file)
dataset = np.array(dataset)

# Column layout of the feature table: 0 = filename, 1 = fold,
# 2 .. -2 = model inputs (duration and the audio features), -1 = class ID.
fold = dataset[:, 1].astype(int)
features = dataset[:, 2:-1].astype(float)
labels = dataset[:, -1].astype(int)

# Folds below 6 are used for training / hyper-parameter search, folds above 7
# form the hold-out set; folds 6 and 7 are not used in this cell.
train_mask = fold < 6
holdout_mask = fold > 7

train = features[train_mask]
train_label = labels[train_mask]
holdout = features[holdout_mask]
holdout_label = labels[holdout_mask]

# Fit the scaler on the training data only and reuse its statistics for the
# hold-out set, so both sets live in the same feature space and no hold-out
# information leaks into preprocessing.
scaler = StandardScaler()
transformed_train = scaler.fit_transform(train)
transformed_holdout = scaler.transform(holdout)

tuned_parameters = [{"n_estimators":[100,200,300,400,500,600,700,800,900,1000],
                   "max_depth": [10,20,30,40,50]}]

scores = ['accuracy']

# Output directories for the saved figures.
os.makedirs("./PR", exist_ok=True)
os.makedirs("./ROC", exist_ok=True)

for score in scores:
    print("# Tuning hyper-parameters for %s" % score)
    print()

    # NOTE(review): cv=5 re-splits the pooled training rows and ignores the
    # dataset's own fold column; if related clips share a fold this may give
    # optimistic CV scores - confirm whether fold-grouped CV is wanted.
    clf = GridSearchCV(
        estimator= RandomForestClassifier(random_state = 1), param_grid = tuned_parameters, scoring= score,cv = 5, n_jobs = 2
    )
    clf.fit(transformed_train, train_label)

    print("Best parameters set found on development set:")
    print()
    print(clf.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clf.cv_results_['mean_test_score']
    stds = clf.cv_results_['std_test_score']
    for mean, std, params in zip(means, stds, clf.cv_results_['params']):
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    print()
    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = holdout_label, clf.predict(transformed_holdout)
    classification_metrics = classification_report(y_true, y_pred, target_names = ['drilling','engine_idling'],output_dict= True)
    accuracy = classification_metrics['accuracy']
    # engine_idling is treated as the positive class, drilling as the negative.
    sensitivity = classification_metrics['engine_idling']['recall']
    specificity = classification_metrics['drilling']['recall']

    # Predicted probability of the second class in the classifier's class
    # ordering, used as the ranking score.
    probability = clf.predict_proba(transformed_holdout)[:,1]
    score_list = np.array(probability, dtype='float')

    # Binarise labels and predictions: class ID 4 -> 0, anything else -> 1.
    target_list = (holdout_label != 4).astype(int)
    y_pred = (y_pred != 4).astype(int)

    f1 = f1_score(target_list, y_pred)

    roc_score = roc_auc_score(target_list, score_list)
    conf_matrix = confusion_matrix(target_list, y_pred)
    precision, recall, _ = precision_recall_curve(target_list, score_list)
    pr_score = auc(recall, precision)

    # plot the pr curve
    fig, ax = plt.subplots(figsize=(4,4))
    ax.plot(recall, precision, label = "AUPRC = {:.4f}".format(pr_score))
    # Reference line for a precision-recall plot: a classifier with no skill
    # has precision equal to the share of positive samples at every recall.
    no_skill = target_list.mean()
    ax.plot([0, 1], [no_skill, no_skill], "k--")
    ax.legend(loc = 'best')
    ax.set_title("Precision-Recall Curve", fontsize=14)
    ax.set_xlabel('Recall', fontsize=12)
    ax.set_ylabel('Precision', fontsize=12)
    ax.tick_params(axis='both', labelsize=12)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig("./PR/RF.png")
    # Close (not just clear) the figure so it does not stay in memory.
    plt.close(fig)

    # plot the roc curve
    fpr, tpr, _ = roc_curve(target_list, score_list)
    fig, ax = plt.subplots(figsize=(4,4))
    ax.plot(fpr, tpr, label = "AUROC = {:.4f}".format(roc_score))
    ax.set_title("ROC Curve", fontsize=14)
    # Chance diagonal from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], "r-")
    ax.legend(loc = 'best')
    ax.set_xlabel('False Positive Rate', fontsize=12)
    ax.set_ylabel('True Positive Rate', fontsize=12)
    ax.tick_params(axis='both', labelsize=12)
    ax.grid(True)
    fig.tight_layout()
    fig.savefig("./ROC/RF.png")
    plt.close(fig)

    print(conf_matrix)
    print("Sensitivity:", sensitivity)
    print("Specificity:", specificity)
    print("AUROC:", roc_score)
    print("AUPRC:", pr_score)
    print("F1:", f1)
    print("Holdout:", accuracy)