# **Random Forest**
- Using data that was not annotated
- Training it on the unbalanced data
- Using avgpool
- 6 second window

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Standard libraries
import numpy as np
import pandas as pd
import time
import os

# For audio
from IPython.display import Audio
import librosa

# For preprocessing
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
import tensorflow as tf

# For modeling
from sklearn.svm import SVC
from sklearn.metrics import classification_report, roc_auc_score, f1_score

# Operational
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import time
from scipy.signal import butter, filtfilt
import random

In [3]:
pkl_path = '/content/drive/My Drive/Final-Year-Project/Dataset/Final-Version-of-Bird-Classification-Project/feature-extraction/Annotated/Regular/NotAveragePooled/split_features_3s_all_2D.pkl'

# Load the pickle file
with open(pkl_path, 'rb') as file:
    data = pickle.load(file)

In [4]:
train_data = data['train'].copy()
val_data = data['val'].copy()

In [5]:
train_data

{'melspectrogram': array([[[-0.01838665,  0.        , -0.00848863, ..., -0.0140964 ,
          -0.00224623, -0.00832028],
         [-0.1968215 , -0.21882324, -0.27436921, ..., -0.28576213,
          -0.22507286, -0.19316849],
         [-0.23742069, -0.24905342, -0.28791261, ..., -0.28030974,
          -0.26599282, -0.25660488],
         ...,
         [-0.99879366, -0.99876946, -0.99870896, ..., -0.99917048,
          -0.99937928, -0.99937975],
         [-0.9994275 , -0.99943322, -0.99946558, ..., -0.99974817,
          -0.99971521, -0.99969888],
         [-1.        , -1.        , -1.        , ..., -1.        ,
          -1.        , -1.        ]],
 
        [[-0.01671414,  0.        , -0.0082565 , ..., -0.01512463,
          -0.00132613, -0.0053683 ],
         [-0.17772998, -0.19626212, -0.24940659, ..., -0.30918679,
          -0.24149542, -0.20524147],
         [-0.23100212, -0.23447959, -0.25692546, ..., -0.26929191,
          -0.2456764 , -0.23258191],
         ...,
         [-0.99

In [6]:
train_labels = train_data['label'].copy()
temp = train_data.copy()
del temp['label']
tr_features = temp

In [7]:
val_labels = val_data['label'].copy()
temp = val_data.copy()
del temp['label']
v_features = temp

# Shuffle Data

In [8]:
def shuffle_data(input_label, input_features):
  input_len = len(input_label)
  np.random.seed(1826)
  input_indices = np.random.permutation(input_len)
  input_features = {key: np.array([input_features[key][i] for i in input_indices]) for key in input_features} # dictionary comprehension
  input_label = np.array([input_label[i] for i in input_indices])

  return input_label, input_features

In [9]:
train_y, train_features = shuffle_data(train_labels, tr_features)

In [10]:
val_y, val_features = shuffle_data(val_labels, v_features)

In [11]:
display(train_y.shape)
display(train_y[:15])

(5278,)

array([ 1, 16,  1,  2, 13,  3, 16, 15,  1,  0,  9, 13,  2, 17,  5],
      dtype=int32)

In [12]:
for key in train_features.keys():
  display(key)
  display(train_features[key].shape)
  display(train_features[key][0])

'melspectrogram'

(5278, 128, 259)

array([[-0.01707135,  0.        , -0.00803004, ..., -0.01309088,
        -0.00134445, -0.00628396],
       [-0.19095391, -0.20925798, -0.26611498, ..., -0.30659822,
        -0.22971404, -0.1961073 ],
       [-0.26433569, -0.28252903, -0.33811274, ..., -0.37626317,
        -0.30302411, -0.27038145],
       ...,
       [-0.98720688, -0.98624605, -0.98306841, ..., -0.98450774,
        -0.99309105, -0.99470353],
       [-0.99932593, -0.99883336, -0.99588645, ..., -0.99674553,
        -1.        , -1.        ],
       [-1.        , -1.        , -1.        , ..., -1.        ,
        -0.99780911, -0.99684298]])

'mfcc'

(5278, 20, 259)

array([[-0.94879884, -1.        , -1.        , ..., -1.        ,
        -1.        , -0.9502812 ],
       [ 1.        ,  0.91938615,  0.57537198, ...,  0.42115942,
         0.80573803,  1.        ],
       [ 0.05686987,  0.00296204, -0.07928391, ..., -0.16479762,
        -0.04739549,  0.05014246],
       ...,
       [-0.03221215, -0.04125709, -0.03007667, ...,  0.07457075,
         0.07594217,  0.06102226],
       [ 0.09270835,  0.08251934,  0.03903725, ...,  0.12923747,
         0.15029721,  0.13926271],
       [ 0.07883924,  0.10498339,  0.10460295, ..., -0.06592759,
        -0.10321572, -0.09633345]])

'chroma'

(5278, 12, 259)

array([[0.73589081, 0.73715395, 0.87803555, ..., 0.28893852, 0.36369899,
        0.49300808],
       [0.87794137, 0.94274384, 1.        , ..., 0.26213828, 0.35391361,
        0.50193125],
       [1.        , 1.        , 0.87642187, ..., 0.35677135, 0.39522588,
        0.53605503],
       ...,
       [0.74050874, 0.71656156, 0.647811  , ..., 0.79869151, 0.61559939,
        0.62869602],
       [0.79570669, 0.84182817, 0.86247641, ..., 1.        , 1.        ,
        1.        ],
       [0.75836134, 0.74041015, 0.6537447 , ..., 0.5199548 , 0.6266908 ,
        0.73339826]])

'cqt'

(5278, 84, 259)

array([[-2.26095021e-01, -2.54480004e-01, -4.00108874e-01, ...,
        -2.03836262e-01, -3.83356214e-02, -1.49011612e-08],
       [-4.53062952e-01, -5.30015230e-01, -6.76905394e-01, ...,
        -4.46281791e-01, -2.90680707e-01, -2.05382109e-01],
       [-7.65330911e-01, -7.82241166e-01, -9.86226559e-01, ...,
        -7.43246794e-01, -5.16093910e-01, -4.64929044e-01],
       ...,
       [-1.30036249e+01, -1.90142689e+01, -1.98970470e+01, ...,
        -1.04235544e+01, -1.75874863e+01, -9.66891193e+00],
       [-1.28789825e+01, -1.69870529e+01, -1.28914795e+01, ...,
        -1.12526684e+01, -6.83810806e+00, -1.07536049e+01],
       [-1.55329523e+01, -1.89735603e+01, -1.53286657e+01, ...,
        -1.92332726e+01, -9.36328983e+00, -2.50303764e+01]])

'id'

(5278,)

'XC358628.mp3'

In [13]:
display(val_y.shape)
display(val_y[:15])

(1350,)

array([17,  2,  1,  2, 18,  2,  1,  3,  7,  2,  2, 15,  9, 15,  6],
      dtype=int32)

In [14]:
for key in val_features.keys():
  display(key)
  display(val_features[key].shape)
  display(val_features[key][0])

'melspectrogram'

(1350, 128, 259)

array([[-0.08927982, -0.07154191, -0.07405514, ..., -0.07131123,
        -0.04856302, -0.04848331],
       [-0.25275759, -0.26858278, -0.31829946, ..., -0.30957727,
        -0.2453672 , -0.21418647],
       [-0.32182292, -0.33767052, -0.38655693, ..., -0.3730203 ,
        -0.31960808, -0.29334177],
       ...,
       [-0.99583571, -0.99586676, -0.99619127, ..., -0.99568186,
        -0.99508411, -0.99514051],
       [-0.99720996, -0.99733484, -0.99788771, ..., -0.99575268,
        -0.99455928, -0.99454338],
       [-0.99826414, -0.99861173, -1.        , ..., -1.        ,
        -1.        , -1.        ]])

'mfcc'

(1350, 20, 259)

array([[-1.        , -1.        , -1.        , ..., -1.        ,
        -1.        , -0.82066895],
       [ 0.96097   ,  0.85016742,  0.6096302 , ...,  0.62035277,
         0.9400284 ,  1.        ],
       [ 0.19480562,  0.17227151,  0.12094433, ...,  0.12264416,
         0.19606752,  0.20876604],
       ...,
       [ 0.03821007,  0.03807084,  0.03454567, ...,  0.02843895,
         0.04579981,  0.04630303],
       [ 0.03098821,  0.0313242 ,  0.03192942, ...,  0.02091692,
         0.03440663,  0.03533517],
       [ 0.02809926,  0.02638748,  0.0183099 , ...,  0.01975891,
         0.02961496,  0.03002796]])

'chroma'

(1350, 12, 259)

array([[0.86446084, 0.89393963, 0.86013256, ..., 0.94347106, 1.        ,
        0.97888406],
       [0.89163321, 0.92367156, 0.90457933, ..., 0.91421372, 0.95979281,
        0.96453417],
       [0.93943825, 0.96408452, 0.9529742 , ..., 0.91689091, 0.94659123,
        0.9726789 ],
       ...,
       [0.8605852 , 0.89337216, 0.86910235, ..., 0.78664242, 0.86763858,
        0.92010274],
       [0.81955395, 0.85280198, 0.81670655, ..., 0.79686012, 0.89063948,
        0.91709258],
       [0.80799763, 0.83584706, 0.7913299 , ..., 0.80723091, 0.89263155,
        0.90341277]])

'cqt'

(1350, 84, 259)

array([[ -4.96117946,  -4.98943387,  -5.13477446, ...,  -4.10589573,
         -3.9544641 ,  -3.89248371],
       [ -5.1931474 ,  -5.2702744 ,  -5.41740439, ...,  -6.53222728,
         -5.9276817 ,  -5.43866031],
       [ -5.50660534,  -5.523381  ,  -5.72691386, ...,  -2.27492126,
         -2.23120413,  -2.28983142],
       ...,
       [-24.73274405, -60.34352413, -59.73790802, ..., -51.30912419,
        -56.29529652, -50.66915157],
       [-25.05466204, -59.58331214, -57.62388118, ..., -50.94163721,
        -52.00354656, -44.67068148],
       [-25.22672321, -62.69822785, -63.1191885 , ..., -49.59993179,
        -51.97245995, -53.07590773]])

'id'

(1350,)

'XC252756.mp3'

## **Random Forest Model**

Saving evaluation results

In [15]:
def evaluate_model(model, validation_features, val_y):
    # Predict class labels for validation set
    val_yhat_result = model.predict(validation_features)

    # Print classification report
    print('Validation classification Report \n')
    print(classification_report(val_y, val_yhat_result))

    # Get probabilities for the validation set (for AUC calculation)
    # val_y_proba = model.predict_proba(validation_features)

    # Calculate AUC for multiclass classification using 'ovr' and 'weighted' average
    auc_score = None # roc_auc_score(val_y, val_y_proba, multi_class='ovr', average='weighted') Look at documentation
    # print(f'AUC Score: {auc_score}')

    # Calculate F1-score with 'weighted' average for imbalanced dataset
    f1 = f1_score(val_y, val_yhat_result, average='weighted')
    print(f'F1 Score (Weighted): {f1}')

    # Store the scores in the dictionary
    val_score = {'f1': f1, 'auc': auc_score}

    # Return the scores dictionary
    return val_score

In [16]:
train_results = {}
val_results = {}

val_scores = {}

### **With all the features**

In [17]:
training_features_3D = np.concatenate((train_features['mfcc'], train_features['chroma'], train_features['cqt'], train_features['melspectrogram']), axis=1)
training_features = training_features_3D.reshape(training_features_3D.shape[0], -1)
training_features.shape

(5278, 63196)

In [18]:
validation_features_3D = np.concatenate((val_features['mfcc'], val_features['chroma'], val_features['cqt'], val_features['melspectrogram']), axis=1)
validation_features = validation_features_3D.reshape(validation_features_3D.shape[0], -1)
validation_features.shape

(1350, 63196)

Fit the model with training data

In [19]:
model = SVC(kernel='rbf', C=10)
model.fit(training_features, train_y)

In [20]:
a = model.score(training_features, train_y)
b = model.score(validation_features, val_y)

print('Training accuracy:', a)
print('Validation accuracy:', b)

train_results['all_features'] = a
val_results['all_features'] = b

Training accuracy: 0.9475179992421372
Validation accuracy: 0.6118518518518519


In [17]:
train_results['all_features'] = 0.9475179992421372
val_results['all_features'] = 0.6118518518518519

In [21]:
val_scores['all_features'] = evaluate_model(model=model, validation_features=validation_features, val_y=val_y)

Validation classification Report 

              precision    recall  f1-score   support

           0       0.65      0.74      0.69       114
           1       0.54      0.74      0.63       141
           2       0.84      0.71      0.77       271
           3       0.45      0.48      0.46        90
           4       1.00      0.39      0.56        18
           5       0.20      0.07      0.10        15
           6       0.37      0.30      0.33        23
           7       0.96      0.98      0.97        44
           8       0.28      0.50      0.36        50
           9       0.95      0.84      0.89        49
          10       0.68      0.67      0.67        48
          11       0.23      0.22      0.22        46
          12       0.75      0.72      0.74        54
          13       0.43      0.69      0.53        48
          14       0.23      0.24      0.23        38
          15       0.39      0.25      0.30       117
          16       0.94      0.89      0.92   

In [18]:
val_scores['all_features'] = {'f1': 0.6117467120817782, 'auc': None}

## **With Mel-Spectrogram and Chroma**

In [19]:
training_features_3D = np.concatenate((train_features['melspectrogram'], train_features['chroma']), axis=1)
training_features = training_features_3D.reshape(training_features_3D.shape[0], -1)
training_features.shape

(5278, 36260)

In [20]:
validation_features_3D = np.concatenate((val_features['melspectrogram'], val_features['chroma']), axis=1)
validation_features = validation_features_3D.reshape(validation_features_3D.shape[0], -1)
validation_features.shape

(1350, 36260)

In [21]:
model = SVC(kernel='rbf', C=10)
model.fit(training_features, train_y)

In [22]:
a = model.score(training_features, train_y)
b = model.score(validation_features, val_y)

print('Training accuracy:', a)
print('Validation accuracy:', b)

train_results['melspectrogram_chroma'] = a
val_results['melspectrogram_chroma'] = b

Training accuracy: 0.9651383099658962
Validation accuracy: 0.7222222222222222


In [23]:
val_scores['melspectrogram_chroma'] = evaluate_model(model=model, validation_features=validation_features, val_y=val_y)

Validation classification Report 

              precision    recall  f1-score   support

           0       0.82      0.78      0.80       114
           1       0.64      0.60      0.62       141
           2       0.79      0.79      0.79       271
           3       0.80      1.00      0.89        90
           4       0.67      0.11      0.19        18
           5       0.44      0.27      0.33        15
           6       0.22      0.35      0.27        23
           7       0.98      0.98      0.98        44
           8       0.68      0.52      0.59        50
           9       0.79      0.94      0.86        49
          10       0.91      0.83      0.87        48
          11       0.64      0.50      0.56        46
          12       0.69      0.76      0.73        54
          13       0.46      0.79      0.58        48
          14       0.56      0.84      0.67        38
          15       0.48      0.42      0.45       117
          16       1.00      0.95      0.97   

## **With Mel-Spectrogram, MFCCs, Chroma**

In [24]:
training_features_3D = np.concatenate((train_features['melspectrogram'], train_features['chroma'], train_features['mfcc']), axis=1)
training_features = training_features_3D.reshape(training_features_3D.shape[0], -1)
training_features.shape

(5278, 41440)

In [25]:
validation_features_3D = np.concatenate((val_features['melspectrogram'], val_features['chroma'], val_features['mfcc']), axis=1)
validation_features = validation_features_3D.reshape(validation_features_3D.shape[0], -1)
validation_features.shape

(1350, 41440)

In [26]:
model = SVC(kernel='rbf', C=10)
model.fit(training_features, train_y)

In [27]:
a = model.score(training_features, train_y)
b = model.score(validation_features, val_y)

print('Training accuracy:', a)
print('Validation accuracy:', b)

train_results['melspectrogram_chroma_mfcc'] = a
val_results['melspectrogram_chroma_mfcc'] = b

Training accuracy: 0.9577491474043198
Validation accuracy: 0.7237037037037037


In [28]:
val_scores['melspectrogram_chroma_mfcc'] = evaluate_model(model=model, validation_features=validation_features, val_y=val_y)

Validation classification Report 

              precision    recall  f1-score   support

           0       0.83      0.78      0.81       114
           1       0.63      0.60      0.61       141
           2       0.79      0.79      0.79       271
           3       0.79      1.00      0.88        90
           4       0.60      0.17      0.26        18
           5       0.44      0.27      0.33        15
           6       0.22      0.30      0.25        23
           7       0.98      0.98      0.98        44
           8       0.68      0.54      0.60        50
           9       0.82      0.94      0.88        49
          10       0.89      0.83      0.86        48
          11       0.71      0.48      0.57        46
          12       0.70      0.78      0.74        54
          13       0.46      0.81      0.59        48
          14       0.57      0.87      0.69        38
          15       0.49      0.42      0.45       117
          16       1.00      0.95      0.97   

# Review results from all models

In [29]:
train_results_df = pd.DataFrame(list(train_results.items()), columns=['Features', 'Train_Accuracy']).round(3)
val_results_df = pd.DataFrame(list(val_results.items()), columns=['Features', 'Val_Accuracy']).round(3)

result_df = train_results_df.merge(val_results_df, on='Features')
result_df = result_df.sort_values('Features')
result_df

Unnamed: 0,Features,Train_Accuracy,Val_Accuracy
0,all_features,0.948,0.612
1,melspectrogram_chroma,0.965,0.722
2,melspectrogram_chroma_mfcc,0.958,0.724


In [30]:
val_scores_df = pd.DataFrame([(key, value['f1'], value['auc']) for key, value in val_scores.items()],
                             columns=['Features', 'F1_Score', 'AUC_Score']).round(3)

val_scores_df = val_scores_df.sort_values('Features')
print(val_scores_df)

                     Features  F1_Score AUC_Score
0                all_features     0.612      None
1       melspectrogram_chroma     0.720      None
2  melspectrogram_chroma_mfcc     0.721      None
