# Detecting the Level of Background Noise in audio

In [3]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay, classification_report
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier

## Data formatting

In [4]:
data = pd.read_csv('features.csv')
data['label'] = data['label'].replace('low', 0)
data['label'] = data['label'].replace('medium', 1)
data['label'] = data['label'].replace('high', 2)
data = data.drop('path', axis=1)
data

  data['label'] = data['label'].replace('high', 2)


Unnamed: 0,label,mfccs,chroma_stft,spectral_centroid,spectral_bandwidth,spectral_rolloff,zero_crossing_rate,rms,poly_features,tonnetz,mel_spectrogram,spectral_contrast
0,0,-12.118624,0.455088,1732.911934,1547.758216,3330.189196,0.100148,0.021002,0.261644,0.000280,0.151428,22.500513
1,0,-12.408580,0.391813,1926.673503,1486.158380,3398.032116,0.140800,0.035025,0.398767,0.005051,0.385223,24.144890
2,0,-14.399076,0.415932,1493.304564,1371.833468,2775.292040,0.089098,0.025171,0.290828,0.003366,0.265907,22.921880
3,0,-15.140317,0.312100,1788.898397,1604.712814,3271.031841,0.104681,0.046138,0.402851,0.004308,0.726101,25.222640
4,0,-17.082851,0.397885,2091.568985,1758.813735,4009.887232,0.143542,0.032982,0.319061,-0.002882,0.425109,23.197930
...,...,...,...,...,...,...,...,...,...,...,...,...
3295,2,-4.116476,0.380773,1395.591307,1542.831774,2866.565935,0.068662,0.076352,1.095193,0.002820,1.470585,22.882074
3296,2,-4.808410,0.462839,1620.362385,1591.303296,3119.865398,0.090697,0.042342,0.542329,-0.002770,0.594997,22.639281
3297,2,-6.858592,0.397525,1926.937648,1748.966893,3647.370324,0.114342,0.064320,0.776456,-0.009003,1.200151,24.252625
3298,2,-10.711108,0.357874,1878.183590,1528.339699,3398.688032,0.119867,0.050409,0.678896,-0.007200,0.724871,23.372391


In [5]:
X = data.drop('label', axis=1)
y = data['label']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(2310, 11)
(2310,)
(990, 11)
(990,)


In [7]:
y_test.value_counts()

label
1    345
0    330
2    315
Name: count, dtype: int64

## Predicting

In [8]:
model = RandomForestClassifier()
# model.fit(X_train, y_train)

In [9]:
# preds = model.predict(X_test)

In [10]:
# print(classification_report(y_test, preds))
# ConfusionMatrixDisplay.from_predictions(y_test, preds)

## Predictions for all models

In [11]:
models = [
    {
        'name': 'Random Forest',
        'model': RandomForestClassifier(),
    },
    {
        'name': 'SVM',
        'model': LinearSVC()
    },
    {
        'name': 'Logistic Regression',
        'model': LogisticRegression()
    },
    {
        'name': 'KNN',
        'model': KNeighborsClassifier()
    },
    {
        'name': 'MLP',
        'model': MLPClassifier()
    }
]

In [12]:
for m in models:
    m['model'].fit(X_train, y_train)
    y_pred = m['model'].predict(X_test)
    m['y_pred'] = y_pred
    print(f"############################ {m['name']} ############################")
    print(classification_report(y_test, y_pred))
    # ConfusionMatrixDisplay.from_predictions(y_test, y_pred)

############################ Random Forest ############################
              precision    recall  f1-score   support

           0       0.60      0.73      0.66       330
           1       0.49      0.43      0.46       345
           2       0.73      0.67      0.70       315

    accuracy                           0.60       990
   macro avg       0.61      0.61      0.60       990
weighted avg       0.60      0.60      0.60       990



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


############################ SVM ############################
              precision    recall  f1-score   support

           0       0.46      0.98      0.62       330
           1       0.30      0.24      0.27       345
           2       1.00      0.01      0.03       315

    accuracy                           0.41       990
   macro avg       0.59      0.41      0.31       990
weighted avg       0.57      0.41      0.31       990

############################ Logistic Regression ############################
              precision    recall  f1-score   support

           0       0.58      0.78      0.66       330
           1       0.48      0.30      0.37       345
           2       0.64      0.67      0.66       315

    accuracy                           0.58       990
   macro avg       0.57      0.58      0.56       990
weighted avg       0.56      0.58      0.56       990

############################ KNN ############################
              precision    recall  f

## Pickling Random Forest

In [None]:
m = models[0]
with open("bgnoise_model.pkl", 'wb') as file:
    pickle.dump(m['model'], file)