In [44]:
import pandas as pd 
import numpy as np 
import os
from pydub import AudioSegment 
import matplotlib 
import librosa
import librosa.display
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.mixture import GaussianMixture
from sklearn.metrics import accuracy_score, classification_report
import speaker_verification_toolkit.tools as svt
from tensorflow import keras
from tensorflow.keras import layers 
%matplotlib inline
from tensorflow.keras.models import load_model

In [45]:
def preprocess_and_extract_mfcc(audio_path, noise_threshold=0.02, silence_threshold=0.002, n_mfcc=20):
    # Load audio file
    y, sr = librosa.load(audio_path,sr=16000)

    # Create a time array for plotting
    t = librosa.frames_to_time(np.arange(len(y)),sr=16000)

    # Plot the original audio
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=t, y=y, mode='lines', name='Original Audio'))
    fig.update_layout(title_text='Original Audio', xaxis_title='Time (s)', yaxis_title='Amplitude')
    fig.show()

    # Remove noise
    y = svt.rms_silence_filter(y,threshold=silence_threshold)
    # y = librosa.effects.preemphasis(y)
    # y = librosa.effects.preemphasis(y)
    y = librosa.effects.deemphasis(y)
    y = librosa.effects.deemphasis(y)
    y = librosa.effects.deemphasis(y)
    
    # y = svt.rms_silence_filter(y,threshold=0.02)
    # Trim silent portions
    # yt, index = librosa.effects.trim(y, top_db=silence_threshold)

    # Extract MFCC features
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)

    # Create a DataFrame with time and MFCC features
    df = pd.DataFrame(mfcc.T, columns=[f'MFCC_{i+1}' for i in range(n_mfcc)])
    df['Time'] = librosa.frames_to_time(np.arange(len(df)), sr=sr)

    # Plot the processed audio
    fig_mfcc = go.Figure()

    for i in range(1, n_mfcc + 1):
        fig_mfcc.add_trace(go.Scatter(x=df['Time'], y=df[f'MFCC_{i}'], mode='markers', name=f'MFCC_{i}'))

    fig_mfcc.update_layout(title_text='MFCC Points over Time', xaxis_title='Time (s)', yaxis_title='MFCC Values')
    fig_mfcc.show()

    # Plot the processed audio
    fig_processed = go.Figure()
    fig_processed.add_trace(go.Scatter(x=t, y=y, mode='lines', name='Processed Audio'))
    fig_processed.update_layout(title_text='Processed Audio', xaxis_title='Time (s)', yaxis_title='Amplitude')
    fig_processed.show()

    return df

# Example usage
audio_file_path = 'sound/UltimateTest/hey_leepi.wav'
df_test = preprocess_and_extract_mfcc(audio_file_path)
df_test["ID"]='2'
df_test["Name"]='leepi'



In [46]:
nn_model = load_model("dualConv_model.h5")


In [49]:

df_test

Unnamed: 0,MFCC_1,MFCC_2,MFCC_3,MFCC_4,MFCC_5,MFCC_6,MFCC_7,MFCC_8,MFCC_9,MFCC_10,...,MFCC_14,MFCC_15,MFCC_16,MFCC_17,MFCC_18,MFCC_19,MFCC_20,Time,ID,Name
0,319.303271,88.779066,79.276236,65.680186,50.542228,36.176647,24.072937,14.722266,7.854388,2.895536,...,-3.348372,-1.932282,-0.100065,1.401414,1.964791,1.366594,-0.157821,0.0,2,0
1,356.218102,134.247292,107.744923,74.500029,44.528169,23.656309,12.225756,7.057818,4.502352,2.362726,...,-3.945176,-2.874279,-0.619139,1.875057,3.290482,2.73366,0.423758,0.032,2,0
2,375.267593,155.993494,117.571367,73.24993,37.896526,16.40939,5.353693,-0.252701,-2.602996,-2.036682,...,0.456794,-0.316682,-0.310467,-0.546865,-1.591249,-2.56321,-2.22206,0.064,2,0
3,382.755951,164.13699,120.351689,71.762645,35.567056,15.63254,5.926954,0.22927,-2.710949,-1.87338,...,0.612676,-0.214609,0.523578,0.483182,-1.710756,-4.30889,-4.48427,0.096,2,0
4,382.934909,164.520595,120.587763,70.879393,33.223592,13.282936,6.002296,3.794648,2.670151,2.208361,...,1.30464,0.95491,0.406608,-1.377522,-4.018727,-5.502733,-4.385232,0.128,2,0
5,376.811119,156.535907,114.100934,65.84255,29.564379,11.751846,7.667053,8.496231,8.258416,5.756734,...,-0.25969,0.878188,1.423573,0.695065,-0.850641,-2.066399,-2.367262,0.16,2,0
6,373.168143,151.292184,109.113999,62.332334,28.637472,13.09692,9.469015,8.885999,6.910274,3.967383,...,-1.974246,-1.440847,0.436359,2.449603,3.341621,2.941618,1.811118,0.192,2,0
7,369.574716,146.535113,105.447272,60.523147,28.848673,14.516727,10.833521,9.500413,7.068012,4.292214,...,-3.399383,-2.24898,0.900328,3.79354,4.630978,3.672927,2.274996,0.224,2,0
8,366.603384,141.854122,100.018017,55.370869,25.429708,13.286192,10.758788,9.028524,5.556644,2.509701,...,-0.795083,-0.682964,0.923287,2.568071,3.136422,3.258831,4.05891,0.256,2,0
9,361.597861,135.487567,95.612045,53.475787,25.380398,13.465832,9.815507,6.927397,3.241351,0.521435,...,-4.209119,-2.476546,0.029996,0.872177,-0.109732,-0.362772,2.066867,0.288,2,0


In [48]:
df_test.Name = (df_test.Name == 'adi').astype(int)

In [50]:
def scale_dataset(dataframe, oversample=False):
    # Separate MFCC features from 'Time' and 'ID'
    mfcc_columns = dataframe.columns[:20]
    time_id_columns = dataframe.columns[20:-1]

    X_mfcc = dataframe[mfcc_columns].values
    X_time_id = dataframe[time_id_columns].values
    y = dataframe[dataframe.columns[-1]].values

    scaler_mfcc = StandardScaler()
    X_mfcc_scaled = scaler_mfcc.fit_transform(X_mfcc)

    # Concatenate the scaled MFCC features with 'Time' and 'ID'
    X_scaled = np.hstack((X_mfcc_scaled, X_time_id))

    if oversample:
        ros = RandomOverSampler()
        X_scaled, y = ros.fit_resample(X_scaled, y)

    # Concatenate the oversampled features with the target column
    data = np.hstack((X_scaled, np.reshape(y, (-1, 1))))

    return data.astype('float64'), X_scaled.astype('float64'), y.astype('float64')

In [51]:
test, X_test, y_test = scale_dataset(df_test, oversample=False)

In [52]:
y_pred =(nn_model.predict(X_test)>0.5).astype(int).reshape(-1,)



In [53]:
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

Accuracy: 0.9387755102040817
Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.94      0.97        49
         1.0       0.00      0.00      0.00         0

    accuracy                           0.94        49
   macro avg       0.50      0.47      0.48        49
weighted avg       1.00      0.94      0.97        49




Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Recall is ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.



In [54]:
pd.DataFrame([y_pred,y_test])

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
