In [None]:
#-------------------------------------------------------------------------------------JUPYTER NOTEBOOK SETTINGS-------------------------------------------------------------------------------------
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated (since IPython 7.14).
from IPython.display import display, HTML

# Stretch the notebook container to the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import os
import gc
import re
import librosa
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from joblib import dump, load

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.manifold import TSNE
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Layer, Input, Conv1D, MaxPooling1D, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import Callback, ReduceLROnPlateau, ModelCheckpoint, EarlyStopping 

import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

In [None]:
# Only for Adversarial Trained Models
@tf.keras.utils.register_keras_serializable()
class GradientReversalLayer(Layer):
    """Layer that passes its input through unchanged and flips the gradient.

    Forward pass: returns the input as-is.
    Backward pass: multiplies the incoming gradient by ``-lambda_``.

    Args:
        lambda_: Scale applied (with a negative sign) to the gradient flowing
            back through this layer. Stored in the layer config so a saved
            model can be reloaded.
        **kwargs: Forwarded to the base ``Layer`` constructor.
    """

    def __init__(self, lambda_=1.0, **kwargs):
        super().__init__(**kwargs)
        self.lambda_ = lambda_

    def call(self, x):
        # tf.custom_gradient expects the gradient function to return one
        # gradient per input of the decorated function. Decorating the method
        # itself would make `self` count as an input, so the custom-gradient op
        # is defined as an inner function that receives only the tensor.
        @tf.custom_gradient
        def _reverse_gradient(inputs):
            def grad(dy):
                return -self.lambda_ * dy
            return tf.identity(inputs), grad

        return _reverse_gradient(x)

    def get_config(self):
        """Return the base layer config extended with ``lambda_``."""
        config = super().get_config()
        config.update({"lambda_": self.lambda_})
        return config

### Standalone File Probabilities Output

In [None]:
# Custom Samples
def process_wav_file(wav_file_path, max_length=332):
    """Load an audio file and return its MFCCs with exactly ``max_length`` frames.

    The audio is loaded at 16 kHz and converted to 13 MFCCs
    (n_fft=256, hop_length=160, 32 mel bands, 0-8000 Hz).

    Args:
        wav_file_path: Path of the audio file passed to ``librosa.load``.
        max_length: Number of frames (second axis) in the returned matrix.

    Returns:
        The MFCC array with its second axis equal to ``max_length``: shorter
        clips are zero-padded on the right, longer clips are cut to their
        first ``max_length`` frames.
    """
    # Load the audio, resampled to 16000 Hz
    signal, sr = librosa.load(wav_file_path, sr=16000)
    # Compute MFCC features
    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13, n_fft=256, hop_length=160, n_mels=32, fmin=0, fmax=8000)

    n_frames = mfccs.shape[1]
    if n_frames < max_length:
        # Zero-pad the time axis up to the fixed length
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, max_length - n_frames)), mode='constant')
    elif n_frames > max_length:
        # Clips longer than the fixed length are truncated so the output shape
        # never exceeds what the caller feeds to the model
        # (assumes the model expects exactly max_length frames - TODO confirm)
        mfccs = mfccs[:, :max_length]
    return mfccs

def predict_wav_file(wav_file_path, model, label_encoder):
    """Run the model on a single audio file and tabulate the score per label.

    Args:
        wav_file_path: Path handed to ``process_wav_file``.
        model: Object exposing ``predict`` (here, the loaded Keras model).
        label_encoder: Object whose ``classes_`` supplies the row labels.

    Returns:
        A DataFrame indexed by ``label_encoder.classes_`` with one column,
        ``'Probability'``, holding the model output for this file.
    """
    # Extract the padded MFCCs and prepend a batch axis: (13, 332) -> (1, 13, 332)
    features = process_wav_file(wav_file_path)
    batch = features[np.newaxis]

    # The batch holds a single sample, so keep only the first prediction
    scores = model.predict(batch)[0]

    # One row per known label, in the encoder's class order
    return pd.DataFrame(data=scores, index=label_encoder.classes_, columns=['Probability'])

# Load the pre-trained model
model = load_model('saved_data/models/non-masked_custom_cnn/custom-cnn_final_model.keras')

# Class names, one entry per label.
# NOTE(review): assumes the encoder's class order matches the order of the
# model's output units - confirm against the training notebook.
all_labels = ['battery', 'description', 'environment', 'greeting', 'health', 'noise', 'nutrition', 'silence', 'sun', 'water']
label_encoder = LabelEncoder()
label_encoder.fit(all_labels)

# Path to the audio file to classify (this sample is an .mp3).
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
wav_file_path = '/Users/ciprian/Desktop/Projects/Smart Plant Pot/Audio/Voice Recognition/_testing_samples_bianca/are you hot?.mp3'

# Get the prediction DataFrame
probabilities_df = predict_wav_file(wav_file_path, model, label_encoder)

# Report the label with the highest probability
max_label = probabilities_df['Probability'].idxmax()
highest_probability = probabilities_df['Probability'].max()
print(f"The highest probability is {highest_probability}, associated with label '{max_label}'")

probabilities_df

### Multi Sub-directory Processing with Accuracy

In [None]:
def process_audio_file(file_path, max_length=332):
    """Load an audio file and return its MFCCs with exactly ``max_length`` frames.

    The audio is loaded at 16 kHz with ``librosa.load`` and converted to
    13 MFCCs (n_fft=256, hop_length=160, 32 mel bands, 0-8000 Hz).

    Args:
        file_path: Path of the audio file (the caller passes .wav and .mp3 files).
        max_length: Number of frames (second axis) in the returned matrix.

    Returns:
        The MFCC array with its second axis equal to ``max_length``: shorter
        clips are zero-padded on the right, longer clips are cut to their
        first ``max_length`` frames.
    """
    signal, sr = librosa.load(file_path, sr=16000)
    mfccs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13, n_fft=256, hop_length=160, n_mels=32, fmin=0, fmax=8000)

    n_frames = mfccs.shape[1]
    if n_frames < max_length:
        # Zero-pad the time axis up to the fixed length
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, max_length - n_frames)), mode='constant')
    elif n_frames > max_length:
        # Truncate over-long clips so every file yields the same shape
        # (assumes the model expects exactly max_length frames - TODO confirm)
        mfccs = mfccs[:, :max_length]
    return mfccs

def predict_audio_file(model, label_encoder, mfccs_padded):
    """Score one padded MFCC matrix and return the per-label probabilities.

    Args:
        model: Object exposing ``predict`` (here, the loaded Keras model).
        label_encoder: Object whose ``classes_`` supplies the row labels.
        mfccs_padded: Feature matrix for a single file, without batch axis.

    Returns:
        A DataFrame indexed by ``label_encoder.classes_`` with a single
        ``'Probability'`` column.
    """
    # Prepend the batch axis and keep the first element of the prediction
    batch = mfccs_padded[np.newaxis]
    scores = model.predict(batch)[0]

    class_names = label_encoder.classes_
    n_classes = len(class_names)

    # When the leading dimension is not the class count, lay the values out
    # as rows of n_classes entries before transposing into column form
    if scores.shape[0] != n_classes:
        scores = scores.reshape(-1, n_classes)

    return pd.DataFrame(data=scores.T, index=class_names, columns=['Probability'])

def process_directory(directory, model, label_encoder):
    """Classify every audio file in each subdirectory and report accuracy.

    Every directory found by ``os.walk`` below ``directory`` is treated as one
    group. Files placed directly in ``directory`` itself are not processed.
    For each ``.wav``/``.mp3`` file (extension matched case-insensitively) the
    expected label is the part of the file name before the first underscore,
    with the extension removed, so both ``sun_01.wav`` and ``sun.wav`` map
    to ``sun``.

    Args:
        directory: Root folder containing the subdirectories to evaluate.
        model: Model passed through to ``predict_audio_file``.
        label_encoder: Encoder passed through to ``predict_audio_file``.

    Returns:
        A DataFrame with columns ``Subdirectory``, ``Filename``,
        ``Correct Label``, ``Predicted Label`` and ``Probability``. Each
        subdirectory contributes one row per audio file followed by a summary
        row whose ``Filename`` is ``"Accuracy"`` and whose ``Probability``
        holds the accuracy in percent (0 when the folder has no audio files).
    """
    audio_extensions = ('.wav', '.mp3')
    results = []
    for root, dirs, _files in os.walk(directory):
        # Sorting in place fixes both the order of this loop and the order in
        # which os.walk descends, so repeated runs give the same row order
        dirs.sort()
        for sub_dir in dirs:
            correct_predictions = 0
            total_files = 0
            sub_dir_path = os.path.join(root, sub_dir)
            print(f"Processing subdirectory: {sub_dir_path}")

            # Only audio files are iterated, so the progress bar counts real work
            audio_files = sorted(
                name for name in os.listdir(sub_dir_path)
                if name.lower().endswith(audio_extensions)
            )
            for file in tqdm(audio_files, desc=f"Analyzing {sub_dir}"):
                file_path = os.path.join(sub_dir_path, file)
                # Drop the extension first so a file named just "<label>.wav"
                # still yields "<label>"
                label_from_filename = os.path.splitext(file)[0].split('_')[0]
                mfccs_padded = process_audio_file(file_path)
                probabilities_df = predict_audio_file(model, label_encoder, mfccs_padded)
                predicted_label = probabilities_df['Probability'].idxmax()
                highest_probability = probabilities_df['Probability'].max()
                results.append([sub_dir, file, label_from_filename, predicted_label, highest_probability])
                if predicted_label == label_from_filename:
                    correct_predictions += 1
                total_files += 1

            accuracy = (correct_predictions / total_files) * 100 if total_files > 0 else 0
            results.append([sub_dir, "Accuracy", "", "", accuracy])

    results_df = pd.DataFrame(results, columns=['Subdirectory', 'Filename', 'Correct Label', 'Predicted Label', 'Probability'])
    return results_df

# Map the custom layer's name to its class so load_model can rebuild it
custom_objects = dict(GradientReversalLayer=GradientReversalLayer)

# Restore the adversarially trained model from disk
model = load_model(
    'saved_data/adversarial-training_custom-cnn_final_model.keras',
    custom_objects=custom_objects,
)

# Fit a label encoder on the ten class names
all_labels = [
    'battery', 'description', 'environment', 'greeting', 'health',
    'noise', 'nutrition', 'silence', 'sun', 'water',
]
label_encoder = LabelEncoder().fit(all_labels)

# Root folder whose subdirectories hold the audio files to evaluate
directory = '/Users/ciprian/Desktop/Projects/Smart Plant Pot/Audio/Voice Recognition/Testing Samples'

# Predict every file and collect the per-subdirectory accuracy rows
predictions_df = process_directory(directory, model, label_encoder)
predictions_df