In [1]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model



In [None]:
# Paths and feature-extraction parameters for the training run.
_LA_ROOT = "Dataset/LA"  # common prefix of the audio and protocol locations below
DATASET_PATH = f"{_LA_ROOT}/ASVspoof2019_LA_train/flac"
LABEL_FILE_PATH = f"{_LA_ROOT}/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt"

NUM_CLASSES = 2      # two target classes: bonafide and spoof
SAMPLE_RATE = 16000  # sample rate handed to librosa when loading and analysing audio
DURATION = 5         # duration of audio clips in seconds
N_MELS = 128         # number of Mel frequency bins

In [2]:
# Build {audio file stem: label} from the protocol file.
# A row is labelled 1 when its last column is "bonafide" and 0 otherwise.
labels = {}

with open(LABEL_FILE_PATH, 'r') as label_file:
    # Stream the file line by line instead of materialising it with readlines().
    for line in label_file:
        parts = line.split()
        # Blank or truncated rows have no file-name column; skip them rather
        # than crash with IndexError on parts[1].
        if len(parts) < 2:
            continue
        # Column 1 is the audio file stem; the last column is the class key.
        labels[parts[1]] = 1 if parts[-1] == "bonafide" else 0

X = []
y = []

max_time_steps = 109  # Fixed number of spectrogram frames kept per clip

for file_name, label in labels.items():
    file_path = os.path.join(DATASET_PATH, file_name + ".flac")

    # Load the clip with librosa, limited to DURATION seconds at SAMPLE_RATE
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Mel power spectrogram converted to dB relative to the clip's own maximum
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Force every spectrogram to exactly max_time_steps columns
    missing_frames = max_time_steps - mel_spectrogram.shape[1]
    if missing_frames > 0:
        # NOTE(review): with ref=np.max the loudest bin is 0 dB, so zero padding
        # fills the tail with the maximum level rather than silence. Left as is
        # because inference pads the same way; changing it means retraining and
        # updating the inference code together. Confirm whether this is intended.
        mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, missing_frames)), mode='constant')
    else:
        mel_spectrogram = mel_spectrogram[:, :max_time_steps]

    X.append(mel_spectrogram)
    y.append(label)

FileNotFoundError: [Errno 2] No such file or directory: 'LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt'

In [None]:
# Stack the per-clip spectrograms and labels into NumPy arrays.
X, y = np.array(X), np.array(y)

# Echo both arrays as the cell output.
(X, y)

In [None]:
# One-hot encode the integer labels into NUM_CLASSES columns.
y_encoded = to_categorical(y, num_classes=NUM_CLASSES)

In [None]:
# Hold out 20% of the clips for validation.
# X and y_encoded are still in protocol-file order, so slicing them directly
# would make the validation set "whatever is listed last" instead of a
# representative sample. Draw a seeded permutation first so the split is random
# yet reproducible across kernel restarts.
# Fancy indexing copies the selected rows, so the splits no longer alias X.
shuffled_indices = np.random.default_rng(42).permutation(len(X))
split_index = int(0.8 * len(X))
train_indices = shuffled_indices[:split_index]
val_indices = shuffled_indices[split_index:]
X_train, X_val = X[train_indices], X[val_indices]
y_train, y_val = y_encoded[train_indices], y_encoded[val_indices]

In [None]:
# Define CNN model architecture
input_shape = (N_MELS, X_train.shape[2], 1)  # Input shape for CNN (height, width, channels)
model_input = Input(shape=input_shape)

In [None]:
# Feature extractor: two stages of Conv2D (3x3, ReLU) followed by 2x2 max
# pooling, using 32 and then 64 filters.
x = model_input
for n_filters in (32, 64):
    x = Conv2D(n_filters, kernel_size=(3, 3), activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Classifier head: flatten, 128-unit ReLU layer, dropout at rate 0.5, then a
# softmax layer with one unit per class.
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.5)(x)
model_output = Dense(NUM_CLASSES, activation='softmax')(x)

In [None]:
# Wrap the layer graph between model_input and model_output in a Keras Model.
model = Model(model_input, model_output)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, and
# accuracy reported as the metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train the model for 10 epochs in batches of 32, evaluating on the validation
# split after each epoch.
# The returned History object is bound to `history` so the per-epoch loss and
# metric values stay available (in `history.history`) after the cell finishes,
# and the cell no longer ends with an address-bearing object repr.
history = model.fit(X_train, y_train, batch_size=32, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7f7adee35750>

In [10]:
# Persist the trained model to disk in the working directory.
saved_model_file = "audio_classifier.h5"
model.save(saved_model_file)

NameError: name 'model' is not defined

LOADING THE MODEL

In [2]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model



In [3]:
# Inference settings. The audio parameters repeat the values used in the
# training cells, so clips are preprocessed the same way.
TEST_DATASET_PATH = "./TestEvaluation"
MODEL_PATH = "audio_classifier.h5"  # replace with the actual path to your saved model

SAMPLE_RATE, DURATION = 16000, 5     # sample rate and seconds of audio per clip
N_MELS, MAX_TIME_STEPS = 128, 109    # spectrogram height (Mel bins) and width (frames)

In [4]:

from tensorflow.keras.models import load_model

# Restore the trained classifier from MODEL_PATH; predict_audio reads this
# module-level `model`.
model = load_model(filepath=MODEL_PATH)

2025-05-19 23:15:43.362896: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-05-19 23:15:43.363188: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-05-19 23:15:43.363193: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-05-19 23:15:43.363892: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-05-19 23:15:43.364376: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [5]:

import os
import numpy as np
import librosa

def predict_audio(file_path):
    """
    Score a single audio file with the loaded classifier.

    The clip is loaded at SAMPLE_RATE (limited to DURATION seconds), turned
    into a dB-scaled Mel spectrogram with N_MELS bands, padded or cropped to
    MAX_TIME_STEPS frames, and passed to the module-level ``model``.

    Args:
        file_path (str): Path to the audio file

    Returns:
        numpy.ndarray: Prediction probabilities for each class
    """
    waveform, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Mel power spectrogram expressed in dB relative to its own maximum
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(y=waveform, sr=SAMPLE_RATE, n_mels=N_MELS),
        ref=np.max,
    )

    # Bring the time axis to exactly MAX_TIME_STEPS columns
    n_frames = mel_db.shape[1]
    if n_frames >= MAX_TIME_STEPS:
        mel_db = mel_db[:, :MAX_TIME_STEPS]
    else:
        pad_widths = ((0, 0), (0, MAX_TIME_STEPS - n_frames))
        mel_db = np.pad(mel_db, pad_widths, mode='constant')

    # Add the batch and channel axes the model expects
    batch = mel_db.reshape(1, N_MELS, MAX_TIME_STEPS, 1)

    # Only one clip was submitted, so return the first (and only) row
    return model.predict(batch, verbose=0)[0]



def process_directory(directory_path):
    """
    Walk a directory tree and print a prediction for every audio file found.

    Files are selected by extension (case-insensitive). For each match the
    predicted class and its confidence are printed; if scoring a file raises,
    the error is printed and the walk continues with the next file.

    Args:
        directory_path (str): Path to directory containing audio files
    """
    separator = "-" * 50
    print(f"Processing directory: {directory_path}")
    print(separator)

    # str.endswith accepts a tuple, so one call tests every supported extension
    audio_suffixes = ('.flac', '.wav', '.mp3', '.opus', '.ogg', '.m4a', '.aac')

    for folder, _subdirs, filenames in os.walk(directory_path):
        for filename in filenames:
            # Ignore anything that is not a recognised audio file
            if not filename.lower().endswith(audio_suffixes):
                continue

            file_path = os.path.join(folder, filename)
            try:
                prob = predict_audio(file_path)
                # Class index 1 is reported as bonafide, anything else as spoof
                if np.argmax(prob) == 1:
                    predicted_class = "bonafide"
                else:
                    predicted_class = "spoof"
                confidence = max(prob)
                print(f"File: {file_path}")
                print(f"Prediction: {predicted_class} (confidence: {confidence:.2%})")
                print(separator)
            except Exception as e:
                # Best effort: report the failure and keep going
                print(f"Error processing {file_path}: {str(e)}")

# Example usage: directories whose audio files should be classified
test_directories = ["TestEvaluation"]

# Classify each directory in turn, reporting any entry that is not a directory
for directory in test_directories:
    if not os.path.isdir(directory):
        print(f"Error: {directory} is not a valid directory")
        continue
    process_directory(directory)

Processing directory: TestEvaluation
--------------------------------------------------


2025-05-19 23:15:59.229834: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


File: TestEvaluation/LA_E_5169845.flac
Prediction: bonafide (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_2601971.flac
Prediction: bonafide (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_9521934.flac
Prediction: spoof (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_1007069.flac
Prediction: spoof (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_4453325.flac
Prediction: bonafide (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_4065507.flac
Prediction: spoof (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_1000147.flac
Prediction: bonafide (confidence: 100.00%)
--------------------------------------------------
File: TestEvaluation/LA_E_4785445.flac
Prediction: bonafide (confidence: 100.0