In [1]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from audiomentations import Compose, TimeStretch, PitchShift, AddGaussianNoise
from tensorflow.keras.regularizers import l2

# Dataset locations and feature-extraction settings (audio is converted to mel spectrograms).
_LA_ROOT = "/data/common_source/datasets/asvpoof-2019-dataset/LA/LA"
_PROTOCOL_DIR = f"{_LA_ROOT}/ASVspoof2019_LA_cm_protocols"

# Directories holding the .flac audio of each split
DATASET_PATH_TRAIN = f"{_LA_ROOT}/ASVspoof2019_LA_train/flac"
DATASET_PATH_DEV = f"{_LA_ROOT}/ASVspoof2019_LA_dev/flac"
DATASET_PATH_EVAL = f"{_LA_ROOT}/ASVspoof2019_LA_eval/flac"

# Protocol (label) files of each split
LABEL_FILE_PATH_TRAIN = f"{_PROTOCOL_DIR}/ASVspoof2019.LA.cm.train.trn.txt"
LABEL_FILE_PATH_DEV = f"{_PROTOCOL_DIR}/ASVspoof2019.LA.cm.dev.trl.txt"
LABEL_FILE_PATH_EVAL = f"{_PROTOCOL_DIR}/ASVspoof2019.LA.cm.eval.trl.txt"

NUM_CLASSES = 2      # bonafide vs. spoof
SAMPLE_RATE = 16000  # sampling rate (Hz) the audio is loaded at
DURATION = 5         # at most this many seconds are read from each clip
N_MELS = 128         # number of mel frequency bins

2023-10-31 23:53:51.187947: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-31 23:53:51.295210: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-10-31 23:53:51.295229: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-10-31 23:53:51.318349: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-31 23:53:51.782042: W tensorflow/stream_executor/pla

In [3]:
# Build the training set: parse the protocol file, then turn every listed
# clip into a fixed-width log-mel spectrogram.

# Map utterance id -> class label. Each protocol line is whitespace-separated:
# the second field is the file name (no extension) and the last field is the
# key ("bonafide" -> 0, anything else -> 1).
labels = {}

with open(LABEL_FILE_PATH_TRAIN, 'r') as label_train:
    for line in label_train:
        parts = line.split()
        if len(parts) < 2:
            # Blank or truncated line: nothing to index, skip instead of crashing.
            continue
        labels[parts[1]] = 0 if parts[-1] == "bonafide" else 1

X_train = []
y_train = []

# Fixed number of spectrogram frames (columns) per example.
# NOTE(review): with librosa's default hop length a full DURATION-second clip
# yields more frames than this, so long clips are truncated - confirm intended.
max_time_steps = 109

for file_name, label in labels.items():
    file_path = os.path.join(DATASET_PATH_TRAIN, file_name + ".flac")

    # Load (and resample to SAMPLE_RATE) at most DURATION seconds of audio
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Mel power spectrogram converted to dB relative to the clip's own maximum
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Force every spectrogram to exactly max_time_steps columns
    n_frames = mel_spectrogram.shape[1]
    if n_frames < max_time_steps:
        # With ref=np.max the loudest bin sits at 0 dB, so zero-padding would
        # append maximum-energy frames. Pad with the clip's quietest value so
        # the padding looks like silence.
        mel_spectrogram = np.pad(
            mel_spectrogram,
            ((0, 0), (0, max_time_steps - n_frames)),
            mode='constant',
            constant_values=mel_spectrogram.min(),
        )
    else:
        mel_spectrogram = mel_spectrogram[:, :max_time_steps]

    X_train.append(mel_spectrogram)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)


In [4]:
# Build the development set: parse the protocol file, then turn every listed
# clip into a fixed-width log-mel spectrogram.

# Map utterance id -> class label. Each protocol line is whitespace-separated:
# the second field is the file name (no extension) and the last field is the
# key ("bonafide" -> 0, anything else -> 1).
labels = {}

with open(LABEL_FILE_PATH_DEV, 'r') as label_dev:
    for line in label_dev:
        parts = line.split()
        if len(parts) < 2:
            # Blank or truncated line: nothing to index, skip instead of crashing.
            continue
        labels[parts[1]] = 0 if parts[-1] == "bonafide" else 1

X_dev = []
y_dev = []

# Fixed number of spectrogram frames (columns) per example.
# NOTE(review): with librosa's default hop length a full DURATION-second clip
# yields more frames than this, so long clips are truncated - confirm intended.
max_time_steps = 109

for file_name, label in labels.items():
    file_path = os.path.join(DATASET_PATH_DEV, file_name + ".flac")

    # Load (and resample to SAMPLE_RATE) at most DURATION seconds of audio
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Mel power spectrogram converted to dB relative to the clip's own maximum
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Force every spectrogram to exactly max_time_steps columns
    n_frames = mel_spectrogram.shape[1]
    if n_frames < max_time_steps:
        # With ref=np.max the loudest bin sits at 0 dB, so zero-padding would
        # append maximum-energy frames. Pad with the clip's quietest value so
        # the padding looks like silence.
        mel_spectrogram = np.pad(
            mel_spectrogram,
            ((0, 0), (0, max_time_steps - n_frames)),
            mode='constant',
            constant_values=mel_spectrogram.min(),
        )
    else:
        mel_spectrogram = mel_spectrogram[:, :max_time_steps]

    X_dev.append(mel_spectrogram)
    y_dev.append(label)

X_dev = np.array(X_dev)
y_dev = np.array(y_dev)


In [6]:
# Build the evaluation set: parse the protocol file, then turn every listed
# clip into a fixed-width log-mel spectrogram.

# Map utterance id -> class label. Each protocol line is whitespace-separated:
# the second field is the file name (no extension) and the last field is the
# key ("bonafide" -> 0, anything else -> 1).
labels = {}

with open(LABEL_FILE_PATH_EVAL, 'r') as label_eval:
    for line in label_eval:
        parts = line.split()
        if len(parts) < 2:
            # Blank or truncated line: nothing to index, skip instead of crashing.
            continue
        labels[parts[1]] = 0 if parts[-1] == "bonafide" else 1

X_eval = []
y_eval = []

# Fixed number of spectrogram frames (columns) per example.
# NOTE(review): with librosa's default hop length a full DURATION-second clip
# yields more frames than this, so long clips are truncated - confirm intended.
max_time_steps = 109

for file_name, label in labels.items():
    file_path = os.path.join(DATASET_PATH_EVAL, file_name + ".flac")

    # Load (and resample to SAMPLE_RATE) at most DURATION seconds of audio
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Mel power spectrogram converted to dB relative to the clip's own maximum
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Force every spectrogram to exactly max_time_steps columns
    n_frames = mel_spectrogram.shape[1]
    if n_frames < max_time_steps:
        # With ref=np.max the loudest bin sits at 0 dB, so zero-padding would
        # append maximum-energy frames. Pad with the clip's quietest value so
        # the padding looks like silence.
        mel_spectrogram = np.pad(
            mel_spectrogram,
            ((0, 0), (0, max_time_steps - n_frames)),
            mode='constant',
            constant_values=mel_spectrogram.min(),
        )
    else:
        mel_spectrogram = mel_spectrogram[:, :max_time_steps]

    X_eval.append(mel_spectrogram)
    y_eval.append(label)

X_eval = np.array(X_eval)
y_eval = np.array(y_eval)


In [8]:
# Sanity check: report the shape of each split's feature array (train, dev, eval).
for split_features in (X_train, X_dev, X_eval):
    print(split_features.shape)

(25380, 128, 109)
(24844, 128, 109)
(71237, 128, 109)


In [2]:
# Save the data before data augmentation/normalization.
# This cell needs X_*/y_* from the feature-extraction cells to exist in the
# kernel; running it on a fresh kernel raises NameError.

# Directory that receives one X_<split>.npy / y_<split>.npy pair per split
save_dir = '../datasets/melSpectogram'

# np.save does not create missing parent directories, so make sure it exists
os.makedirs(save_dir, exist_ok=True)

for split_name, split_features, split_labels in (
    ('train', X_train, y_train),
    ('dev', X_dev, y_dev),
    ('eval', X_eval, y_eval),
):
    np.save(f'{save_dir}/X_{split_name}.npy', split_features)
    np.save(f'{save_dir}/y_{split_name}.npy', split_labels)


NameError: name 'X_train' is not defined

In [None]:
# Reload previously saved arrays from the .npy files on disk.
# Only the training features are loaded here; for dev and eval just the labels are read.
X_train = np.load('../datasets/melSpectogram/X_train.npy')
y_train = np.load('../datasets/melSpectogram/y_train.npy')
y_dev = np.load('../datasets/melSpectogram/y_dev.npy')
y_eval = np.load('../datasets/melSpectogram/y_eval.npy')