In [42]:
import os
import numpy as np
import librosa
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from sklearn.metrics import classification_report

In [43]:
# Define paths and parameters
DATASET_PATH = "/home/jonat/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_train/flac"
LABEL_FILE_PATH = "/home/jonat/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt"
NUM_CLASSES = 2  # Number of classes (bonafide and spoof)
SAMPLE_RATE = 16000  # Sample rate of your audio files
DURATION = 5  # Duration of audio clips in seconds
N_MELS = 128  # Number of Mel frequency bins
max_time_steps = 109  # Define the maximum time steps for your model

In [44]:
labels = {}

with open(LABEL_FILE_PATH, 'r') as label_file:
    lines = label_file.readlines()

for line in lines:
    parts = line.strip().split()
    file_name = parts[1]
    label = 1 if parts[-1] == "bonafide" else 0
    labels[file_name] = label

X = []
y = []

max_time_steps = 109  # Define the maximum time steps for your model

for file_name, label in labels.items():
    file_path = os.path.join(DATASET_PATH, file_name + ".flac")

    # Load audio file using librosa
    audio, _ = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

    # Extract Mel spectrogram using librosa
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=SAMPLE_RATE, n_mels=N_MELS)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Ensure all spectrograms have the same width (time steps)
    if mel_spectrogram.shape[1] < max_time_steps:
        mel_spectrogram = np.pad(mel_spectrogram, ((0, 0), (0, max_time_steps - mel_spectrogram.shape[1])), mode='constant')
    else:
        mel_spectrogram = mel_spectrogram[:, :max_time_steps]

    X.append(mel_spectrogram)
    y.append(label)

X = np.array(X)
y = np.array(y)

X,y


(array([[[-77.995224, -80.      , -80.      , ..., -80.      ,
          -80.      , -78.569336],
         [-70.491844, -76.31891 , -80.      , ..., -80.      ,
          -80.      , -76.42357 ],
         [-59.412792, -59.22517 , -62.864876, ..., -64.179985,
          -64.98839 , -67.191246],
         ...,
         [-80.      , -80.      , -80.      , ..., -80.      ,
          -80.      , -80.      ],
         [-80.      , -80.      , -80.      , ..., -80.      ,
          -80.      , -80.      ],
         [-80.      , -80.      , -80.      , ..., -80.      ,
          -80.      , -80.      ]],
 
        [[-67.49096 , -73.61943 , -80.      , ..., -80.      ,
          -80.      , -80.      ],
         [-65.48586 , -71.42825 , -80.      , ..., -80.      ,
          -80.      , -80.      ],
         [-61.25968 , -60.12696 , -60.503258, ..., -66.97651 ,
          -61.73862 , -61.10951 ],
         ...,
         [-80.      , -80.      , -80.      , ..., -69.02563 ,
          -69.156944, -7

In [45]:
y_encoded = to_categorical(y, NUM_CLASSES)

split_index = int(0.8 * len(X))
X_train, X_val = X[:split_index], X[split_index:]
y_train, y_val = y_encoded[:split_index], y_encoded[split_index:]

In [46]:
# Define CNN model architecture
input_shape = (N_MELS, max_time_steps, 1)  # Input shape based on your spectrogram dimensions
model_input = Input(shape=input_shape)

# Convolutional layers with batch normalization and max pooling
x = Conv2D(32, kernel_size=(3, 3), activation='relu')(model_input)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Conv2D(64, kernel_size=(3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

x = Conv2D(128, kernel_size=(3, 3), activation='relu')(x)
x = BatchNormalization()(x)
x = MaxPooling2D(pool_size=(2, 2))(x)

# Global Average Pooling Layer
x = GlobalAveragePooling2D()(x)

# Fully connected layers with dropout
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)

# Output layer with softmax activation
model_output = Dense(NUM_CLASSES, activation='softmax')(x)

In [47]:
# Create and compile the model
model = Model(inputs=model_input, outputs=model_output)

# Define a learning rate schedule
def lr_schedule(epoch):
    if epoch < 10:
        return 0.001
    elif epoch < 20:
        return 0.0005
    else:
        return 0.0001

lr_scheduler = LearningRateScheduler(lr_schedule)

In [49]:
# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the Model
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=[lr_scheduler, early_stopping]
)



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


In [50]:
# Evaluate the Model
loss, accuracy = model.evaluate(X_val, y_val)
print(f"Validation Loss: {loss:.4f}, Validation Accuracy: {accuracy*100:.2f}%")

# Generate predictions on the validation set
y_pred = model.predict(X_val)
y_pred_classes = np.argmax(y_pred, axis=1)

Validation Loss: 0.2392, Validation Accuracy: 89.20%


In [51]:
# Print classification report
class_names = ['Class 0', 'Class 1']  # Update with your class labels
print(classification_report(np.argmax(y_val, axis=1), y_pred_classes, target_names=class_names))

# Save the Model
model.save("advanced_audio_classifier.h5")

              precision    recall  f1-score   support

     Class 0       1.00      0.89      0.94      5076
     Class 1       0.00      0.00      0.00         0

    accuracy                           0.89      5076
   macro avg       0.50      0.45      0.47      5076
weighted avg       1.00      0.89      0.94      5076



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [55]:
import opendatasets as od
import pandas

od.download("https://www.kaggle.com/datasets/andreadiubaldo/wavefake-test")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username:Your Kaggle Key:Downloading wavefake-test.zip to ./wavefake-test


100%|██████████| 26.9G/26.9G [07:26<00:00, 64.8MB/s]



