## Part 1 - Organize the Audio Data

In [1]:
import utils as u
import librosa
import soundfile as sf
import os
import numpy as np

In [2]:
# Root folder handed to the one-off FLAC -> WAV conversion (kept commented out below).
# No need to convert again in the future, just use the files in wav_files folder directly
data_home = '/Volumes/T7/group5_DL4M/data/dev_set'
# NOTE(review): this path contains a `data/` segment that the `wav_directory` path used
# in the following cells does not have - confirm which layout is the real one.
# u.convert_flac_to_wav_librosa(data_home)

In [3]:
# Inputs for u.load_data: the folder of WAV files and the text file with the labels.
# NOTE(review): both are absolute, machine-specific paths; the FileNotFoundError recorded
# for wav_directory suggests the external volume was not mounted (or the path is wrong).
wav_directory = '/Volumes/T7/group5_DL4M/dev_set/wav_files'
label_file = '/Users/mona/Documents/GitHub/group_5_DL4M1/data/dev.txt'

In [4]:
# Load the audio and its labels through the project helper.
# NOTE(review): the helper's return types are not visible here - presumably two parallel
# sequences (one entry per file); confirm in utils.load_data.
audio_data, audio_labels = u.load_data(wav_directory, label_file)

FileNotFoundError: [Errno 2] No such file or directory: '/Volumes/T7/group5_DL4M/dev_set/wav_files'

In [None]:
# Sanity check: how many items were loaded, and a peek at the first 10 labels
print(f"Loaded {len(audio_data)} audio files.")
print(f"Sample labels: {audio_labels[:10]}")

Look at one sample from the dataset

In [None]:
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio
import numpy as np

# Load one clip at its native sample rate (sr=None disables resampling)
file_path = '/Volumes/T7/group5_DL4M/dev_set/wav_files/CtrSVDD_0059_D_0000530.wav'
audio, sr = librosa.load(file_path, sr=None)

# Play the audio
display(Audio(data=audio, rate=sr))

# Upper edge of the mel filterbank. The same value has to be used when the spectrogram
# is computed and when it is drawn; otherwise specshow labels the mel axis for a
# different frequency range than the one the data actually covers.
# Capped at the Nyquist frequency so clips sampled below 16 kHz still work.
mel_fmax = min(8000, sr / 2)

# Plot the spectrogram. refer: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.colorbar.html
plt.figure(figsize=(10, 4))
S = librosa.feature.melspectrogram(y=audio, sr=sr, fmax=mel_fmax)
S_dB = librosa.power_to_db(S, ref=np.max)  # dB relative to the loudest bin
librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=mel_fmax)
plt.colorbar(format='%+2.0f dB')
plt.title('Mel-frequency spectrogram')
plt.tight_layout()
plt.show()

# `sr` is reused by later cells, so report what it ended up being
print("sr=", sr)

Define Splits

In [None]:
# Partition the raw audio and labels into train / validation / test subsets.
# The 0.2 values are forwarded as-is; how split_data interprets them is defined in utils.
(train_data, val_data, test_data,
 train_labels, val_labels, test_labels) = u.split_data(
    audio_data,
    audio_labels,
    test_size=0.2,
    val_size=0.2,
)

print(f"Training data size: {len(train_data)}")
print(f"Validation data size: {len(val_data)}")
print(f"Test data size: {len(test_data)}")

In [None]:
# Visualize the labels of the full loaded set (plotting logic lives in utils, not shown here)
u.plot_label_distribution(audio_labels)

In [None]:
# NOTE(review): `sr` is whatever the single-sample cell above left in the kernel; this
# presumably assumes every file shares that sample rate - confirm.
u.plot_audio_length_distribution(audio_data, sr)

### Data Preprocessing

MFCC & LFCC 

In [None]:
# Look at the MFCCs of one audio file (the LFCCs of the same clip are computed further down).
# This rebinds `audio` and `sr`, which later cells reuse.
audio, sr = librosa.load('/Volumes/T7/group5_DL4M/dev_set/wav_files/CtrSVDD_0110_D_0012049.wav', sr=None)  # Load an audio file
mfccs = u.compute_mfcc(audio, sr)
print("MFCCs:", mfccs.shape)

In [None]:
# Plot the MFCCs computed in the previous cell
u.plot_mfcc(mfccs, sr)

In [None]:
# LFCCs of the same clip (`audio` / `sr` come from the MFCC cell above)
lfccs = u.compute_lfcc(audio, sr)
print("LFCCs:", lfccs.shape)

In [None]:
# Plot the LFCCs computed in the previous cell
u.plot_lfcc(lfccs, sr)

Prepare Data for Modeling

In [None]:
# Batch feature extraction, smoke-tested on two files before running the whole set
test_file_paths = [
    '/Volumes/T7/group5_DL4M/dev_set/wav_files/CtrSVDD_0059_D_0000525.wav',
    '/Volumes/T7/group5_DL4M/dev_set/wav_files/CtrSVDD_0059_D_0000526.wav',
]
test_features, test_sr = u.load_and_extract_features(
    test_file_paths,
    feature_type='lfcc',
)

print(f"Shape of extracted features: {test_features.shape}")
print(f"Sample rate used: {test_sr}")

In [None]:
# Test storing features in h5 file
import h5py

# Write the two-file LFCC smoke-test features to an HDF5 file.
# Mode 'w' creates the file and truncates it if it already exists.
with h5py.File('/Users/mona/Documents/GitHub/group_5_DL4M1/data/test_LFCC_features.h5', 'w') as h5f:
    # Store everything as a single dataset named 'LFCC_dataset_test'
    h5f.create_dataset('LFCC_dataset_test', data=np.array(test_features))

In [None]:
# Same two files, this time with MFCC features.
# Note that this rebinds test_features / test_sr from the LFCC run above.
test_features, test_sr = u.load_and_extract_features(
    test_file_paths,
    feature_type='mfcc',
)

print(f"Shape of extracted features: {test_features.shape}")
print(f"Sample rate used: {test_sr}")

In [None]:
def process_file(file_path):
    """Extract LFCC features for a single audio file.

    Calls ``u.load_and_extract_features`` with a one-element list of paths and
    ``feature_type='lfcc'``. The helper returns a pair; only the first element
    (the features) is returned. The second element is discarded (other cells
    bind it to a sample-rate variable).

    Args:
        file_path: Path of the audio file to process.

    Returns:
        Whatever feature object the helper produces for that single file.
    """
    extracted, _unused_sr = u.load_and_extract_features([file_path], feature_type='lfcc')
    return extracted

In [None]:
# Extract LFCC features for every .wav file directly inside the dev-set folder.
dataset_directory = '/Volumes/T7/group5_DL4M/dev_set/wav_files'

# Refer to ChatGPT
# Regular files ending in '.wav', in the order os.listdir returns them.
# NOTE(review): os.listdir order is arbitrary; these features are later paired with
# audio_labels by position, so confirm u.load_data enumerates files in the same order.
audio_files = [os.path.join(dataset_directory, f) for f in os.listdir(dataset_directory) if f.endswith('.wav') and os.path.isfile(os.path.join(dataset_directory, f))]

# Process for the dataset (one file at a time; None results are dropped)
all_features = []
for file_path in audio_files:
    feature = process_file(file_path)
    if feature is not None:
        all_features.append(feature)

# Collect the distinct feature shapes. valid_features ends up as a plain copy of
# all_features (nothing is filtered in this loop).
valid_features = []
shapes = set()
for feature in all_features:
    valid_features.append(feature)
    shapes.add(feature.shape)

print("Unique feature shapes:", shapes)

In [None]:
# Longest extent along axis 1 among the shapes collected above (0 if none were collected)
max_length = max((shape[1] for shape in shapes), default=0)

# Bring every feature array to that common length using the project helper
uniform_features = u.pad_features(all_features, max_length)

# Single numpy array, ready to be stored / split
uniform_features_array = np.array(uniform_features)

In [None]:
# Persist the padded LFCC features so they can be reloaded without re-extracting.
# Mode 'w' creates the file and truncates it if it already exists.
with h5py.File('/Users/mona/Documents/GitHub/group_5_DL4M1/data/LFCC_features.h5', 'w') as h5f:
    # Everything goes into one dataset named 'LFCC_dataset_1'
    h5f.create_dataset('LFCC_dataset_1', data=np.array(uniform_features_array))


In [None]:
# Shape of the padded feature array built above
print("Shape of the dataset:", uniform_features_array.shape)

In [None]:
# Split the data again after feature extraction.
# This rebinds train_labels / val_labels / test_labels from the earlier raw-audio split.
# NOTE(review): features were built from os.listdir order while audio_labels came from
# u.load_data - verify both are in the same per-file order, otherwise labels are misaligned.
train_data_feature, val_data_feature, test_data_feature, train_labels, val_labels, test_labels = u.split_data(
    uniform_features_array, audio_labels, test_size=0.2, val_size=0.2)

print("Training data size:", len(train_data_feature))
print("Validation data size:", len(val_data_feature))
print("Test data size:", len(test_data_feature))

### Model Training

#### Data preparation for model training

In [None]:
# Convert the train / validation labels to numpy arrays.
# NOTE(review): test_labels is not converted in this cell.
train_labels = np.array(train_labels)
val_labels = np.array(val_labels)

In [None]:
#import tensorflow as tf
#train_labels_reshaped = tf.reshape(train_labels, [train_labels.shape[0], 1, 1])
#val_labels_reshaped = tf.reshape(val_labels, [val_labels.shape[0], 1, 1])

In [None]:
# Add one more dimension to the dataset shape (a trailing axis of size 1).
# Caution: each name is rebound to its own expanded version, so running this cell
# a second time appends yet another axis.
train_data_feature = train_data_feature[..., np.newaxis]
val_data_feature = val_data_feature[..., np.newaxis]
test_data_feature = test_data_feature[..., np.newaxis]

In [None]:
# Turn dataset into numpy arrays of dtype float (new arrays; the originals are kept).
train_data_feature_1 = np.array(train_data_feature, dtype=float)
val_data_feature_1 = np.array(val_data_feature, dtype=float)
# The name below is misspelled ("feautre"), but later cells refer to it with this exact spelling.
test_data_feautre_1 = np.array(test_data_feature, dtype=float)

In [None]:
# Turn labels into numeric format: 'deepfake' -> 1, anything else -> 0.
# np.asarray makes the comparison element-wise even for a plain Python list.
# Without it, `some_list == 'deepfake'` is a single False and np.where would return
# a 0-d array holding 0 instead of one label per sample (test_labels is never
# converted to an ndarray before this cell).
train_labels = np.where(np.asarray(train_labels) == 'deepfake', 1, 0)
val_labels = np.where(np.asarray(val_labels) == 'deepfake', 1, 0)
test_labels = np.where(np.asarray(test_labels) == 'deepfake', 1, 0)

In [None]:
# Features and labels of a split must agree on their first dimension
print(f"Train data shape: {train_data_feature_1.shape}")
print(f"Train labels shape: {train_labels.shape}")
print(f"Validation data shape: {val_data_feature_1.shape}")
print(f"Validation labels shape: {val_labels.shape}")

In [None]:
# Shape of the full padded feature array (all samples, before splitting)
input_shape = uniform_features_array.shape
print(f"Feature array shape: {input_shape}")

# NOTE(review): this tuple still includes the number of samples as its first entry and
# has no trailing channel axis - confirm that this is what models.build_model expects.
print(f"Input shape for model: {input_shape}")

#### Define Model

In [None]:
from models import build_model

# Build the network from the shape of the full feature array and show its layers.
# NOTE(review): input_shape includes the sample count as its first entry - confirm that
# build_model accounts for that.
input_shape = uniform_features_array.shape
model = build_model(input_shape)
model.summary()

#### Fit the Model

In [None]:
import keras
# Save the model to disk only when the monitored validation loss improves
callbacks = [
    keras.callbacks.ModelCheckpoint(
        filepath="checkpoints/audio_convnet.keras",
        monitor="val_loss",
        save_best_only=True,
    ),
]

batch_size = 32

# Train for 10 epochs, validating on the held-out validation split after each one
history = model.fit(
    x=train_data_feature_1,
    y=train_labels,
    validation_data=(val_data_feature_1, val_labels),
    batch_size=batch_size,
    epochs=10,
    callbacks=callbacks,
)

#### Check loss and accuracy

In [None]:
# Plot the training history returned by model.fit (plotting logic lives in utils)
u.plot_loss(history)

#### Load Checkpoint

In [None]:
# Load the best checkpoint of the model.
# The path is relative, so this depends on the kernel's current working directory.
model_reloaded = keras.models.load_model("checkpoints/audio_convnet.keras")

In [None]:
# Summarize the test features instead of dumping the whole array into the output
print("Test features:", test_data_feature.shape, test_data_feature.dtype)

In [None]:
# Smoke test: run the reloaded model on a batch holding only the first test sample.
# This only produces a prediction; it does not measure accuracy.
sample_test = test_data_feature[:1]
try:
    result = model_reloaded.predict(sample_test)
except Exception as err:
    # Best-effort check: report the problem and keep the notebook running
    print("Error during single sample prediction:", err)
else:
    print("Single sample prediction:", result)

In [None]:
# Show the architecture of the model restored from the checkpoint
model_reloaded.summary()

### Evaluation

In [None]:
from sklearn.metrics import roc_curve
from keras.models import load_model

# Load the saved model from its absolute path.
# This rebinds `model`, replacing the in-memory model that was trained above.
model = load_model('/Users/mona/Documents/GitHub/group_5_DL4M1/checkpoints/audio_convnet.keras')

In [None]:
# Load the h5 file
# Refer to https://docs.h5py.org/en/stable/high/file.html
import h5py
def load_features_h5(file_path):
    """Read the first loadable top-level entry of an HDF5 file into memory.

    The top-level keys are tried in iteration order. The first one that can be
    read in full with ``[:]`` is returned immediately. Keys that fail to load are
    reported on stdout and skipped.

    Args:
        file_path: Path of the HDF5 file to open read-only.

    Returns:
        The data of the first readable entry, or None if no entry could be read.
    """
    with h5py.File(file_path, 'r') as h5_file:
        for name in h5_file.keys():
            try:
                return h5_file[name][:]
            except Exception as err:
                print(f"Error loading dataset '{name}': {err}")
    return None


# Reload the padded LFCC features that an earlier cell wrote to this HDF5 file.
# `features` is None if nothing in the file could be read.
file_path = '/Users/mona/Documents/GitHub/group_5_DL4M1/data/LFCC_features.h5'  
features = load_features_h5(file_path)

In [None]:
# Show the shape only when the load succeeded (load_features_h5 returns None on failure)
if features is not None:
    print(features.shape)

In [None]:
# Only needed after a kernel restart; comment this cell out when running the notebook top to bottom.
# Caution: `features` is everything stored in LFCC_features.h5 (the full padded set),
# so this rebinds test_data_feature to all samples, not just the test split.
import numpy as np
test_data_feature = features[..., np.newaxis]

In [None]:
# Only needed after a kernel restart; comment this cell out when running the notebook top to bottom.
# Float copy of the test features (the variable name keeps the spelling used elsewhere in the notebook).
test_data_feautre_1 = np.array(test_data_feature, dtype=float)

In [None]:
# Shape of the float test-feature array
print(test_data_feautre_1.shape)

In [None]:
# Load the evaluation samples (40, per the original note) together with their true labels.
# Only the labels are kept; the audio returned by load_data is discarded.
# This rebinds wav_directory and label_file from the earlier cells.
import utils as u
#wav_directory = '/Volumes/T7/group5_DL4M/dev_set/wav_files'
wav_directory = '/Users/mona/Documents/GitHub/group_5_DL4M1/predictions audio file'
label_file = '/Users/mona/Documents/GitHub/group_5_DL4M1/True Lables Test.txt'
_, y_labels = u.load_data(wav_directory, label_file)

In [None]:
# Inspect the ground-truth labels of the evaluation samples
print(y_labels)

In [None]:
import os
import utils as u
# Extract LFCC features for every .wav file in the prediction folder.
# Requires process_file from the feature-extraction section to be defined in the kernel.
predict_directory = '/Users/mona/Documents/GitHub/group_5_DL4M1/predictions audio file'

# Refer to ChatGPT
# Regular files ending in '.wav', in the order os.listdir returns them.
# NOTE(review): y_labels comes from u.load_data; confirm it follows the same file order,
# since predictions and labels are compared by position later on.
audio_files = [os.path.join(predict_directory, f) for f in os.listdir(predict_directory) if f.endswith('.wav') and os.path.isfile(os.path.join(predict_directory, f))]

# Sequential extraction, one file at a time; None results are dropped
predict_all_features = []
for file_path in audio_files:
    feature = process_file(file_path)
    if feature is not None:
        predict_all_features.append(feature)

# Collect the distinct feature shapes. No filtering happens in this loop, so
# valid_features ends up as a plain copy of predict_all_features.
valid_features = []
shapes = set()
for feature in predict_all_features:
    valid_features.append(feature)
    shapes.add(feature.shape)

print("feature shapes:", shapes)

In [None]:
# Pad a list of feature arrays to a target length

def pad_features(features, target_length):
    """
    Zero-pad or truncate 2-D feature arrays along axis 1 to a common length.

    Args:
        features (list of np.array): Feature arrays that share the same first
            dimension, e.g. shape (13, variable_length).
        target_length (int): Size of axis 1 (time) in every output entry.

    Returns:
        np.array: Float array of shape
        (len(features), features[0].shape[0], target_length). Entries shorter
        than target_length are right-padded with zeros; longer ones are cut off.
    """
    n_rows = features[0].shape[0]
    padded = np.zeros((len(features), n_rows, target_length))

    for idx, feat in enumerate(features):
        # Copy at most target_length frames; whatever is left stays zero
        n_frames = min(feat.shape[1], target_length)
        padded[idx, :, :n_frames] = feat[:, :n_frames]

    return padded

# Fixed time length for the prediction features.
# NOTE(review): 798 is presumably the padded length the model was trained on - confirm
# against the shape printed for the training feature array.
target_length = 798

# Uses the pad_features defined in this cell, not u.pad_features
predict_uniform_features_array = pad_features(predict_all_features, target_length)
# Add one more dimension to match the shape
predict_uniform_features_array = predict_uniform_features_array[..., np.newaxis]

In [None]:
# Shape of the padded prediction features, including the trailing axis added above
print(predict_uniform_features_array.shape)

In [None]:
# Score every prediction sample with the model loaded in the Evaluation section
predictions = model.predict(predict_uniform_features_array)

In [None]:
# Check that there is one prediction per ground-truth label
print(f'Length of y_labels: {len(y_labels)}')
print(f'Length of predictions: {len(predictions)}')

# Only reports a mismatch; finding which entries are missing has to be done by hand
if len(y_labels) != len(predictions):
    print("Mismatch in array lengths detected. Investigating further...")

In [None]:
# Encode the ground truth with the same convention used for training
# ('deepfake' -> 1, anything else -> 0). The model was fitted on labels encoded this
# way, so its score refers to the 'deepfake' class. Encoding 'bonafide' as 1 here
# would give the labels and the scores opposite polarity.
y_numeric = np.where(np.asarray(y_labels) == 'deepfake', 1, 0)

# Then use these numeric labels to calculate EER
eer, threshold = u.calculate_eer(y_numeric, predictions)
print(f"EER: {eer:.2%}, Threshold: {threshold}")

In [None]:
# Plot the DET curve for the same labels and scores used for the EER above
u.plot_det_curve(y_numeric, predictions)