# Importing libraries

In [None]:
pip install noisereduce

In [None]:
import numpy as np
import librosa
import os
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Lambda, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import noisereduce as nr
from sklearn.metrics import accuracy_score, roc_curve, auc, f1_score
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


# Data Pre-processing

In [None]:

# Training-set configuration (ASVspoof 2019 LA, train partition).
# Directory that holds the training .flac files.
DATASET_PATH = "/kaggle/input/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_train/flac"
# Protocol file; the feature-extraction code reads speaker id, file id and label from each line.
LABEL_FILE_PATH = "/kaggle/input/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt"
# Sampling rate handed to librosa.load (audio is resampled to this rate).
SAMPLE_RATE = 16000
# Upper bound, in seconds, on how much of each clip is loaded.
DURATION = 5
N_MFCC = 13  # Number of MFCC coefficients extracted per frame


In [None]:
def load_and_transform_audio_to_mfcc(filepath, target_duration, sample_rate, n_mfcc, n_fft=2048, hop_length=512, time_steps=109):
    """Load an audio file and return its MFCCs as a fixed-size array.

    Args:
        filepath: Path of the audio file to load.
        target_duration: Maximum number of seconds read from the file.
        sample_rate: Sampling rate the audio is loaded at.
        n_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size used for the MFCC computation.
        hop_length: Hop between successive frames, in samples.
        time_steps: Number of frames in the returned array; shorter clips are
            zero-padded on the right, longer ones are truncated.

    Returns:
        Array of shape (n_mfcc, time_steps, 1), or None if anything fails
        (the error is printed, not raised).
    """
    try:
        audio, sr = librosa.load(filepath, sr=sample_rate, duration=target_duration)
        coefficients = librosa.feature.mfcc(
            y=audio, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
        )

        # Positive: frames to pad; negative: frames to drop.
        frame_deficit = time_steps - coefficients.shape[1]
        if frame_deficit > 0:
            coefficients = np.pad(coefficients, ((0, 0), (0, frame_deficit)), 'constant')
        elif frame_deficit < 0:
            coefficients = coefficients[:, :time_steps]

        # Trailing singleton axis = channel dimension expected by Conv2D.
        return coefficients.reshape((n_mfcc, time_steps, 1))
    except Exception as e:
        print(f"Error processing {filepath}: {str(e)}")
        return None

# Feature Extraction

In [None]:



# Build (reference, test) MFCC pairs for training.
# Each speaker's first bonafide utterance is the reference; every utterance of
# that speaker (bonafide or spoof) is paired with it. Label 1 = bonafide, 0 = spoof.

# Maximum number of pairs to build (keeps feature extraction time bounded).
MAX_TRAIN_PAIRS = 10000

try:
    with open(LABEL_FILE_PATH, 'r') as label_file:
        lines = label_file.readlines()
except IOError:
    print(f"Could not read file: {LABEL_FILE_PATH}")
    # Nothing can be built without the protocol file. Re-raise so the cell
    # stops here instead of failing below with a NameError on `lines`.
    raise

# Parse once; skip blank or truncated rows, which would otherwise raise IndexError.
records = []
for line in lines:
    parts = line.split()
    if len(parts) < 3:
        continue
    records.append((parts[0], parts[1], parts[-1]))

# First pass: remember the first bonafide utterance of each speaker.
speaker_refs = {}
for speaker_id, audio_filename, label in records:
    if label == "bonafide":
        speaker_refs.setdefault(speaker_id, audio_filename)

X_pairs = []  # [reference_mfcc, test_mfcc] per pair
y_pairs = []  # 1 if the test utterance is bonafide, else 0
processed_audios = 0
# Reference MFCCs keyed by file id, so each reference clip is decoded only once
# instead of once per pair. A failed load is cached as None.
ref_mfcc_cache = {}

# Second pass: pair every utterance with its speaker's reference.
for speaker_id, audio_filename, label in records:
    if processed_audios >= MAX_TRAIN_PAIRS:
        break

    ref_filename = speaker_refs.get(speaker_id)
    if ref_filename is None:
        # Speaker has no bonafide utterance to compare against.
        continue

    if ref_filename not in ref_mfcc_cache:
        ref_audio_path = os.path.join(DATASET_PATH, ref_filename + ".flac")
        ref_mfcc_cache[ref_filename] = load_and_transform_audio_to_mfcc(
            ref_audio_path, DURATION, SAMPLE_RATE, N_MFCC
        )
    ref_audio_mfcc = ref_mfcc_cache[ref_filename]
    if ref_audio_mfcc is None:
        continue

    audio_path = os.path.join(DATASET_PATH, audio_filename + ".flac")
    test_audio_mfcc = load_and_transform_audio_to_mfcc(audio_path, DURATION, SAMPLE_RATE, N_MFCC)
    if test_audio_mfcc is None:
        continue

    X_pairs.append([ref_audio_mfcc, test_audio_mfcc])
    y_pairs.append(1 if label == "bonafide" else 0)
    processed_audios += 1

# Convert lists to numpy arrays for the model
X_pairs = np.array(X_pairs)  # Shape: (num_pairs, 2, n_mfcc, time_steps, 1)
y_pairs = np.array(y_pairs)


In [None]:
X_pairs.shape

# CNN Model Architecture Using Siamese Network

In [None]:

def build_base_network(input_shape):
    """Build the shared CNN encoder that maps one MFCC input to a 64-d embedding.

    Two Conv2D -> BatchNormalization -> MaxPooling2D -> Dropout stages
    (32 then 64 filters) are followed by Flatten and two ReLU Dense layers
    (128 then 64 units).

    Args:
        input_shape: Shape of a single input sample, excluding the batch axis.

    Returns:
        A Keras Model from the input tensor to the 64-unit Dense output.
    """
    network_input = Input(shape=input_shape)

    features = network_input
    for n_filters in (32, 64):
        features = Conv2D(n_filters, (3, 3), activation='relu')(features)
        features = BatchNormalization()(features)
        features = MaxPooling2D((2, 2))(features)
        features = Dropout(0.25)(features)

    features = Flatten()(features)
    for n_units in (128, 64):
        features = Dense(n_units, activation='relu')(features)

    return Model(network_input, features)

def euclidean_distance(vects):
    """Return the row-wise Euclidean distance between two batches of embeddings.

    Args:
        vects: Pair (x, y) of tensors with matching shapes.

    Returns:
        Tensor holding one distance per row (axis 1 is reduced, kept as size 1).
    """
    first, second = vects
    squared_diff = K.square(first - second)
    sum_square = K.sum(squared_diff, axis=1, keepdims=True)
    # Clamp to epsilon before the square root so the result is never sqrt(0).
    clamped = K.maximum(sum_square, K.epsilon())
    return K.sqrt(clamped)

def build_siamese_model(input_shape):
    """Assemble and compile the Siamese network.

    Both inputs go through the same encoder instance (shared weights). The
    Euclidean distance between the two embeddings is fed to a single sigmoid
    unit that produces the prediction.

    Args:
        input_shape: Shape of one input sample, excluding the batch axis.

    Returns:
        A compiled Keras Model taking [left_input, right_input] and returning
        one sigmoid score per pair (Adam optimizer, binary cross-entropy loss,
        accuracy metric).
    """
    # Inputs are created before the encoder, as in the original ordering.
    branch_inputs = [Input(input_shape), Input(input_shape)]

    encoder = build_base_network(input_shape)
    embeddings = [encoder(branch) for branch in branch_inputs]

    distance = Lambda(euclidean_distance)(embeddings)
    prediction = Dense(1, activation='sigmoid')(distance)

    siamese_net = Model(inputs=branch_inputs, outputs=prediction)
    siamese_net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return siamese_net

# Shape of one model input: (n_mfcc, time_steps, channels). This matches what
# load_and_transform_audio_to_mfcc returns for N_MFCC = 13 and its default time_steps = 109.
input_shape = (13, 109, 1)  # Keep in sync with N_MFCC and the time_steps used for feature extraction
siamese_model = build_siamese_model(input_shape)



# Adjust input data for training the model


In [None]:
# Shape of one model input: (n_mfcc, time_steps, channels).
input_shape = (13, 109, 1)

# Rebuild the model so that re-running this cell starts from freshly
# initialised weights rather than from a previously trained model.
siamese_model = build_siamese_model(input_shape)

# X_pairs is (num_pairs, 2, n_mfcc, time_steps, 1):
# index 0 on axis 1 is the reference clip, index 1 the clip under test.
assert X_pairs.ndim == 5 and X_pairs.shape[1:] == (2, *input_shape), (
    f"Unexpected X_pairs shape {X_pairs.shape}; expected (num_pairs, 2, {input_shape})"
)
assert len(X_pairs) == len(y_pairs), "X_pairs and y_pairs must have the same length"

left_inputs = X_pairs[:, 0]
right_inputs = X_pairs[:, 1]

# Training is intentionally NOT run here. The next cell calls
# siamese_model.fit(...) once and keeps the returned History for the learning
# curves. Calling fit in both cells trained the same model for 20 epochs in
# total, while the plotted curves only covered the last 10.


In [None]:
from keras.models import Model
import numpy as np
import matplotlib.pyplot as plt

# Train the Siamese model and keep the History object for the learning curves.
history = siamese_model.fit([left_inputs, right_inputs], y_pairs, batch_size=32, epochs=10, validation_split=0.2)

# Per-epoch metrics recorded by Keras.
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(train_accuracy) + 1)

fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: accuracy.
ax_accuracy.plot(epochs, train_accuracy, 'b-', label='Training Accuracy')
ax_accuracy.plot(epochs, val_accuracy, 'r-', label='Validation Accuracy')
ax_accuracy.set_title('Training and Validation Accuracy')
ax_accuracy.set_xlabel('Epoch')
ax_accuracy.set_ylabel('Accuracy')
ax_accuracy.legend()

# Pad the y-range by 40% of the observed spread, clamp it to [0, 1], and leave
# 1% headroom so a curve reaching 100% stays visible.
all_accuracy = train_accuracy + val_accuracy
lowest_accuracy, highest_accuracy = min(all_accuracy), max(all_accuracy)
accuracy_padding = (highest_accuracy - lowest_accuracy) * 0.4
accuracy_upper_limit = min(1, highest_accuracy + accuracy_padding) * 1.01
ax_accuracy.set_ylim([max(0, lowest_accuracy - accuracy_padding), accuracy_upper_limit])

# Right panel: loss.
ax_loss.plot(epochs, train_loss, 'b-', label='Training Loss')
ax_loss.plot(epochs, val_loss, 'r-', label='Validation Loss')
ax_loss.set_title('Training and Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

# Same 40% padding for the loss axis (no clamping).
all_loss = train_loss + val_loss
lowest_loss, highest_loss = min(all_loss), max(all_loss)
loss_padding = (highest_loss - lowest_loss) * 0.4
ax_loss.set_ylim([lowest_loss - loss_padding, highest_loss + loss_padding])

plt.tight_layout()
plt.show()

# Test the model

# Testing Model on ASVspoof 2019 dataset

In [None]:
# Evaluation on the ASVspoof 2019 LA dev partition; requires the trained
# `siamese_model` from the cells above.
# NOTE: these assignments overwrite the training-set DATASET_PATH / LABEL_FILE_PATH
# defined earlier, so re-running the training feature-extraction cell after this
# one would read the dev data instead.
DATASET_PATH="/kaggle/input/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_dev/flac"

# Trial list for the dev partition.
LABEL_FILE_PATH = "/kaggle/input/asvpoof-2019-dataset/LA/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.dev.trl.txt"

def load_testing_pairs(label_file_path, dataset_path, limit=5000, label_index=-1):
    """Build (speaker_id, reference_file, test_file, label) trials from a protocol file.

    The first bonafide utterance seen for a speaker becomes that speaker's
    reference and is also emitted as a trial against itself. Every later
    utterance of the speaker (bonafide or not) is paired with that reference.
    Non-bonafide utterances that appear before the speaker has a reference
    are dropped.

    Args:
        label_file_path: Path to the whitespace-separated protocol file. Column 0
            is the speaker id and column 1 the file id (without extension).
        dataset_path: Unused; kept so existing callers keep working.
        limit: Number of lines read from the start of the file. This counts
            lines, not emitted trials.
        label_index: Column index of the label ("bonafide" or anything else).
            Defaults to the last column.

    Returns:
        List of (speaker_id, reference_filename, test_filename, label) tuples,
        with ".flac" appended to both file names. Empty if the file cannot be
        opened (a message is printed).
    """
    speaker_refs = {}  # speaker id -> file name of the first bonafide utterance
    testing_pairs = []

    try:
        with open(label_file_path, 'r') as file:
            # Stream the file so only the first `limit` lines are ever read.
            for line_number, line in enumerate(file):
                if line_number >= limit:
                    break

                parts = line.split()
                try:
                    speaker_id, audio_filename, label = parts[0], parts[1], parts[label_index]
                except IndexError:
                    # Blank or truncated line: skip it instead of crashing.
                    continue

                test_filename = audio_filename + ".flac"

                if label == "bonafide":
                    # setdefault registers the first bonafide file as reference
                    # (yielding a self-pair) and returns the stored one afterwards.
                    ref_filename = speaker_refs.setdefault(speaker_id, test_filename)
                    testing_pairs.append((speaker_id, ref_filename, test_filename, label))
                elif speaker_id in speaker_refs:
                    testing_pairs.append((speaker_id, speaker_refs[speaker_id], test_filename, label))

    except IOError:
        print(f"Could not read file: {label_file_path}")

    return testing_pairs

# Build (reference, test) trials from the first 5000 lines of the dev trial list
testing_pairs = load_testing_pairs(LABEL_FILE_PATH, DATASET_PATH, limit=5000)

In [None]:

def prepare_test_pairs(testing_pairs, dataset_path, sample_rate, duration, n_mfcc, n_fft, hop_length, time_steps):
    """Turn trial tuples into MFCC pair arrays and binary labels.

    Args:
        testing_pairs: Iterable of (speaker_id, reference_file, test_file, label)
            tuples; file names already include their extension.
        dataset_path: Directory containing the audio files.
        sample_rate, duration, n_mfcc, n_fft, hop_length, time_steps: Forwarded
            to load_and_transform_audio_to_mfcc.

    Returns:
        (X, y), where X stacks [reference_mfcc, test_mfcc] per usable trial and
        y holds 1 for "bonafide" trials and 0 otherwise. Trials whose reference
        or test clip fails to load are skipped.
    """
    X_test_pairs = []
    y_test_labels = []
    # Many trials share one reference clip per speaker, so decode each reference
    # once and reuse it. A failed load is cached as None.
    reference_cache = {}

    for _speaker_id, reference_audio, test_audio, label in testing_pairs:
        if reference_audio not in reference_cache:
            ref_audio_path = os.path.join(dataset_path, reference_audio)
            reference_cache[reference_audio] = load_and_transform_audio_to_mfcc(
                ref_audio_path, duration, sample_rate, n_mfcc, n_fft, hop_length, time_steps
            )
        ref_mfcc_db = reference_cache[reference_audio]
        if ref_mfcc_db is None:
            continue

        if test_audio == reference_audio:
            # Self-pair: same file, so the features are the same.
            test_mfcc_db = ref_mfcc_db
        else:
            test_audio_path = os.path.join(dataset_path, test_audio)
            test_mfcc_db = load_and_transform_audio_to_mfcc(
                test_audio_path, duration, sample_rate, n_mfcc, n_fft, hop_length, time_steps
            )
        if test_mfcc_db is None:
            continue

        X_test_pairs.append([ref_mfcc_db, test_mfcc_db])
        y_test_labels.append(1 if label == "bonafide" else 0)

    return np.array(X_test_pairs), np.array(y_test_labels)

# Extract MFCC pairs and labels for the evaluation trials
X_test_pairs, y_test_labels = prepare_test_pairs(
    testing_pairs, DATASET_PATH, SAMPLE_RATE, DURATION, N_MFCC,
    n_fft=2048, hop_length=512, time_steps=109,
)

# Split each pair into the two model inputs (reference first, clip under test second)
model_batch_shape = (-1, N_MFCC, 109, 1)
left_inputs = np.array([reference for reference, _ in X_test_pairs]).reshape(model_batch_shape)
right_inputs = np.array([candidate for _, candidate in X_test_pairs]).reshape(model_batch_shape)

# Score every pair with the trained model
predictions = siamese_model.predict([left_inputs, right_inputs])



In [None]:
from sklearn.metrics import accuracy_score, roc_curve, auc, f1_score
import matplotlib.pyplot as plt

# Threshold the sigmoid scores at 0.5 (1 = bonafide, 0 = spoof)
scores = predictions.ravel()
binary_predictions = [int(score >= 0.5) for score in scores]

# Accuracy, as a percentage
accuracy = accuracy_score(y_test_labels, binary_predictions)*100
print(f"Accuracy: {accuracy:.4f} %")

# F1 score, as a percentage
f1 = f1_score(y_test_labels, binary_predictions)*100
print(f"F1 Score: {f1:.4f} %" )

# The ROC curve is computed from the raw scores, not the thresholded labels
fpr, tpr, thresholds = roc_curve(y_test_labels, scores)
roc_auc = auc(fpr, tpr)

# ROC curve with the chance diagonal for reference
lw = 2
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()


In [None]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix

# Precision and recall for the positive (bonafide = 1) class, as percentages
precision = precision_score(y_test_labels, binary_predictions) * 100
recall = recall_score(y_test_labels, binary_predictions) * 100

# Specificity = TN / (TN + FP). labels=[0, 1] forces a 2x2 matrix even when one
# class is missing from the labels/predictions; without it the four-way
# unpacking below raises ValueError.
tn, fp, _, _ = confusion_matrix(y_test_labels, binary_predictions, labels=[0, 1]).ravel()
negatives = tn + fp
# With no negative samples specificity is undefined; report NaN rather than divide by zero.
specificity = (tn / negatives) * 100 if negatives else float("nan")

print(f"Precision: {precision:.4f} %")
print(f"Recall: {recall:.4f} %")
print(f"Specificity: {specificity:.4f} %")


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix of the thresholded predictions (rows = true, columns = predicted).
# labels=[0, 1] pins the matrix to 2x2 in the order (Fake, Real), so it always
# matches the tick labels below even if one class is absent.
cm = confusion_matrix(y_test_labels, binary_predictions, labels=[0, 1])

# Plotting the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap='Blues', xticklabels=['Fake', 'Real'], yticklabels=['Fake', 'Real'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


# Testing Model on Deep-voice dataset

In [None]:
import os
import numpy as np
import librosa
# Deep-voice dataset: directory of genuine recordings.
real_audio_dir="/kaggle/input/deep-voice-deepfake-voice-recognition/KAGGLE/AUDIO/REAL"

# Deep-voice dataset: directory of fake recordings.
fake_audio_dir = "/kaggle/input/deep-voice-deepfake-voice-recognition/KAGGLE/AUDIO/FAKE"
def load_and_transform_audio_to_mfcc(filepath, target_duration, sample_rate, n_mfcc, n_fft=2048, hop_length=512, time_steps=109):
    """Load an audio file, apply pre-emphasis, and return fixed-size MFCCs.

    This redefinition replaces the earlier function of the same name. The only
    difference is that the signal goes through librosa.effects.preemphasis
    before the MFCCs are computed (a pre-emphasis filter, not noise reduction).

    NOTE(review): the model was trained on features from the earlier definition,
    which has no pre-emphasis step - confirm this train/test mismatch is intended.

    Args:
        filepath: Path of the audio file to load.
        target_duration: Maximum number of seconds read from the file.
        sample_rate: Sampling rate the audio is loaded at.
        n_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size used for the MFCC computation.
        hop_length: Hop between successive frames, in samples.
        time_steps: Number of frames in the returned array; shorter clips are
            zero-padded on the right, longer ones are truncated.

    Returns:
        Array of shape (n_mfcc, time_steps, 1), or None if anything fails
        (the error is printed, not raised).
    """
    try:
        audio, sr = librosa.load(filepath, sr=sample_rate, duration=target_duration)
        emphasized_audio = librosa.effects.preemphasis(audio)
        coefficients = librosa.feature.mfcc(
            y=emphasized_audio, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
        )

        # Positive: frames to pad; negative: frames to drop.
        frame_deficit = time_steps - coefficients.shape[1]
        if frame_deficit > 0:
            coefficients = np.pad(coefficients, ((0, 0), (0, frame_deficit)), 'constant')
        elif frame_deficit < 0:
            coefficients = coefficients[:, :time_steps]

        return coefficients.reshape((n_mfcc, time_steps, 1))
    except Exception as e:
        print(f"Error processing {filepath}: {str(e)}")
        return None

# Collect the full paths of every .wav file in the real and fake directories
real_audio_files = [
    os.path.join(real_audio_dir, entry)
    for entry in os.listdir(real_audio_dir)
    if entry.endswith(".wav")
]
fake_audio_files = [
    os.path.join(fake_audio_dir, entry)
    for entry in os.listdir(fake_audio_dir)
    if entry.endswith(".wav")
]

# Feature-extraction settings. N_MELS is passed as the n_mfcc argument below,
# so it is the number of MFCC coefficients.
SAMPLE_RATE, DURATION = 16000, 5
N_MELS, time_steps = 13, 109

# Accumulators filled by the prediction loop
all_predictions, all_true_labels, pair_info = [], [], []

def get_prefix(filename):
    """Return the lower-cased part of the file's base name before the first '-'.

    If the base name contains no '-', the whole lower-cased base name
    (extension included) is returned.
    """
    base_name = os.path.basename(filename).lower()
    prefix, _, _ = base_name.partition('-')
    return prefix

# Score every real clip against itself (label 1) and against each fake clip
# that shares its file-name prefix (label 0).
# NOTE(review): the only positive trials are a clip compared with itself -
# confirm that this is the intended evaluation protocol.

# Group the fake clips by prefix once, instead of rescanning the whole fake
# list for every real clip.
fake_paths_by_prefix = {}
for fake_path in fake_audio_files:
    fake_paths_by_prefix.setdefault(get_prefix(fake_path), []).append(fake_path)

for real_audio_path in real_audio_files:
    # The reference features are the same for every trial of this clip: compute them once.
    ref_mfcc_db = load_and_transform_audio_to_mfcc(real_audio_path, DURATION, SAMPLE_RATE, N_MELS, n_fft=2048, hop_length=512, time_steps=time_steps)
    if ref_mfcc_db is None:
        continue
    ref_mfcc_db_reshaped = ref_mfcc_db.reshape(1, N_MELS, time_steps, 1)

    real_prefix = get_prefix(real_audio_path)
    test_audio_paths = [real_audio_path] + fake_paths_by_prefix.get(real_prefix, [])

    for test_audio_path in test_audio_paths:
        is_real_pair = real_audio_path == test_audio_path

        if is_real_pair:
            # Same file as the reference, so the features are the same.
            test_mfcc_db = ref_mfcc_db
        else:
            test_mfcc_db = load_and_transform_audio_to_mfcc(test_audio_path, DURATION, SAMPLE_RATE, N_MELS, n_fft=2048, hop_length=512, time_steps=time_steps)
        if test_mfcc_db is None:
            continue

        # Add the batch axis expected by the model.
        test_mfcc_db_reshaped = test_mfcc_db.reshape(1, N_MELS, time_steps, 1)
        prediction = siamese_model.predict([ref_mfcc_db_reshaped, test_mfcc_db_reshaped])[0][0]

        all_predictions.append(prediction)
        all_true_labels.append(1 if is_real_pair else 0)  # 1 for real, 0 for fake
        pair_info.append((real_audio_path, test_audio_path))

# Proceed to evaluate the predictions with your preferred metrics


In [None]:
# Convert predictions to binary using 0.5 threshold
predicted_labels = [1 if pred >= 0.5 else 0 for pred in all_predictions]

# Ground truth collected by the prediction loop (1 = real pair, 0 = fake pair)
true_labels = all_true_labels

# Accuracy, as a percentage
accuracy = accuracy_score(true_labels, predicted_labels)*100

print(f"Accuracy: {accuracy:.4f} %")

# Confusion matrix; labels=[0, 1] keeps it 2x2 (Fake, Real) even if a class is
# missing, so it matches the heatmap tick labels and the later 4-way unpacking.
conf_matrix = confusion_matrix(true_labels, predicted_labels, labels=[0, 1])
print("Confusion Matrix:")
print(conf_matrix)

# ROC curve and AUC must be computed from the continuous model scores.
# Feeding the thresholded 0/1 labels collapses the curve to a single operating
# point and misreports the AUC.
fpr, tpr, thresholds = roc_curve(true_labels, all_predictions)
roc_auc = auc(fpr, tpr)

# Plotting the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap='Blues', xticklabels=['Fake', 'Real'], yticklabels=['Fake', 'Real'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, auc, roc_curve
import matplotlib.pyplot as plt
import seaborn as sns

# Uses true_labels, predicted_labels, conf_matrix, fpr, tpr and roc_auc from the previous cell.

# Precision, recall and F1 for the positive class, as percentages
precision, recall, f1 = (
    scorer(true_labels, predicted_labels) * 100
    for scorer in (precision_score, recall_score, f1_score)
)

# Specificity = TN / (TN + FP), taken from the flattened confusion matrix
tn, fp, fn, tp = conf_matrix.ravel()
specificity = (tn / (tn + fp)) * 100

print(f"Precision: {precision:.4f} %")
print(f"Recall: {recall:.4f} %")
print(f"Specificity: {specificity:.4f} %")
print(f"F1 Score: {f1:.4f} %")

# ROC curve with the chance diagonal for reference
lw = 2
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()


In [None]:
def load_and_transform_audio_to_mfcc(filepath, target_duration, sample_rate, n_mfcc, n_fft=2048, hop_length=512, time_steps=109):
    """Load an audio file and return its MFCCs as a fixed-size array.

    This redefinition computes the MFCCs directly from the loaded signal with
    no pre-emphasis step, and replaces any earlier function of the same name
    for the cells that follow.

    Args:
        filepath: Path of the audio file to load.
        target_duration: Maximum number of seconds read from the file.
        sample_rate: Sampling rate the audio is loaded at.
        n_mfcc: Number of MFCC coefficients per frame.
        n_fft: FFT window size used for the MFCC computation.
        hop_length: Hop between successive frames, in samples.
        time_steps: Number of frames in the returned array; shorter clips are
            zero-padded on the right, longer ones are truncated.

    Returns:
        Array of shape (n_mfcc, time_steps, 1), or None if anything fails
        (the error is printed, not raised).
    """
    try:
        audio, sr = librosa.load(filepath, sr=sample_rate, duration=target_duration)
        coefficients = librosa.feature.mfcc(
            y=audio, sr=sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
        )

        # Positive: frames to pad; negative: frames to drop.
        frame_deficit = time_steps - coefficients.shape[1]
        if frame_deficit > 0:
            coefficients = np.pad(coefficients, ((0, 0), (0, frame_deficit)), 'constant')
        elif frame_deficit < 0:
            coefficients = coefficients[:, :time_steps]

        return coefficients.reshape((n_mfcc, time_steps, 1))
    except Exception as e:
        print(f"Error processing {filepath}: {str(e)}")
        return None

# Testing Model on ASVspoof2021 dataset

In [None]:
# Evaluation on ASVspoof 2021 DF (eval part00); requires the trained `siamese_model`.
# NOTE: these assignments overwrite the DATASET_PATH / LABEL_FILE_PATH values
# used by the earlier sections of the notebook.
DATASET_PATH="/kaggle/input/asvspoof-21-df/ASVspoof2021_DF_eval_part00/ASVspoof2021_DF_eval/flac"
# Trial metadata file; load_testing_pairs below reads the label from column index 5.
LABEL_FILE_PATH = "/kaggle/input/metadata/trial_metadata.txt"
# Same feature settings as used for training.
SAMPLE_RATE = 16000
DURATION = 5
N_MFCC = 13  

def load_testing_pairs(label_file_path, dataset_path, limit=2000, label_index=5):
    """Build (speaker_id, reference_file, test_file, label) trials from a metadata file.

    The first bonafide utterance seen for a speaker becomes that speaker's
    reference and is also emitted as a trial against itself. Every later
    utterance of the speaker (bonafide or not) is paired with that reference.
    Non-bonafide utterances that appear before the speaker has a reference
    are dropped.

    Args:
        label_file_path: Path to the whitespace-separated metadata file. Column 0
            is the speaker id and column 1 the file id (without extension).
        dataset_path: Unused; kept so existing callers keep working.
        limit: Number of lines read from the start of the file. This counts
            lines, not emitted trials.
        label_index: Column index of the label ("bonafide" or anything else).
            Defaults to 5, the column this notebook reads for the 2021 metadata.

    Returns:
        List of (speaker_id, reference_filename, test_filename, label) tuples,
        with ".flac" appended to both file names. Empty if the file cannot be
        opened (a message is printed).
    """
    speaker_refs = {}  # speaker id -> file name of the first bonafide utterance
    testing_pairs = []

    try:
        with open(label_file_path, 'r') as file:
            # Stream the file so only the first `limit` lines are ever read.
            for line_number, line in enumerate(file):
                if line_number >= limit:
                    break

                parts = line.split()
                try:
                    speaker_id, audio_filename, label = parts[0], parts[1], parts[label_index]
                except IndexError:
                    # Blank or truncated line: skip it instead of crashing.
                    continue

                test_filename = audio_filename + ".flac"

                if label == "bonafide":
                    # setdefault registers the first bonafide file as reference
                    # (yielding a self-pair) and returns the stored one afterwards.
                    ref_filename = speaker_refs.setdefault(speaker_id, test_filename)
                    testing_pairs.append((speaker_id, ref_filename, test_filename, label))
                elif speaker_id in speaker_refs:
                    testing_pairs.append((speaker_id, speaker_refs[speaker_id], test_filename, label))

    except IOError:
        print(f"Could not read file: {label_file_path}")

    return testing_pairs

# Build (reference, test) trials from the first 2000 lines of the trial metadata file
testing_pairs = load_testing_pairs(LABEL_FILE_PATH, DATASET_PATH, limit=2000)
def prepare_test_pairs(testing_pairs, dataset_path, sample_rate, duration, n_mfcc, n_fft, hop_length, time_steps):
    """Turn trial tuples into MFCC pair arrays and binary labels.

    Args:
        testing_pairs: Iterable of (speaker_id, reference_file, test_file, label)
            tuples; file names already include their extension.
        dataset_path: Directory containing the audio files.
        sample_rate, duration, n_mfcc, n_fft, hop_length, time_steps: Forwarded
            to load_and_transform_audio_to_mfcc.

    Returns:
        (X, y), where X stacks [reference_mfcc, test_mfcc] per usable trial and
        y holds 1 for "bonafide" trials and 0 otherwise. Trials whose reference
        or test clip fails to load are skipped.
    """
    X_test_pairs = []
    y_test_labels = []
    # Many trials share one reference clip per speaker, so decode each reference
    # once and reuse it. A failed load is cached as None.
    reference_cache = {}

    for _speaker_id, reference_audio, test_audio, label in testing_pairs:
        if reference_audio not in reference_cache:
            ref_audio_path = os.path.join(dataset_path, reference_audio)
            reference_cache[reference_audio] = load_and_transform_audio_to_mfcc(
                ref_audio_path, duration, sample_rate, n_mfcc, n_fft, hop_length, time_steps
            )
        ref_mfcc_db = reference_cache[reference_audio]
        if ref_mfcc_db is None:
            continue

        if test_audio == reference_audio:
            # Self-pair: same file, so the features are the same.
            test_mfcc_db = ref_mfcc_db
        else:
            test_audio_path = os.path.join(dataset_path, test_audio)
            test_mfcc_db = load_and_transform_audio_to_mfcc(
                test_audio_path, duration, sample_rate, n_mfcc, n_fft, hop_length, time_steps
            )
        if test_mfcc_db is None:
            continue

        X_test_pairs.append([ref_mfcc_db, test_mfcc_db])
        y_test_labels.append(1 if label == "bonafide" else 0)

    return np.array(X_test_pairs), np.array(y_test_labels)

# Extract MFCC pairs and labels for the evaluation trials
X_test_pairs, y_test_labels = prepare_test_pairs(
    testing_pairs, DATASET_PATH, SAMPLE_RATE, DURATION, N_MFCC,
    n_fft=2048, hop_length=512, time_steps=109,
)

# Split each pair into the two model inputs (reference first, clip under test second)
model_batch_shape = (-1, N_MFCC, 109, 1)
left_inputs = np.array([reference for reference, _ in X_test_pairs]).reshape(model_batch_shape)
right_inputs = np.array([candidate for _, candidate in X_test_pairs]).reshape(model_batch_shape)

# Score every pair with the trained model
predictions = siamese_model.predict([left_inputs, right_inputs])



In [None]:
from sklearn.metrics import accuracy_score, roc_curve, auc, f1_score
import matplotlib.pyplot as plt

# Threshold the sigmoid scores at 0.5 (1 = bonafide, 0 = spoof)
scores = predictions.ravel()
binary_predictions = [int(score >= 0.5) for score in scores]

# Accuracy, as a percentage
accuracy = accuracy_score(y_test_labels, binary_predictions)*100
print(f"Accuracy: {accuracy:.4f} %")

# F1 score, as a percentage
f1 = f1_score(y_test_labels, binary_predictions)*100
print(f"F1 Score: {f1:.4f} %" )

# The ROC curve is computed from the raw scores, not the thresholded labels
fpr, tpr, thresholds = roc_curve(y_test_labels, scores)
roc_auc = auc(fpr, tpr)

# ROC curve with the chance diagonal for reference
lw = 2
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()


In [None]:
from sklearn.metrics import precision_score, recall_score, confusion_matrix

# Precision and recall for the positive (bonafide = 1) class, as percentages
precision = precision_score(y_test_labels, binary_predictions) * 100
recall = recall_score(y_test_labels, binary_predictions) * 100

# Specificity = TN / (TN + FP). labels=[0, 1] forces a 2x2 matrix even when one
# class is missing from the labels/predictions; without it the four-way
# unpacking below raises ValueError.
tn, fp, _, _ = confusion_matrix(y_test_labels, binary_predictions, labels=[0, 1]).ravel()
negatives = tn + fp
# With no negative samples specificity is undefined; report NaN rather than divide by zero.
specificity = (tn / negatives) * 100 if negatives else float("nan")

print(f"Precision: {precision:.4f} %")
print(f"Recall: {recall:.4f} %")
print(f"Specificity: {specificity:.4f} %")


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Confusion matrix of the thresholded predictions (rows = true, columns = predicted).
# labels=[0, 1] pins the matrix to 2x2 in the order (Fake, Real), so it always
# matches the tick labels below even if one class is absent.
cm = confusion_matrix(y_test_labels, binary_predictions, labels=[0, 1])

# Plotting the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap='Blues', xticklabels=['Fake', 'Real'], yticklabels=['Fake', 'Real'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()


# Save Model to Export

In [None]:
# Only the weights are written. To reuse them, rebuild the architecture with
# build_siamese_model(input_shape) and call load_weights on the result.
siamese_model.save_weights("siamese_model.weights.h5")