In [1]:
!pip install gdown

Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Downloading gdown-5.2.0-py3-none-any.whl (18 kB)
Installing collected packages: gdown
Successfully installed gdown-5.2.0


In [3]:
import gdown


file_id = '1Az1o3AfUM73lGlpXVsU_XvDBokFobk5j'
url = f'https://drive.google.com/uc?id={file_id}'

output = 'test.zip'  
gdown.download(url, output, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=1Az1o3AfUM73lGlpXVsU_XvDBokFobk5j
From (redirected): https://drive.google.com/uc?id=1Az1o3AfUM73lGlpXVsU_XvDBokFobk5j&confirm=t&uuid=0cb97e15-c029-4aec-a148-b795d9a63f65
To: /kaggle/working/test.zip
100%|██████████| 4.71G/4.71G [00:56<00:00, 84.0MB/s]


'test.zip'

In [4]:
import zipfile

local_zip_path = '/kaggle/working/test.zip'

local_extract_path = '/kaggle/working/'

import os
os.makedirs(local_extract_path, exist_ok=True)

# Extract the zip file
with zipfile.ZipFile(local_zip_path, 'r') as zip_ref:
    zip_ref.extractall(local_extract_path)

In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [6]:
file_id = '1EwCM0LgqqoHqCWkIjHffgzl_WxF2eirz'
url = f'https://drive.google.com/uc?id={file_id}'

# Download the file
output = 'files.csv'  
gdown.download(url, output, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1EwCM0LgqqoHqCWkIjHffgzl_WxF2eirz
To: /kaggle/working/files.csv
100%|██████████| 4.69M/4.69M [00:00<00:00, 111MB/s]


'files.csv'

In [8]:
# Load validation data
df = pd.read_csv('files.csv')
df = df[df['Set'] == 'eval']  # Only hidden validation set

mfcc_features = []
cqt_features = []
labels = []

i = 1
for index, row in df.iterrows():
    
    mfcc = np.load('mfccs/' + row['Name'] + ".npy")
    mfcc_features.append(mfcc)

    cqt = np.load("cqts/" + row['Name'] + ".npy")
    cqt_features.append(cqt)

    labels.append(1 if row['Type'] == 'spoof' else 0)
    i += 1
    if(i==50000):
      break
# Load validation data
print(i)
df = pd.read_csv('files.csv')
df = df[df['Set'] == 'hidden']
# Convert lists to numpy arrays

for index, row in df.iterrows():
    
    mfcc = np.load('mfccs/' + row['Name'] + ".npy")
    mfcc_features.append(mfcc)

    cqt = np.load("cqts/" + row['Name'] + ".npy")
    cqt_features.append(cqt)

    # Convert label to binary (1 for spoof, 0 for bonafide)
    labels.append(1 if row['Type'] == 'spoof' else 0)
    i += 1

print(i)
mfcc_features = np.array(mfcc_features)  # Shape: (num_samples, num_mfcc, timesteps)
cqt_features = np.array(cqt_features)    # Shape: (num_samples, num_cqt, timesteps)
labels = np.array(labels)                # Shape: (num_samples,)

50000
66926


In [9]:
# Add a channel dimension to MFCC and CQT features
mfcc_features = np.expand_dims(mfcc_features, axis=1)  # Shape: (num_samples, 1, num_mfcc, timesteps)
cqt_features = np.expand_dims(cqt_features, axis=1)    # Shape: (num_samples, 1, num_cqt, timesteps)
print(mfcc_features.shape)
print(cqt_features.shape)

(66925, 1, 13, 150)
(66925, 1, 84, 150)


In [10]:
# Create TensorDataset for validation set
val_dataset = TensorDataset(
    torch.tensor(mfcc_features, dtype=torch.float32),
    torch.tensor(cqt_features, dtype=torch.float32),
    torch.tensor(labels, dtype=torch.float32)
)

# Create DataLoader
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Replace the with required class (2 or 3 hidden layers)

class Res2NetBlock(nn.Module):
    def __init__(self, in_channels, out_channels, scales=4):
        super(Res2NetBlock, self).__init__()
        self.scales = scales

        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)

        self.convs = nn.ModuleList([
            nn.Conv2d(out_channels // scales, out_channels // scales, kernel_size=3, padding=1, bias=False)
            for _ in range(scales - 1)
        ])
        self.bns = nn.ModuleList([nn.BatchNorm2d(out_channels // scales) for _ in range(scales - 1)])

        self.conv3 = nn.Conv2d(out_channels // 2, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)

        self.relu = nn.ReLU(inplace=True)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))

        xs = torch.split(out, out.size(1) // self.scales, dim=1)

        for i in range(1, self.scales):
            if i == 1:
                output = xs[i]
            else:
                output = output + xs[i]
            output = self.relu(self.bns[i-1](self.convs[i-1](output)))

        out = torch.cat((xs[0], output), dim=1)

        out = self.relu(self.bn3(self.conv3(out)))

        out = self.pool(out)
        return out

class AudioSpoofingRes2Net(nn.Module):
    def __init__(self):
        super(AudioSpoofingRes2Net, self).__init__()

        self.mfcc_res2net = Res2NetBlock(in_channels=1, out_channels=32, scales=4)
        self.cqt_res2net = Res2NetBlock(in_channels=1, out_channels=32, scales=4)

        
        self.fc1 = nn.Linear(115200,4096)
        self.fc_extra = nn.Linear(4096,2048)
        self.fc2 = nn.Linear(2048, 128)
        self.fc3 = nn.Linear(128, 1)  # Output layer

    def forward(self, mfcc, cqt):
        mfcc_output = self.mfcc_res2net(mfcc)
        cqt_output = self.cqt_res2net(cqt)

        mfcc_output = mfcc_output.view(mfcc_output.size(0), -1)
        cqt_output = cqt_output.view(cqt_output.size(0), -1)

        combined = torch.cat((mfcc_output, cqt_output), dim=1)

        x = F.relu(self.fc1(combined))
        x = F.relu(self.fc_extra(x))  # Pass through the new hidden layer
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))

        return x


In [15]:
file_id = '17T9mGl5X7135FVjT7V-oBB0gz3hOgzSy' 

destination = 'res2net.pth' 
gdown.download(f'https://drive.google.com/uc?id={file_id}', destination, quiet=False)

Downloading...
From (original): https://drive.google.com/uc?id=17T9mGl5X7135FVjT7V-oBB0gz3hOgzSy
From (redirected): https://drive.google.com/uc?id=17T9mGl5X7135FVjT7V-oBB0gz3hOgzSy&confirm=t&uuid=30a7ffcc-d2ff-4489-a712-5bdc3672e9d5
To: /kaggle/working/res2net.pth
100%|██████████| 1.92G/1.92G [00:31<00:00, 60.7MB/s]


'res2net.pth'

In [16]:
# Load the saved model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model = AudioSpoofingRes2Net().to(device)
model.load_state_dict(torch.load('res2net.pth', map_location=device))

# Set the model to evaluation mode
model.eval()


# Variables to store predictions and labels
all_predictions = []
all_labels = []


with torch.no_grad():
    for mfcc_batch, cqt_batch, label_batch in val_loader:
        mfcc_batch = mfcc_batch.to(device)
        cqt_batch = cqt_batch.to(device)
        label_batch = label_batch.to(device)

        label_batch = label_batch.squeeze()  # Remove any extra dimensions

        # Forward pass: Get predictions from the model
        outputs = model(mfcc_batch, cqt_batch)

        # Apply a threshold of 0.5 to get binary predictions (1 for spoof, 0 for bonafide)
        predictions = (outputs.squeeze() >= 0.5).float()

        # Append the predictions and true labels for later analysis
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(label_batch.cpu().numpy())


# Convert predictions and labels to NumPy arrays
all_predictions = np.array(all_predictions)
all_labels = np.array(all_labels)

# Calculate accuracy
accuracy = accuracy_score(all_labels, all_predictions)
print(f"Validation Accuracy: {accuracy:.4f}")

# Generate the confusion matrix
conf_matrix = confusion_matrix(all_labels, all_predictions)
print("Confusion Matrix:")
print(conf_matrix)

# Precision
precision = precision_score(all_labels, all_predictions)
print(f"Precision: {precision:.4f}")

# Recall
recall = recall_score(all_labels, all_predictions)
print(f"Recall: {recall:.4f}")

# F1 Score
f1 = f1_score(all_labels, all_predictions)
print(f"F1 Score: {f1:.4f}")

from sklearn.metrics import roc_curve

def compute_eer(labels, scores):
    # Compute False Positive Rate (FPR) and True Positive Rate (TPR) at different thresholds
    fpr, tpr, thresholds = roc_curve(labels, scores)

    # False Rejection Rate (FRR) = 1 - True Positive Rate (TPR)
    fnr = 1 - tpr

    # Find the point where FPR and FNR intersect (Equal Error Rate)
    eer_threshold = thresholds[np.nanargmin(np.absolute(fnr - fpr))]
    eer = fpr[np.nanargmin(np.absolute(fnr - fpr))]

    return eer, eer_threshold

# Assuming `all_labels` are true labels and `outputs` are model probabilities (before thresholding)
eer, eer_threshold = compute_eer(all_labels, all_predictions)
print(f'EER: {eer:.4f} at threshold: {eer_threshold:.4f}')


# Define the t-DCF parameters (costs and priors)
Pspoof = 0.05  # Prior probability of spoof
Pbonafide = 1 - Pspoof  # Prior probability of bonafide
Cost_miss = 1  # Cost of missed detection (bonafide misclassified as spoof)
Cost_false_alarm = 10  # Cost of false alarm (spoof misclassified as bonafide)

# Compute t-DCF
def compute_tDCF(fpr, fnr):
    tDCF = Pbonafide * Cost_miss * fnr + Pspoof * Cost_false_alarm * fpr
    return tDCF

# Compute FPR and FNR at the EER threshold
fpr_eer, tpr_eer, _ = roc_curve(all_labels, all_predictions)
fnr_eer = 1 - tpr_eer

# Compute t-DCF at the EER threshold
tDCF_eer = compute_tDCF(fpr_eer, fnr_eer)

# Print the t-DCF array
print("t-DCF at all thresholds:")
print(tDCF_eer)


cuda


  model.load_state_dict(torch.load('res2net.pth', map_location=device))


Validation Accuracy: 0.8187
Confusion Matrix:
[[ 6055   903]
 [11229 48738]]
Precision: 0.9818
Recall: 0.8127
F1 Score: 0.8893
EER: 0.1298 at threshold: 1.0000
t-DCF at all thresholds:
[0.95       0.24277968 0.5       ]
