In [1]:
import os

import torch
import torchaudio
import torch.nn.functional as F
from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

In [2]:
# Configuration
SAMPLE_RATE = 16_000  # target sampling rate in Hz
FIXED_LENGTH = 2 * SAMPLE_RATE  # 2 seconds
BATCH_SIZE = 8

# Check device: use CUDA when PyTorch reports it as available, otherwise the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Dataset class (same as training)
class AudioDeepfakeDataset(Dataset):
    """Map-style dataset yielding ``(waveform, label)`` pairs from audio files.

    Each item is decoded with ``torchaudio.load``, resampled to ``SAMPLE_RATE``
    when the file's native rate differs, and then passed through the optional
    ``transform``.
    """

    def __init__(self, files, labels, transform=None):
        """Keep the parallel ``files`` / ``labels`` sequences and the optional ``transform``."""
        self.files, self.labels, self.transform = files, labels, transform

    def __len__(self):
        """Return the number of entries in ``files``."""
        return len(self.files)

    def __getitem__(self, idx):
        """Load entry ``idx`` and return it as ``(waveform, label)``."""
        audio_path, target = self.files[idx], self.labels[idx]

        audio, native_rate = torchaudio.load(audio_path)
        if native_rate != SAMPLE_RATE:
            audio = torchaudio.transforms.Resample(native_rate, SAMPLE_RATE)(audio)

        # A falsy transform (e.g. None) leaves the waveform as loaded.
        if not self.transform:
            return audio, target
        return self.transform(audio), target

In [4]:
# Pad/truncate waveform
def pad_waveform(waveform, length=FIXED_LENGTH):
    """Force the last (time) axis of ``waveform`` to exactly ``length`` samples.

    Longer inputs are truncated from the end; shorter (or equal-length) inputs
    are right-padded with zeros. The time axis is taken to be the last one, so
    the function accepts ``(time,)``, ``(channels, time)`` and
    ``(batch, channels, time)`` tensors alike.

    Args:
        waveform: Tensor whose last dimension is time.
        length: Target number of samples along the last dimension.

    Returns:
        A tensor with the same leading dimensions and ``length`` samples.
    """
    n_samples = waveform.shape[-1]
    if n_samples > length:
        # Ellipsis keeps every leading axis and slices only the time axis.
        return waveform[..., :length]
    # A (left, right) pad pair applies to the last dimension only.
    return F.pad(waveform, (0, length - n_samples))

# Default preprocessing: pad/truncate each clip with pad_waveform's default length.
# Bound directly to the function instead of a forwarding lambda assigned to a name.
transform = pad_waveform

In [5]:
# Load dataset
def load_dataset(data_dir="/Users/fenilvadher/Documents/Collage Data/SEM - 6/AI/AI Project/fake_audio"):
    """Collect ``.wav`` paths and integer labels from ``data_dir``.

    ``data_dir`` must contain a ``real`` and a ``fake`` sub-folder; files in
    ``real`` get label 0 and files in ``fake`` get label 1. Only names ending
    in ``.wav`` are kept.

    Returns:
        ``(paths, labels)`` - two parallel lists, ``real`` entries first.

    Note:
        Entries within each folder follow ``os.listdir`` order, which Python
        documents as arbitrary; any seeded split made from this list depends
        on that order.
    """
    paths, labels = [], []
    for class_name, class_id in (("real", 0), ("fake", 1)):
        class_dir = os.path.join(data_dir, class_name)
        wav_names = [name for name in os.listdir(class_dir) if name.endswith(".wav")]
        paths.extend(os.path.join(class_dir, name) for name in wav_names)
        labels.extend([class_id] * len(wav_names))
    return paths, labels

In [6]:
# CNN Model (same as training)
class AudioCNN(nn.Module):
    """1-D CNN over raw waveforms that outputs two class logits.

    Two ``Conv1d -> BatchNorm1d -> ReLU -> MaxPool1d`` stages feed a two-layer
    fully connected head. Layer attribute names are kept as-is because they
    form the keys of the module's ``state_dict``.
    """

    def __init__(self, input_length):
        """Build the layers for single-channel inputs of ``input_length`` samples."""
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=16, kernel_size=5, stride=2)
        self.bn1 = nn.BatchNorm1d(16)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(in_channels=16, out_channels=32, kernel_size=5, stride=2)
        self.bn2 = nn.BatchNorm1d(32)
        self.pool2 = nn.MaxPool1d(kernel_size=2)

        # Push an all-zero clip through the conv stack to find out how many
        # features reach the first linear layer.
        with torch.no_grad():
            probe = self._conv_stack(torch.zeros(1, 1, input_length))
            self.flattened_size = probe.view(1, -1).shape[1]

        self.fc1 = nn.Linear(self.flattened_size, 64)
        self.fc2 = nn.Linear(64, 2)

    def _conv_stack(self, x):
        """Apply both conv/norm/ReLU/pool stages in order."""
        stages = (
            (self.conv1, self.bn1, self.pool1),
            (self.conv2, self.bn2, self.pool2),
        )
        for conv, norm, pool in stages:
            x = pool(F.relu(norm(conv(x))))
        return x

    def forward(self, x):
        """Return logits of shape ``(batch, 2)`` for ``x`` of shape ``(batch, 1, samples)``."""
        features = self._conv_stack(x)
        hidden = F.relu(self.fc1(features.view(features.size(0), -1)))
        return self.fc2(hidden)

In [7]:
# Load test data
# The held-out 20% comes from a seeded split (random_state=42); it is only
# reproducible while load_dataset returns the files in the same order.
file_paths, file_labels = load_dataset("/Users/fenilvadher/Documents/Collage Data/SEM - 6/AI/AI Project/fake_audio")
_, test_paths, _, test_labels = train_test_split(file_paths, file_labels, test_size=0.2, random_state=42)
test_dataset = AudioDeepfakeDataset(test_paths, test_labels, transform=transform)
# Batches are served in dataset order (no shuffle argument is passed).
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

In [8]:
# Load model
# Rebuild the network for FIXED_LENGTH-sample inputs on the selected device,
# then restore the saved weights into it.
model = AudioCNN(FIXED_LENGTH).to(device)
# map_location remaps the stored tensors onto `device` while loading.
# NOTE(review): torch.load unpickles the file, so only open checkpoints from a
# trusted source; newer PyTorch releases accept weights_only=True for plain
# state dicts - confirm against the installed version before enabling it.
model.load_state_dict(torch.load("/Users/fenilvadher/Documents/Collage Data/SEM - 6/AI/AI Project/audio_deepfake_model.pth", map_location=device))
# Switch to evaluation mode before running inference.
model.eval()
print("✅ Model loaded successfully.")

✅ Model loaded successfully.


In [9]:
# Evaluate on test set
# Two independent lists: predicted class indices and ground-truth class indices.
all_preds, all_labels = [], []

In [None]:
# Run the model over the test split exactly once, collecting one prediction
# and one label per sample.
# The accumulators are reset here so that re-running this cell cannot append a
# second copy of the results.
all_preds = []
all_labels = []

with torch.no_grad():
    for waveforms, labels in test_loader:
        waveforms = waveforms.to(device)
        labels = labels.to(device)

        # Add a channel axis when a batch arrives as (batch, samples).
        if waveforms.dim() == 2:
            waveforms = waveforms.unsqueeze(1)

        outputs = model(waveforms)
        # Index of the largest logit along dim 1 is the predicted class.
        _, predicted = torch.max(outputs, 1)

        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

In [11]:
# Report
# Overall accuracy first, then the per-class table; target_names are listed in
# label order (0 -> "Real", 1 -> "Fake").
accuracy = accuracy_score(y_true=all_labels, y_pred=all_preds)
print(f"\n✅ Test Accuracy: {accuracy * 100:.2f}%")
print("\n📊 Classification Report:")
print(
    classification_report(
        y_true=all_labels,
        y_pred=all_preds,
        target_names=["Real", "Fake"],
    )
)


✅ Test Accuracy: 55.00%

📊 Classification Report:
              precision    recall  f1-score   support

        Real       0.62      0.67      0.64        12
        Fake       0.43      0.38      0.40         8

    accuracy                           0.55        20
   macro avg       0.52      0.52      0.52        20
weighted avg       0.54      0.55      0.54        20



In [13]:
# Predict on a single audio file
def predict_audio(file_path):
    """Classify one audio file as ``"Real"`` or ``"Fake"``.

    The file is loaded, down-mixed to mono when it has several channels,
    resampled to ``SAMPLE_RATE`` when its native rate differs, passed through
    ``transform`` and fed to ``model`` in evaluation mode.

    Args:
        file_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        ``"Real"`` when the predicted class index is 0, otherwise ``"Fake"``.
    """
    waveform, sr = torchaudio.load(file_path)

    # The model's first convolution accepts a single input channel, so a
    # stereo/multi-channel file would fail inside the network. Average the
    # channels into one; mono files are left untouched.
    if waveform.dim() == 2 and waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sr != SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(sr, SAMPLE_RATE)
        waveform = resampler(waveform)

    waveform = transform(waveform)
    # Add the batch axis: (channels, samples) -> (1, channels, samples).
    if waveform.dim() == 2:
        waveform = waveform.unsqueeze(0)
    waveform = waveform.to(device)

    model.eval()
    with torch.no_grad():
        output = model(waveform)
        predicted_class = torch.argmax(output, dim=1).item()
    return "Real" if predicted_class == 0 else "Fake"

# Example usage
# NOTE(review): this is an absolute, machine-specific path; point it at an
# audio file that exists on the machine running the notebook.
test_file_path = "/Users/fenilvadher/Documents/Collage Data/SEM - 6/AI/AI Project/fake_audio/real/speaker5_9.wav"  # Replace with your file
# Prints the "Real"/"Fake" string returned by predict_audio for that file.
print(f"\n🎧 Prediction for custom file: {predict_audio(test_file_path)}")



🎧 Prediction for custom file: Real
