In [70]:
import os
import numpy as np
import torch
import torchaudio
import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

# Dataset directory. The default is the original author's local folder; set the
# AUDIO_DATASET_PATH environment variable to point at your own audio folder
# instead of editing the notebook. "~" in the value is expanded.
DATASET_PATH = os.path.expanduser(
    os.environ.get(
        "AUDIO_DATASET_PATH",
        "/Users/jaydeepjad/Desktop/Momenta-Audio-Deepfake-Detection/audio_data",
    )
)

# Fail fast: the loader walks this location with os.walk, so it has to be an
# existing directory (a regular file would silently yield zero samples).
if not os.path.isdir(DATASET_PATH):
    raise FileNotFoundError(f"Dataset path {DATASET_PATH} not found!")

def extract_features(file_path, sample_rate=16000, n_mfcc=13):
    """Summarise one audio file as a vector of time-averaged MFCCs.

    The file is read with ``librosa.load`` using ``sr=sample_rate``, MFCCs are
    computed with ``librosa.feature.mfcc`` and the transposed MFCC matrix is
    averaged along axis 0 (the mean across time).

    Args:
        file_path: Path of the audio file to read.
        sample_rate: Value passed to ``librosa.load`` as ``sr``.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        The averaged MFCC vector, or ``None`` when loading or feature
        extraction raised; in that case the error is printed, not re-raised.
    """
    try:
        audio, loaded_sr = librosa.load(file_path, sr=sample_rate)
        coefficients = librosa.feature.mfcc(y=audio, sr=loaded_sr, n_mfcc=n_mfcc)
        frames_first = coefficients.T
        return np.mean(frames_first, axis=0)
    except Exception as err:
        # Best-effort: a single unreadable file must not abort the whole scan.
        print(f"Error processing {file_path}: {err}")
        return None

# Load dataset: walk DATASET_PATH and collect one feature vector and one
# string label per readable .wav file.
X, y = [], []

for root, dirs, files in os.walk(DATASET_PATH):
    # os.walk yields entries in arbitrary filesystem order. Sorting the
    # directory list in place (which also fixes the traversal order) and the
    # file list keeps the sample order stable, so the seeded train/test split
    # further down picks the same files on every run and machine.
    dirs.sort()
    for file in sorted(files):
        # Compare case-insensitively so "CLIP.WAV" is not skipped and
        # "Genuine_01.wav" is not mislabelled as fake.
        file_lower = file.lower()
        if not file_lower.endswith(".wav"):
            continue
        file_path = os.path.join(root, file)
        features = extract_features(file_path)
        if features is None:
            continue  # extract_features already printed the failure
        X.append(features)
        # Label is derived from the file name -- modify based on your dataset labels.
        y.append("real" if "genuine" in file_lower else "fake")

# Convert to NumPy arrays
X = np.array(X)
y = np.array(y)

# Debugging: check dataset size
print(f"Total samples loaded: {len(X)}")

if len(X) == 0:
    raise ValueError("No audio samples were loaded. Check your dataset path and file format.")

# Turn the string labels into integer class ids.
encoder = LabelEncoder()
y = encoder.fit_transform(y)

# Hold out 20% of the samples for testing; the fixed random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Features become float32 tensors, labels become int64 (long) tensors.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long) for labels in (y_train, y_test)
)

# Training data: reshuffled every epoch.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

# Test data: fixed order.
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Define a simple neural network model
class AudioDeepfakeClassifier(nn.Module):
    """Two-layer fully connected classifier over a fixed-size feature vector.

    ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss``
    applies log-softmax itself, so feeding it already-softmaxed outputs
    normalises twice and flattens the gradients. Use ``predict_proba`` when
    class probabilities are needed.

    Args:
        input_dim: Size of each input feature vector (default 13, the number
            of MFCC features produced by the extractor in this notebook).
        hidden_dim: Width of the hidden layer (default 64).
        num_classes: Number of output classes (default 2: real / fake).
    """

    def __init__(self, input_dim=13, hidden_dim=64, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)
        # Kept as an attribute for existing code that accesses it; it is only
        # applied in predict_proba, never in forward.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Return unnormalised class scores for a batch ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

    def predict_proba(self, x):
        """Return softmax probabilities over dim 1 for a batch ``x``."""
        return self.softmax(self.forward(x))

# Seed torch's global RNG before the model is built. Weight initialisation and
# the shuffling done by train_loader both draw from it, so without a seed every
# run trains a different model even though the train/test split is fixed.
torch.manual_seed(42)

# Initialize model, loss, and optimizer
model = AudioDeepfakeClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    total_loss = 0  # sum of the per-batch losses for this epoch
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss:.4f}")

# Measure accuracy on the held-out test batches.
model.eval()
correct, total = 0, 0
with torch.no_grad():  # no gradients are needed for inference
    for X_batch, y_batch in test_loader:
        # Predicted class = index of the largest score in each row.
        predicted = torch.max(model(X_batch), 1).indices
        matches = predicted == y_batch
        correct += matches.sum().item()
        total += y_batch.size(0)

print(f"Test Accuracy: {100 * correct / total:.2f}%")


FileNotFoundError: Dataset path /Users/jaydeepjad/Desktop/Momenta-Audio-Deepfake-Detection/audio_data not found!

In [71]:
pip install numpy torch torchaudio librosa scikit-learn



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
