In [None]:
# Mount Google Drive inside the Colab runtime; the dataset root used below
# lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from tqdm import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms
import glob
import torch.nn.functional as F

# Configuration
class Config:
    """Hyper-parameters and paths shared by every cell of this notebook."""

    # --- Data location -----------------------------------------------------
    ROOT_FOLDER = '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/'
    SUBSET_SIZE = 1000   # upper bound on files taken per class (real / fake)

    # --- Feature settings --------------------------------------------------
    SR = 32000           # presumably the audio sample rate in Hz -- TODO confirm
    N_MELS = 128         # presumably the number of mel bands -- TODO confirm
    N_MFCC = 13          # feature size fed to the LSTM branch
    MAX_SEQ_LEN = 200    # number of time steps in the MFCC tensor

    # --- Optimisation ------------------------------------------------------
    BATCH_SIZE = 64
    N_EPOCHS = 10
    LR = 1e-4


CONFIG = Config()

# Prefer the GPU when PyTorch can see one; fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Dataset yielding (MFCC placeholder, mel-spectrogram image, label) samples
class CustomDataset(Dataset):
    """Dataset over mel-spectrogram images with an accompanying MFCC slot.

    Args:
        mfcc_files: List of MFCC file paths. Stored, but not read by
            ``__getitem__``.
        mel_files: List of mel-spectrogram image paths; defines the length.
        labels: Optional per-sample label vectors, indexable by sample index.
        transform: Optional callable applied to the loaded PIL image.

    NOTE(review): ``mfcc_files`` are never loaded -- every labelled sample
    receives an all-zero MFCC tensor, so the LSTM branch only sees zeros.
    Loading the real arrays needs the on-disk layout to be confirmed first.
    """

    def __init__(self, mfcc_files, mel_files, labels=None, transform=None):
        self.transform = transform
        self.labels = labels
        self.mel_files = mel_files
        self.mfcc_files = mfcc_files

    def __len__(self):
        # The mel images are the source of truth for the dataset size.
        return len(self.mel_files)

    def __getitem__(self, idx):
        """Return ``(mfcc, mel, label)`` when labels exist, else only ``mel``."""
        picture = Image.open(self.mel_files[idx]).convert('RGB')
        if self.transform:
            picture = self.transform(picture)

        # Unlabelled mode: hand back the image alone.
        if self.labels is None:
            return picture

        # Zero-filled stand-in for the MFCC features (N_MFCC x MAX_SEQ_LEN).
        placeholder_mfcc = torch.zeros((CONFIG.N_MFCC, CONFIG.MAX_SEQ_LEN))
        target = torch.tensor(self.labels[idx], dtype=torch.float32)
        return placeholder_mfcc, picture, target

# Load file paths and labels for the datasets
def _mel_path_for(mfcc_path):
    """Map ``<class_dir>/mfcc/<name>.npy`` to ``<class_dir>/mel/<name>.png``."""
    mfcc_dir, file_name = os.path.split(mfcc_path)
    stem, _ = os.path.splitext(file_name)
    # Only the parent directory and the extension are swapped; a blanket
    # str.replace would also rewrite 'mfcc' / '.npy' occurring elsewhere
    # in the path (e.g. in the root folder or in the file name).
    return os.path.join(os.path.dirname(mfcc_dir), 'mel', stem + '.png')


def _class_mfcc_files(root_folder, class_name, subset_size):
    """Return at most ``subset_size`` MFCC paths of one training class, sorted."""
    pattern = os.path.join(root_folder, 'train', class_name, 'mfcc', '*.npy')
    # glob yields files in arbitrary order; sorting makes the subset reproducible.
    return sorted(glob.glob(pattern))[:subset_size]


def load_file_paths_and_labels(root_folder, subset_size=CONFIG.SUBSET_SIZE, mode='train'):
    """Collect paired MFCC / mel-spectrogram paths and one-hot labels.

    Args:
        root_folder: Directory that contains ``train/<real|fake>/<mfcc|mel>/``.
        subset_size: Maximum number of files taken from each class.
        mode: ``'train'`` gathers the training files; any other value yields
            empty lists and ``labels=None``.

    Returns:
        ``(mfcc_files, mel_files, labels)``. Real samples come first and are
        labelled ``[0, 1]``; fake samples follow and are labelled ``[1, 0]``.
        The mel paths are derived from the MFCC paths, not globbed, so their
        existence on disk is not checked here.
    """
    if mode == 'train':
        real_mfcc_files = _class_mfcc_files(root_folder, 'real', subset_size)
        fake_mfcc_files = _class_mfcc_files(root_folder, 'fake', subset_size)

        mfcc_files = real_mfcc_files + fake_mfcc_files
        mel_files = [_mel_path_for(path) for path in mfcc_files]
        # Build a distinct list per sample so that mutating one label can
        # never leak into the others.
        labels = ([[0, 1] for _ in real_mfcc_files]
                  + [[1, 0] for _ in fake_mfcc_files])
    else:
        mel_files = []
        mfcc_files = []
        labels = None

    print(f"Mode: {mode}")
    print(f"MFCC Files: {mfcc_files[:5]}")  # Print first 5 file paths to verify
    print(f"Mel Files: {mel_files[:5]}")    # Print first 5 file paths to verify
    if mode == 'train':
        print(f"Labels: {labels[:5]}")      # Print first 5 labels to verify

    return mfcc_files, mel_files, labels

# Load file paths and labels
# (uses the default subset size, i.e. CONFIG.SUBSET_SIZE files per class)
train_mfcc_files, train_mel_files, train_labels = load_file_paths_and_labels(CONFIG.ROOT_FOLDER, mode='train')

# Print dataset sizes
print(f'Training samples: {len(train_mfcc_files)}')

# Ensure non-empty loaders
# NOTE(review): only the MFCC list is checked; the mel paths are derived from it
# and their existence on disk is not verified here.
assert len(train_mfcc_files) > 0, "Training dataset is empty!"

# Data transformations for Mel-spectrogram images
# Resize to 128x128 (the CNN's first linear layer is sized for this resolution),
# convert to a tensor, then shift/scale each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Create dataset
full_dataset = CustomDataset(train_mfcc_files, train_mel_files, train_labels, transform=transform)

# Split dataset into training and validation sets
# 80% of the samples go to training, the remainder to validation.
# NOTE(review): no generator/seed is passed to random_split, so the split
# presumably differs between runs -- confirm whether that is intended.
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=CONFIG.BATCH_SIZE, shuffle=False)

# Define the Bi-LSTM model
class BiLSTM(nn.Module):
    """(Optionally bidirectional) LSTM encoder followed by a linear projection.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Hidden size of each LSTM direction.
        output_dim: Size of the projected output vector.
        n_layers: Number of stacked LSTM layers.
        bidirectional: Whether the LSTM runs in both directions.
        dropout: Dropout probability, used between LSTM layers and on the
            input and output of the LSTM.

    Input to ``forward`` is ``(batch, seq_len, input_dim)`` (the LSTM is built
    with ``batch_first=True``); the result is ``(batch, output_dim)``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, bidirectional, dropout):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, n_layers, bidirectional=bidirectional, dropout=dropout, batch_first=True)
        # Size the projection from the actual number of directions so the
        # module also works with bidirectional=False.
        num_directions = 2 if bidirectional else 1
        self.fc = nn.Linear(hidden_dim * num_directions, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.dropout(x)
        # nn.LSTM initialises h_0 / c_0 to zeros on the input's own device
        # when no state is passed, so no explicit (global-device) state
        # tensors are needed.
        lstm_out, _ = self.lstm(x)
        # NOTE(review): the last time step is used as the sequence summary;
        # for the backward direction this is the state after a single step.
        return self.fc(self.dropout(lstm_out[:, -1, :]))

# Define the CNN model for Mel-spectrogram images
class CNN(nn.Module):
    """Two-block convolutional encoder for 3-channel 128x128 images.

    Each block is conv(3x3, padding 1) -> ReLU -> 2x2 max-pool, so a
    128x128 input becomes a 64-channel 32x32 map, which is what ``fc1``
    is sized for.

    Args:
        output_dim: Size of the returned feature vector.

    Input to ``forward`` is ``(batch, 3, 128, 128)``; the result is
    ``(batch, output_dim)``.
    """

    def __init__(self, output_dim):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 32 * 32, 128)
        self.fc2 = nn.Linear(128, output_dim)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64*32*32), this can never fold surplus spatial elements
        # into the batch axis: a wrongly sized input fails loudly in fc1.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        return self.fc2(x)

# Combine both models
class CombinedModel(nn.Module):
    """Fuse the LSTM (MFCC) branch and the CNN (mel image) branch.

    The two branch outputs are concatenated and mapped to 2 logits.

    Args:
        lstm_input_dim: Features per MFCC time step.
        lstm_hidden_dim: Hidden size of each LSTM direction.
        lstm_output_dim: Output size of the LSTM branch.
        lstm_n_layers: Number of stacked LSTM layers.
        lstm_bidirectional: Whether the LSTM is bidirectional.
        lstm_dropout: Dropout probability of the LSTM branch.
        cnn_output_dim: Output size of the CNN branch.
    """

    def __init__(self, lstm_input_dim, lstm_hidden_dim, lstm_output_dim, lstm_n_layers, lstm_bidirectional, lstm_dropout, cnn_output_dim):
        super().__init__()
        # Remembered so the no-MFCC placeholder matches the LSTM branch width.
        self.lstm_output_dim = lstm_output_dim
        self.lstm = BiLSTM(lstm_input_dim, lstm_hidden_dim, lstm_output_dim, lstm_n_layers, lstm_bidirectional, lstm_dropout)
        self.cnn = CNN(cnn_output_dim)
        self.fc = nn.Linear(lstm_output_dim + cnn_output_dim, 2)

    def forward(self, mfcc, mel):
        """Return ``(batch, 2)`` logits.

        Args:
            mfcc: ``(batch, n_mfcc, seq_len)`` tensor, or ``None`` to replace
                the LSTM branch output with zeros.
            mel: Image batch for the CNN branch.
        """
        if mfcc is None:
            # Zero placeholder created on mel's device/dtype; the batch size
            # comes from mel because there is no MFCC tensor to ask.
            lstm_out = mel.new_zeros(mel.size(0), self.lstm_output_dim)
        else:
            # (batch, n_mfcc, seq_len) -> (batch, seq_len, n_mfcc) for the
            # batch_first LSTM.
            lstm_out = self.lstm(mfcc.permute(0, 2, 1))
        cnn_out = self.cnn(mel)
        combined = torch.cat((lstm_out, cnn_out), dim=1)
        return self.fc(combined)

# Model initialization
# Both branches emit 128-dimensional vectors, so the fusion layer sees 256 features.
model = CombinedModel(
    lstm_input_dim=CONFIG.N_MFCC,
    lstm_hidden_dim=128,
    lstm_output_dim=128,
    lstm_n_layers=2,
    lstm_bidirectional=True,
    lstm_dropout=0.5,
    cnn_output_dim=128
).to(device)

# The dataset yields two-element float label vectors, which CrossEntropyLoss
# receives as its target together with the model's 2 logits.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=CONFIG.LR)

# Training and validation functions
def train(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Module called as ``model(mfcc, mel)``.
        loader: Iterable of ``(mfcc, mel, labels)`` batches supporting ``len``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepping the model parameters.
        device: Device every batch tensor is moved to.

    Returns:
        ``(mean batch loss, accuracy)``; each is ``0`` when there is nothing
        to average over.
    """
    model.train()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    for mfcc, mel, labels in loader:
        mfcc = mfcc.to(device)
        mel = mel.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        logits = model(mfcc, mel)
        batch_loss = criterion(logits, labels)
        batch_loss.backward()
        optimizer.step()
        running_loss += batch_loss.item()

        # Labels are per-class vectors, so both sides are reduced to a class
        # index before comparing.
        predicted_cls = logits.data.max(1).indices
        target_cls = labels.data.max(1).indices
        n_seen += target_cls.size(0)
        n_correct += (predicted_cls == target_cls).sum().item()

    n_batches = len(loader)
    mean_loss = running_loss / n_batches if n_batches > 0 else 0  # Avoid division by zero
    accuracy = n_correct / n_seen if n_seen > 0 else 0  # Avoid division by zero
    return mean_loss, accuracy

def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` without updating any parameters.

    Args:
        model: Module called as ``model(mfcc, mel)``.
        loader: Iterable of ``(mfcc, mel, labels)`` batches supporting ``len``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        device: Device every batch tensor is moved to.

    Returns:
        ``(mean batch loss, accuracy)``; each is ``0`` when there is nothing
        to average over.
    """
    model.eval()
    running_loss = 0
    n_correct = 0
    n_seen = 0

    with torch.no_grad():
        for mfcc, mel, labels in loader:
            mfcc = mfcc.to(device)
            mel = mel.to(device)
            labels = labels.to(device)

            logits = model(mfcc, mel)
            running_loss += criterion(logits, labels).item()

            # Reduce logits and per-class label vectors to class indices.
            predicted_cls = logits.data.max(1).indices
            target_cls = labels.data.max(1).indices
            n_seen += target_cls.size(0)
            n_correct += (predicted_cls == target_cls).sum().item()

    n_batches = len(loader)
    mean_loss = running_loss / n_batches if n_batches > 0 else 0  # Avoid division by zero
    accuracy = n_correct / n_seen if n_seen > 0 else 0  # Avoid division by zero
    return mean_loss, accuracy

# Training loop
# Runs CONFIG.N_EPOCHS epochs and checkpoints the weights whenever the
# validation loss improves on the best value seen so far.
best_valid_loss = float('inf')
for epoch in range(CONFIG.N_EPOCHS):
    train_loss, train_acc = train(model, train_loader, criterion, optimizer, device)
    valid_loss, valid_acc = evaluate(model, val_loader, criterion, device)

    # Keep only the checkpoint with the lowest validation loss.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'best-model.pt')

    print(f'Epoch {epoch+1}')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

# Load the best model
# NOTE(review): the file only exists if at least one epoch improved on the
# initial best_valid_loss; otherwise this load fails.
model.load_state_dict(torch.load('best-model.pt'))

# Final evaluation on the validation set
# (the same validation split that was used to select the checkpoint)
final_loss, final_acc = evaluate(model, val_loader, criterion, device)
print(f'Final Loss: {final_loss:.3f} | Final Acc: {final_acc*100:.2f}%')

Mode: train
MFCC Files: ['/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mfcc/UHFUUCRQ.npy', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mfcc/IBBQUTRN.npy', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mfcc/TWUQMGVY.npy', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mfcc/KWZBNAQV.npy', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mfcc/TLAUWDVF.npy']
Mel Files: ['/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mel/UHFUUCRQ.png', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mel/IBBQUTRN.png', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mel/TWUQMGVY.png', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mel/KWZBNAQV.png', '/content/drive/MyDrive/dataset/TeamDeepwave/dataset/preprocessed/train/real/mel/TLAUWDVF.png'

KeyboardInterrupt: 