In [9]:
nvidia -smi

NameError: name 'nvidia' is not defined

In [1]:
import librosa
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import noisereduce as nr
from torchvision import models, transforms
from PIL import Image
import os
import cv2
import random
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import copy  # Added for deepcopy in early stopping

# Preprocessing Functions
def load_audio(file_path, sr=16000):
    """Load any audio format and resample."""
    try:
        audio, sample_rate = librosa.load(file_path, sr=sr)
        return audio, sample_rate
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None, None

def normalize_audio(audio):
    """Normalize audio to have zero mean and unit variance."""
    return (audio - np.mean(audio)) / np.std(audio)

def remove_gaussian_noise(audio, sr):
    """Apply spectral gating to remove Gaussian noise."""
    reduced_noise = nr.reduce_noise(y=audio, sr=sr)
    return reduced_noise

def apply_histogram_equalization(mel_spectrogram):
    """Apply histogram equalization to mel spectrogram."""
    mel_spectrogram_norm = cv2.normalize(mel_spectrogram, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)
    return cv2.equalizeHist(mel_spectrogram_norm)

def extract_mel_spectrogram(audio, sr, n_mels=128, n_fft=2048, hop_length=512):
    """Extract mel-spectrogram with histogram equalization."""
    S = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length)
    S_dB = librosa.power_to_db(S, ref=np.max)
    S_eq = apply_histogram_equalization(S_dB)
    return S_eq

# Dataset Class
class AudioDataset(Dataset):
    def __init__(self, base_path, transform=None, sr=16000, real_prefixes=['T_']):
        self.transform = transform
        self.file_paths = []
        self.labels = []
        self.sr = sr
        self.real_prefixes = real_prefixes  # Added parameter
        
        dataset_path = os.path.join(base_path)
        print(f"Dataset path: {dataset_path}")
        
        if not os.path.exists(dataset_path):
            print(f"Error: Dataset path {dataset_path} does not exist.")
            return
        
        # Scan directory for audio files
        for file_name in os.listdir(dataset_path):
            if file_name.endswith('.wav'):
                self.file_paths.append(os.path.join(dataset_path, file_name))
                # Extract label based on real_prefixes
                label = 1 if any(file_name.startswith(prefix) for prefix in self.real_prefixes) else 0
                self.labels.append(label)
        
        if len(self.file_paths) == 0:
            print("Error: No .wav files found in the dataset path.")
        
        # Print class distribution
        genuine = self.labels.count(1)
        spoofed = self.labels.count(0)
        print(f"Number of genuine samples: {genuine}")
        print(f"Number of spoofed samples: {spoofed}")
        print(f"Total files found: {len(self.file_paths)}")

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        label = self.labels[idx]

        audio, sr = load_audio(file_path, self.sr)
        audio = normalize_audio(audio)
        audio = remove_gaussian_noise(audio, sr)
        mel_spectrogram = extract_mel_spectrogram(audio, sr)

        # Convert to image format for ResNet input
        mel_spectrogram = np.stack([mel_spectrogram] * 3, axis=-1)
        mel_spectrogram = Image.fromarray(np.uint8(mel_spectrogram))

        if self.transform:
            mel_spectrogram = self.transform(mel_spectrogram)

        return mel_spectrogram, torch.tensor(label, dtype=torch.long)

# Add SEBlock class
class SEBlock(nn.Module):
    def __init__(self, channel, reduction=16):
        super(SEBlock, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y.expand_as(x)

# Define ResNet++ Model (Enhanced ResNet)
class ResNetPlusPlus(nn.Module):
    def __init__(self, num_classes=2):
        super(ResNetPlusPlus, self).__init__()
        # Update pretrained parameter to use weights or set to None
        self.model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        
        # Add additional layers
        self.model.fc = nn.Sequential(
            nn.Linear(self.model.fc.in_features, 512),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(256, num_classes)
        )
        
        # Add SE blocks to each residual block
        for layer in self.model.layer1:
            layer.add_module('se', SEBlock(256))
        for layer in self.model.layer2:
            layer.add_module('se', SEBlock(512))
        for layer in self.model.layer3:
            layer.add_module('se', SEBlock(1024))
        for layer in self.model.layer4:
            layer.add_module('se', SEBlock(2048))

    def forward(self, x):
        return self.model(x)

# Add Metrics Calculation Functions
def calculate_metrics(y_true, y_pred):
    """Calculate accuracy, precision, recall, and F1-score."""
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    return accuracy, precision, recall, f1

def evaluate_model(model, data_loader, criterion, device):
    """Evaluate the model and compute metrics."""
    model.eval()
    total_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for inputs, labels in data_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            total_loss += loss.item()
            
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    avg_loss = total_loss / len(data_loader)
    accuracy, precision, recall, f1 = calculate_metrics(all_labels, all_preds)
    
    return avg_loss, accuracy, precision, recall, f1

# Add EarlyStopping class
class EarlyStopping:
    def __init__(self, patience=5, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.best_model_wts = None

    def __call__(self, val_loss, model):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.best_model_wts = copy.deepcopy(model.state_dict())
        elif val_loss < self.best_loss:
            self.best_loss = val_loss
            self.best_model_wts = copy.deepcopy(model.state_dict())
            self.counter = 0
            if self.verbose:
                print(f'Validation loss decreased to {val_loss:.4f}. Resetting early stopping counter.')
        else:
            self.counter += 1
            if self.verbose:
                print(f'Validation loss did not decrease. Early stopping counter: {self.counter}/{self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True

# Training Pipeline
def train_model(model, train_loader, dev_loader, criterion, optimizer, device, epochs=10, patience=5):
    model.train()
    early_stopping = EarlyStopping(patience=patience, verbose=True)
    
    for epoch in range(epochs):
        print(f'\n--- Epoch {epoch + 1}/{epochs} ---')
        print('Starting Training Phase...')
        running_loss = 0.0
        for batch_idx, (inputs, labels) in enumerate(train_loader, 1):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if batch_idx % 100 == 0 or batch_idx == len(train_loader):
                print(f'Batch {batch_idx}/{len(train_loader)}, Loss: {loss.item():.4f}')
        avg_train_loss = running_loss / len(train_loader)
        print(f'Epoch {epoch + 1} Training Loss: {avg_train_loss:.4f}')

        # Validation on development set
        print('\nStarting Validation Phase...')
        avg_val_loss, val_acc, val_prec, val_rec, val_f1 = evaluate_model(model, dev_loader, criterion, device)
        print(f'Epoch {epoch + 1} Validation Loss: {avg_val_loss:.4f}')
        print(f'Validation Metrics - Acc: {val_acc:.4f}, Prec: {val_prec:.4f}, Rec: {val_rec:.4f}, F1: {val_f1:.4f}')

        # Check early stopping
        early_stopping(avg_val_loss, model)
        if early_stopping.early_stop:
            print("Early stopping triggered. Stopping training.")
            break

    # Load best model weights
    if early_stopping.best_model_wts:
        model.load_state_dict(early_stopping.best_model_wts)
        print("Loaded best model weights from early stopping.")

    # After training, evaluate on the evaluation set
    print('\nStarting Evaluation Phase...')
    test_loss, test_acc, test_prec, test_rec, test_f1 = evaluate_model(model, eval_loader, criterion, device)
    print(f"Evaluation Results - Loss: {test_loss:.4f}")
    print(f'Evaluation Metrics - Acc: {test_acc:.4f}, Prec: {test_prec:.4f}, Rec: {test_rec:.4f}, F1: {test_f1:.4f}')

# Example Usage
if __name__ == "__main__":
    # Update base path to your root directory
    base_path = "D:/Digital Audio Forensics"  # Adjust this to your actual root path

    # Data transformations
    data_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Determine device (use GPU if available)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")  # Updated to reflect device change
    
    # Prepare dataset and dataloader
    # Swap training and evaluation datasets
    train_dataset = AudioDataset(
        os.path.join(base_path, "Dataset", "DS_10283_3055", "ASVspoof2017_V2_eval", "ASVspoof2017_V2_eval"),  # Swapped path
        transform=data_transforms,
        real_prefixes=['E_']  # Updated prefix for eval as train
    )
    train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
    
    dev_dataset = AudioDataset(
        os.path.join(base_path, "Dataset", "DS_10283_3055", "ASVspoof2017_V2_dev", "ASVspoof2017_V2_dev"),
        transform=data_transforms,
        real_prefixes=['D_']  # No change for development set
    )
    dev_loader = DataLoader(dev_dataset, batch_size=2, shuffle=False)
    
    eval_dataset = AudioDataset(
        os.path.join(base_path, "Dataset", "DS_10283_3055", "ASVspoof2017_V2_train", "ASVspoof2017_V2_train"),  # Swapped path
        transform=data_transforms,
        real_prefixes=['T_']  # Updated prefix for train as eval
    )
    eval_loader = DataLoader(eval_dataset, batch_size=2, shuffle=False)

    # Initialize model, criterion, and optimizer
    model = ResNetPlusPlus().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Added weight decay

    # Train the model with validation
    train_model(model, train_loader, dev_loader, criterion, optimizer, device, epochs=10)


  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu
Dataset path: D:/Digital Audio Forensics\Dataset\DS_10283_3055\ASVspoof2017_V2_eval\ASVspoof2017_V2_eval
Number of genuine samples: 8520
Number of spoofed samples: 0
Total files found: 8520
Dataset path: D:/Digital Audio Forensics\Dataset\DS_10283_3055\ASVspoof2017_V2_dev\ASVspoof2017_V2_dev
Number of genuine samples: 1710
Number of spoofed samples: 0
Total files found: 1710
Dataset path: D:/Digital Audio Forensics\Dataset\DS_10283_3055\ASVspoof2017_V2_train\ASVspoof2017_V2_train
Number of genuine samples: 3014
Number of spoofed samples: 0
Total files found: 3014

--- Epoch 1/10 ---
Starting Training Phase...


KeyboardInterrupt: 