In [20]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torchvision.models import resnet18
import torch.nn.functional as F  
import torchvision.models as models  # Import this line to access pre-trained models
import random


In [2]:
# --------- 1. Merge CSV Files ---------
def merge_csv_files(directory, output_file, prefix="train_segment_merged_part_"):
    # Identify files with the specified prefix and .csv extension
    csv_files = [os.path.join(directory, f) for f in os.listdir(directory) 
                 if f.startswith(prefix) and f.endswith('.csv')]
    
    # Load and concatenate all identified files
    df_list = [pd.read_csv(file) for file in csv_files]
    merged_df = pd.concat(df_list, ignore_index=True)
    
    # Save the merged dataframe to a single CSV file
    merged_df.to_csv(output_file, index=False)
    return merged_df

# Merge the files
csv_dir = r"C:\Notebooks\rrl_source\dataset_raw\merge\new2"
output_file = "merged_features.csv"
merged_data = merge_csv_files(csv_dir, output_file)

# Verify the result
print(f"Merged data shape: {merged_data.shape}")

Merged data shape: (533035, 1086)


In [3]:
# --------- 2. Preprocess Data ---------
def preprocess_data(df):
    # Extract features and labels
    file_names = df.iloc[:, 0]  # File names (not used for training)
    labels = df.iloc[:, -1].values  # Labels (1=real, 0=fake)
    features = df.iloc[:, 1:-1].values  # Features (1x1084)
    
    # Normalize features
    scaler = StandardScaler()
    features_normalized = scaler.fit_transform(features)
    
    # Split data
    X_train, X_temp, y_train, y_temp = train_test_split(features_normalized, labels, test_size=0.4, random_state=42)
    X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)
    
    return X_train, X_val, X_test, y_train, y_val, y_test, features_normalized, labels



In [29]:
class AudioDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        anchor = self.features[idx]  # The anchor sample
        
        # Create a positive pair
        pos_idx = idx
        while pos_idx == idx:  # Ensure different sample for positive pair
            pos_idx = random.choice(range(len(self.labels)))
        positive = self.features[pos_idx]
        
        # Create a negative pair
        neg_idx = random.choice(range(len(self.labels)))  # Randomly choose a negative sample
        while self.labels[neg_idx] == self.labels[idx]:  # Ensure different class for negative pair
            neg_idx = random.choice(range(len(self.labels)))
        negative = self.features[neg_idx]
        
        return anchor, positive, negative

# Assuming the `merged_data` is a pandas DataFrame you passed to preprocess_data
X_train, X_val, X_test, y_train, y_val, y_test, features, labels = preprocess_data(merged_data)

# Create Dataset objects
train_dataset = AudioDataset(X_train, y_train)
val_dataset = AudioDataset(X_val, y_val)
test_dataset = AudioDataset(X_test, y_test)

# Create DataLoader instances
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [68]:
class AttentionModel(nn.Module):
    def __init__(self, input_dim, embedding_dim, num_classes=2, num_heads=8):
        super(AttentionModel, self).__init__()

        # Embedding layer to transform input features into the desired embedding dimension
        self.embedding = nn.Linear(input_dim, embedding_dim)

        # Positional Encoding (for attention layers)
        self.positional_encoding = nn.Parameter(torch.randn(1, embedding_dim))

        # Multi-Head Self Attention Layer
        self.attention = nn.MultiheadAttention(embed_dim=embedding_dim, num_heads=num_heads, batch_first=True)

        # Deeper Feed Forward Network after Attention
        self.fc1 = nn.Linear(embedding_dim, embedding_dim * 2)
        self.fc2 = nn.Linear(embedding_dim * 2, embedding_dim * 4)
        self.fc3 = nn.Linear(embedding_dim * 4, num_classes)

        # Layer Normalization for stability
        self.layer_norm = nn.LayerNorm(embedding_dim)
        
        # Dropout for regularization
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        # Embedding input to match the attention input size
        x = self.embedding(x)  # [batch_size, seq_len, embedding_dim]

        # Add Positional Encoding
        x = x + self.positional_encoding

        # Attention Layer (self-attention)
        attn_output, attn_weights = self.attention(x, x, x)  # (batch_size, seq_len, embedding_dim), (batch_size, seq_len, seq_len)

        # Layer normalization after attention
        x = self.layer_norm(attn_output + x)

        # Feed Forward Network
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)

        return x.squeeze(1), attn_weights  # Squeeze to get batch_size, num_classes output
input_dim = 1084
embedding_dim = 128
num_classes = 2
# Instantiate the model
model = AttentionModel(input_dim=input_dim, embedding_dim=embedding_dim, num_classes=num_classes).cuda()

In [69]:
class NTXentLoss(nn.Module):
    def __init__(self, temperature=0.5):
        super(NTXentLoss, self).__init__()
        self.temperature = temperature
    
    def forward(self, anchor_emb, positive_emb, negative_emb):
        # Normalize the embeddings to unit length (cosine similarity)
        anchor_emb = nn.functional.normalize(anchor_emb, dim=1)
        positive_emb = nn.functional.normalize(positive_emb, dim=1)
        negative_emb = nn.functional.normalize(negative_emb, dim=1)

        # Compute cosine similarity
        pos_sim = torch.sum(anchor_emb * positive_emb, dim=1)
        neg_sim = torch.sum(anchor_emb * negative_emb, dim=1)

        # Compute the contrastive loss
        loss_pos = -torch.log(torch.exp(pos_sim / self.temperature) / (torch.exp(pos_sim / self.temperature) + torch.exp(neg_sim / self.temperature)))
        loss = loss_pos.mean()
        
        return loss

criterion = NTXentLoss()

In [70]:
 #--------- 6. Optimizer ---------
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.7)

In [71]:
def train_contrastive_model(model, criterion, optimizer, train_loader, val_loader, test_loader, epochs=10):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for anchor, positive, negative in train_loader:  # Fetch pairs
            anchor, positive, negative = anchor.to(device), positive.to(device), negative.to(device)
            optimizer.zero_grad()

            # Forward pass to get embeddings for anchor, positive, and negative pairs
            anchor_emb, _ = model(anchor)  
            positive_emb, _ = model(positive)  
            negative_emb, _ = model(negative)  

            # Contrastive loss (NT-Xent Loss)
            loss = criterion(anchor_emb, positive_emb, negative_emb)  # Pass all three embeddings at once

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
        scheduler.step()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(train_loader):.4f}")

        # Evaluate on validation set only during training
        print("Evaluating on Validation Set...")
        val_auc, val_confusion, val_eer = evaluate_model(model, val_loader, device)
        print(f"Validation AUC: {val_auc:.4f}, EER: {val_eer:.4f}")
        print(f"Confusion Matrix (Validation):\n{val_confusion}")

    # After training is complete, evaluate on the test set
    print("Evaluating on Test Set after Training...")
    test_auc, test_confusion, test_eer = evaluate_model(model, test_loader, device)
    print(f"Test AUC: {test_auc:.4f}, EER: {test_eer:.4f}")
    print(f"Confusion Matrix (Test):\n{test_confusion}")

# Now you can call the train_contrastive_model function with all three loaders
train_contrastive_model(model, criterion, optimizer, train_loader, val_loader, test_loader, epochs=100)


Epoch 1/100, Loss: 0.6844
Evaluating on Validation Set...
Validation AUC: 0.6085, EER: 0.5783
Confusion Matrix (Validation):
[[23168 83439]
 [13555 93052]]
Epoch 2/100, Loss: 0.6471
Evaluating on Validation Set...
Validation AUC: 0.6214, EER: 0.5941
Confusion Matrix (Validation):
[[31352 75255]
 [17331 89276]]
Epoch 3/100, Loss: 0.6399
Evaluating on Validation Set...
Validation AUC: 0.6467, EER: 0.6131
Confusion Matrix (Validation):
[[39500 67107]
 [20522 86085]]
Epoch 4/100, Loss: 0.6316
Evaluating on Validation Set...
Validation AUC: 0.6406, EER: 0.6105
Confusion Matrix (Validation):
[[46712 59895]
 [25832 80775]]
Epoch 5/100, Loss: 0.6301
Evaluating on Validation Set...
Validation AUC: 0.6459, EER: 0.6114
Confusion Matrix (Validation):
[[37716 68891]
 [19811 86796]]
Epoch 6/100, Loss: 0.6285
Evaluating on Validation Set...
Validation AUC: 0.6530, EER: 0.6166
Confusion Matrix (Validation):
[[48873 57734]
 [26754 79853]]
Epoch 7/100, Loss: 0.6267
Evaluating on Validation Set...
Valida

In [None]:
# --------- 8. t-SNE Visualization ---------
def plot_tsne(features, labels, title, save_path):
    tsne = TSNE(n_components=2, random_state=42)
    tsne_features = tsne.fit_transform(features)
    plt.figure(figsize=(8, 8))
    plt.scatter(tsne_features[:, 0], tsne_features[:, 1], c=labels, cmap='viridis', s=10)
    plt.colorbar()
    plt.title(title)
    plt.savefig(save_path)
    plt.show()

# t-SNE before training
plot_tsne(features[:1000], labels[:1000], "t-SNE Before Training", "tsne_before.png")

In [72]:
# t-SNE after training
model.eval()

AttentionModel(
  (embedding): Linear(in_features=1084, out_features=128, bias=True)
  (attention): MultiheadAttention(
    (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
  )
  (fc1): Linear(in_features=128, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=512, bias=True)
  (fc3): Linear(in_features=512, out_features=2, bias=True)
  (layer_norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.3, inplace=False)
)

In [78]:
# Use torch.float32 dtype for input
with torch.no_grad():
    final_embeddings, _ = model(torch.tensor(features[:10000], dtype=torch.float32).cuda())  # Get only the embeddings (ignore attention_weights)
    final_embeddings = final_embeddings.cpu().numpy()  # Convert embeddings to CPU for visualization

plot_tsne(final_embeddings, labels[:10000], "t-SNE After Training", "tsne_after.png")



RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
model.summary