In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import roc_curve, roc_auc_score, precision_recall_curve, confusion_matrix, precision_score, f1_score, recall_score
import matplotlib.pyplot as plt
import seaborn as sns
import os
import ast
import numpy as np
import itertools

# Show the working directory, since every data path below is relative to it.
print(os.getcwd())

# Select the compute device: Apple-Silicon GPU (MPS) when available, otherwise CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Training on device: {device}")

# Load the train/test tables; their 'vulnerability_list' column is parsed into labels below.
df_train = pd.read_csv("../CodeT5/train_data.csv")
df_test = pd.read_csv("../CodeT5/test_data.csv")

# Load the saved feature tensors for both splits.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary Python objects,
# which can execute code stored in the file -- only load files you produced or trust.
X_train = torch.load("../CodeT5/X_train.pt", weights_only=False)
X_test = torch.load("../CodeT5/X_test.pt", weights_only=False)

# Process vulnerability_list (9 dimensions)
def process_vulnerability_list(vuln_list_series, num_classes=9):
    """Convert a Series of stringified label lists into a float32 label tensor.

    Each entry is parsed with ``ast.literal_eval`` and the parsed rows are stacked
    into a 2-D matrix with one row per entry.

    Args:
        vuln_list_series: pandas Series whose values are strings containing a Python
            list literal of numeric labels, e.g. ``"[0, 1, 0]"``.
        num_classes: Required length of every parsed label list.

    Returns:
        ``torch.Tensor`` of dtype ``float32`` with shape ``(len(series), num_classes)``.

    Raises:
        ValueError: If the parsed labels do not form a 2-D matrix (for example the
            series is empty or its entries are scalars), or if the number of columns
            differs from ``num_classes``.
    """
    vuln_lists = vuln_list_series.apply(ast.literal_eval)
    y_binary = np.array([np.array(vuln) for vuln in vuln_lists], dtype=np.float32)
    # An empty series (or scalar entries) yields a 1-D array; indexing shape[1] on it
    # would fail with an unhelpful IndexError, so reject it explicitly.
    if y_binary.ndim != 2:
        raise ValueError(
            f"Expected a 2-D label matrix with {num_classes} columns, "
            f"got an array of shape {y_binary.shape}"
        )
    if y_binary.shape[1] != num_classes:
        raise ValueError(f"Expected {num_classes} dimensions, got {y_binary.shape[1]}")
    return torch.tensor(y_binary, dtype=torch.float32)

# Build the multi-label targets (9 label columns per row) from the CSV label column.
y_train = process_vulnerability_list(df_train['vulnerability_list'], num_classes=9)
y_test = process_vulnerability_list(df_test['vulnerability_list'], num_classes=9)

# Persist the label tensors in the same directory the feature tensors were loaded from.
torch.save(y_train, "../CodeT5/y_train.pt")
torch.save(y_test, "../CodeT5/y_test.pt")

# Verify shapes: X and y of the same split should have the same number of rows.
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

/Users/rita/Documents/9309_ML/Smart_Contract_Vulnerabilities_Project/Model Training
Training on device: mps
X_train shape: torch.Size([4294, 768])
y_train shape: torch.Size([4294, 9])
X_test shape: torch.Size([1074, 768])
y_test shape: torch.Size([1074, 9])


In [2]:
# Load VulnScreener and get probabilities
class VulnScreener(nn.Module):
    """Screening MLP that maps a 768-feature vector to a single probability.

    Layout: 768 -> 256 -> 128 -> 1. Each hidden layer is followed by ReLU and
    Dropout(0.3); the output layer is followed by a sigmoid.
    """

    def __init__(self):
        super().__init__()
        # Hidden stack: Linear -> ReLU -> Dropout for each (in, out) width pair.
        hidden = []
        for in_features, out_features in ((768, 256), (256, 128)):
            hidden += [nn.Linear(in_features, out_features), nn.ReLU(), nn.Dropout(0.3)]
        # Output head: one logit squashed to a probability.
        self.mlp = nn.Sequential(*hidden, nn.Linear(128, 1), nn.Sigmoid())

    def forward(self, x):
        """Return the sigmoid output of the MLP for ``x``."""
        return self.mlp(x)
# Load the saved screener object (presumably a fully pickled VulnScreener, which is why
# the class is defined above -- confirm against the notebook that saved it).
# NOTE(review): weights_only=False unpickles arbitrary objects and can execute code
# stored in the file -- only load checkpoints you trust.
screener = torch.load('../CodeT5/vuln_screener_model.pth', weights_only=False)
screener.eval()  # evaluation mode, so the Dropout layers are inactive
# Screener outputs for both splits, computed without tracking gradients.
with torch.no_grad():
    train_prob = screener(X_train)
    test_prob = screener(X_test)

# Enhanced oversampling with noise injection
def oversample_rare_classes(X, p_s, y, class_indices=[2, 3, 7], multiplier=5, noise_level=0.01):
    """Append noisy duplicates of the rows that carry any of the given labels.

    A row is selected when at least one of its ``class_indices`` columns in ``y``
    equals 1. The selected rows are appended ``multiplier`` times; each appended
    copy of ``X`` gets fresh Gaussian noise scaled by ``noise_level``, while the
    matching ``p_s`` and ``y`` rows are appended unchanged.

    Args:
        X: Feature tensor, indexed by row.
        p_s: Per-row tensor aligned with ``X`` (the screener output at the call site).
        y: 2-D label tensor aligned with ``X``.
        class_indices: Label columns that mark a row for duplication.
        multiplier: Number of noisy copies appended per selected row.
        noise_level: Standard-deviation scale applied to ``torch.randn_like`` noise.

    Returns:
        Tuple ``(X, p_s, y)`` with the original rows first, followed by the copies.
    """
    rare_rows = (y[:, class_indices] == 1).any(dim=1)
    rare_X, rare_p, rare_y = X[rare_rows], p_s[rare_rows], y[rare_rows]

    aug_X, aug_p, aug_y = X, p_s, y
    for _ in range(multiplier):
        # Fresh noise per round so the duplicates are not exact copies of each other.
        jittered = rare_X + torch.randn_like(rare_X) * noise_level
        aug_X = torch.cat((aug_X, jittered), dim=0)
        aug_p = torch.cat((aug_p, rare_p), dim=0)
        aug_y = torch.cat((aug_y, rare_y), dim=0)
    return aug_X, aug_p, aug_y

# Augment the training split with noisy copies of rows labelled with class 2, 3 or 7
# (the function's default class_indices).
# NOTE: this rebinds X_train / train_prob / y_train, so re-running this cell oversamples
# the already-augmented data again; the injected noise is random and no seed is set in
# the visible cells, so results differ between runs.
X_train, train_prob, y_train = oversample_rare_classes(X_train, train_prob, y_train)

In [3]:
# Weighted Focal Loss
class WeightedFocalLoss(nn.Module):
    """Focal loss over probabilities with a per-class weight vector.

    For every element: ``alpha * (1 - p_t) ** gamma * BCE`` with ``p_t = exp(-BCE)``;
    the result is averaged over all elements.

    Args:
        alpha: Per-class weights broadcast against the loss matrix. When omitted,
            a vector of nine ones on the notebook's ``device`` is used.
        gamma: Focusing exponent applied to ``(1 - p_t)``.
    """

    def __init__(self, alpha=None, gamma=2):
        super().__init__()
        if alpha is None:
            # Default: equal weight for each of the 9 classes.
            alpha = torch.ones(9).to(device)
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, inputs, targets):
        """Return the mean weighted focal loss of probabilities ``inputs`` vs ``targets``."""
        element_bce = nn.functional.binary_cross_entropy(inputs, targets, reduction='none')
        prob_correct = torch.exp(-element_bce)
        focal_factor = (1 - prob_correct) ** self.gamma
        return (self.alpha * focal_factor * element_bce).mean()

# Enhanced VulnAnalyzer with corrected residual connection
class VulnAnalyzer(nn.Module):
    """1-D CNN multi-label classifier with a pooled shortcut branch.

    The feature vector and the screener probability are concatenated into one
    single-channel sequence (769 values for 768 features, which is what the
    ``256 * 96`` classifier input size corresponds to). Three
    Conv/BatchNorm/ReLU/MaxPool stages produce 256 channels; a 1x1-conv plus
    average-pool shortcut of the raw sequence is added to that result before
    the fully connected head emits 9 sigmoid probabilities.

    Args:
        dropout_rate: Dropout probability used in the fully connected head.
    """

    def __init__(self, dropout_rate=0.2):
        super().__init__()

        def conv_stage(in_channels, out_channels):
            # Conv(k=3, padding=1) keeps the length; MaxPool(2) halves it.
            return nn.Sequential(
                nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm1d(out_channels),
                nn.ReLU(),
                nn.MaxPool1d(2),
            )

        self.conv1 = conv_stage(1, 64)     # length 769 -> 384
        self.conv2 = conv_stage(64, 128)   # length 384 -> 192
        self.conv3 = conv_stage(128, 256)  # length 192 -> 96

        # Shortcut branch: lift 1 -> 256 channels, then pool by 8 (769 -> 96) so it
        # lines up with the conv3 output.
        self.residual = nn.Sequential(
            nn.Conv1d(1, 256, kernel_size=1),
            nn.AvgPool1d(kernel_size=8, stride=8),
        )

        # Classifier head; index -2 is the final Linear(512, 9) layer.
        self.fc_layers = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 96, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 9),
            nn.Sigmoid(),
        )

    def forward(self, x, p_s):
        """Return ``(probabilities, None)`` for features ``x`` and screener output ``p_s``.

        ``p_s`` may be 1-D (one value per row) or 2-D with a trailing column; the
        second tuple element is always ``None``.
        """
        if p_s.dim() == 1:
            p_s = p_s.unsqueeze(1)
        signal = torch.cat((x, p_s), dim=1).unsqueeze(1)  # [batch, 1, features + 1]

        shortcut = self.residual(signal)
        features = self.conv3(self.conv2(self.conv1(signal)))

        # Resample the shortcut if its length differs from the conv output.
        if shortcut.size(2) != features.size(2):
            shortcut = nn.functional.interpolate(shortcut, size=features.size(2), mode='nearest')

        merged = features + shortcut
        flat = merged.view(merged.size(0), -1)
        return self.fc_layers(flat), None

In [4]:
# Training function with early stopping
def train_vuln_analyzer(X_train, p_s_train, y_train, threshold=0.7, epochs=200, lr=0.0001,
                       dropout_rate=0.2, weight_decay=0.001, validator_feedback=None, patience=20):
    """Train a VulnAnalyzer full-batch on the rows the screener scores above ``threshold``.

    Only rows whose screener probability exceeds ``threshold`` are used. If no row
    qualifies, the threshold is lowered in steps of 0.1 (never below 0.1) until at
    least one row does. Every epoch is a single optimisation step over all
    selected rows.

    Args:
        X_train: Feature tensor, one row per sample.
        p_s_train: Screener probability per sample, shaped ``(N,)`` or ``(N, 1)``.
        y_train: Multi-label target tensor aligned with ``X_train``.
        threshold: Minimum screener probability (exclusive) for a row to be used.
        epochs: Maximum number of epochs; also the cosine schedule period.
        lr: AdamW learning rate.
        dropout_rate: Dropout probability passed to ``VulnAnalyzer``.
        weight_decay: AdamW weight decay.
        validator_feedback: Optional ``{class_index: correction_factor}``. Every
            10th epoch (except epoch 0) the output-layer weight row of each listed
            class with a positive factor is scaled by ``1 + factor * 0.1``.
        patience: Number of consecutive epochs without a new best training F1
            after which training stops.

    Returns:
        The trained ``VulnAnalyzer`` with the weights of the last executed epoch.

    Raises:
        ValueError: If no sample exceeds the threshold even at the 0.1 floor.
    """
    model = VulnAnalyzer(dropout_rate=dropout_rate).to(device)

    # Compute class weights based on inverse frequency (over ALL rows passed in,
    # not only the rows that survive the screener filter).
    class_counts = y_train.sum(dim=0)
    alpha = 1.0 / (class_counts + 1e-6)
    alpha = alpha / alpha.sum() * 9  # Normalize to sum to 9
    criterion = WeightedFocalLoss(alpha=alpha.to(device), gamma=2)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    X_train, p_s_train, y_train = X_train.to(device), p_s_train.to(device), y_train.to(device)

    # The screener filter depends only on the inputs, so it is computed once instead
    # of every epoch. reshape(-1) gives a 1-D mask for both (N,) and (N, 1) inputs,
    # including N == 1 where squeeze() would collapse to a 0-dim mask.
    scores = p_s_train.reshape(-1)
    keep = scores > threshold
    while not bool(keep.any()) and threshold > 0.1:
        threshold = max(0.1, threshold - 0.1)
        keep = scores > threshold
    if not bool(keep.any()):
        # Fail with a clear message rather than an opaque BatchNorm error on an
        # empty batch.
        raise ValueError(
            f"No training samples have a screener probability above {threshold:.3f}; "
            "cannot train VulnAnalyzer on an empty batch"
        )
    X_train_filtered = X_train[keep]
    p_s_train_filtered = scores[keep]
    y_train_filtered = y_train[keep]

    best_f1 = 0.0
    patience_counter = 0

    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()

        p_a, _ = model(X_train_filtered, p_s_train_filtered)
        loss = criterion(p_a, y_train_filtered)

        # Training-set micro F1 at a fixed 0.5 cut-off (computed with dropout active).
        with torch.no_grad():
            y_pred_binary = (p_a > 0.5).float()
            train_f1 = f1_score(y_train_filtered.cpu(), y_pred_binary.cpu(),
                                average='micro', zero_division=0)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

        # Periodically scale the output-layer rows of the classes named in the feedback.
        if validator_feedback is not None and epoch % 10 == 0 and epoch > 0:
            with torch.no_grad():
                for vuln_idx, correction_factor in validator_feedback.items():
                    if vuln_idx < 9 and correction_factor > 0:
                        model.fc_layers[-2].weight[vuln_idx] *= (1 + correction_factor * 0.1)

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {loss.item():.4f}, Train F1: {train_f1:.4f}, Threshold: {threshold:.3f}")

        # Early stopping.
        # NOTE(review): this monitors the *training* F1 (there is no validation split
        # here) and the weights of the best epoch are not restored.
        if train_f1 > best_f1:
            best_f1 = train_f1
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

    return model

In [5]:
# Enhanced testing function
def test_vuln_analyzer(model, X_test, p_s_test, y_test, detailed_report=True):
    """Evaluate ``model`` and report micro-averaged and per-class metrics.

    For every class the decision threshold that maximises F1 on the supplied data
    is selected from the precision-recall curve, and predictions are binarised
    with those thresholds.

    NOTE(review): the thresholds are tuned on the same data they are scored on, so
    the reported precision / recall / F1 are optimistic; tune on a held-out
    validation split for an unbiased estimate.

    Args:
        model: Model called as ``model(X, p_s)`` and returning ``(probabilities, extra)``.
        X_test: Feature tensor, one row per sample.
        p_s_test: Screener probabilities aligned with ``X_test``.
        y_test: Multi-label target tensor aligned with ``X_test``.
        detailed_report: When true, also print the per-class metrics.

    Returns:
        Tuple ``(metrics, y_pred_proba, attn_weights)``: ``metrics`` holds the keys
        ``avg_precision``, ``avg_recall``, ``avg_f1``, ``avg_auc``,
        ``optimal_thresholds`` and ``class_metrics``; ``y_pred_proba`` is the
        NumPy probability matrix; ``attn_weights`` is the model's second output.
    """
    model.eval()
    X_test, p_s_test, y_test = X_test.to(device), p_s_test.to(device), y_test.to(device)

    with torch.no_grad():
        p_a, attn_weights = model(X_test, p_s_test)
        y_pred_proba = p_a.cpu().numpy()
        y_true = y_test.cpu().numpy()

    metrics = {}
    n_classes = y_true.shape[1]
    optimal_thresholds = []
    y_pred_binary = np.zeros_like(y_pred_proba)

    for i in range(n_classes):
        precision, recall, thresholds = precision_recall_curve(y_true[:, i], y_pred_proba[:, i])
        f1_scores = 2 * precision * recall / (precision + recall + 1e-10)
        # precision/recall have one more entry than thresholds (the final (1, 0)
        # point has no threshold), so restrict the search to valid indices.
        optimal_idx = np.argmax(f1_scores[:-1])
        optimal_thresholds.append(thresholds[optimal_idx])
        y_pred_binary[:, i] = (y_pred_proba[:, i] >= optimal_thresholds[i]).astype(int)

    # zero_division=0 is used throughout so classes without predictions or positives
    # score 0 consistently, without a mix of warnings.
    metrics['avg_precision'] = precision_score(y_true, y_pred_binary, average='micro', zero_division=0)
    metrics['avg_recall'] = recall_score(y_true, y_pred_binary, average='micro', zero_division=0)
    metrics['avg_f1'] = f1_score(y_true, y_pred_binary, average='micro', zero_division=0)
    metrics['avg_auc'] = roc_auc_score(y_true, y_pred_proba, average='micro')
    metrics['optimal_thresholds'] = optimal_thresholds

    class_metrics = {
        'precision': precision_score(y_true, y_pred_binary, average=None, zero_division=0),
        'recall': recall_score(y_true, y_pred_binary, average=None, zero_division=0),
        'f1': f1_score(y_true, y_pred_binary, average=None, zero_division=0),
        # ROC AUC is undefined when a class has only one label value in y_true;
        # report NaN for that class instead of aborting the whole evaluation.
        'auc': [
            roc_auc_score(y_true[:, i], y_pred_proba[:, i])
            if np.unique(y_true[:, i]).size > 1 else float('nan')
            for i in range(n_classes)
        ]
    }
    metrics['class_metrics'] = class_metrics

    print("\n=== VulnAnalyzer Test Results (Optimal Thresholds) ===")
    print(f"Test Samples: {len(X_test)}")
    print(f"Average Precision: {metrics['avg_precision']:.4f}")
    print(f"Average Recall: {metrics['avg_recall']:.4f}")
    print(f"Average F1-Score: {metrics['avg_f1']:.4f}")
    print(f"Average AUC: {metrics['avg_auc']:.4f}")

    if detailed_report:
        print("\nPer-Class Metrics with Optimal Thresholds:")
        for i in range(n_classes):
            print(f"\nVulnerability {i} (Threshold: {optimal_thresholds[i]:.3f}):")
            print(f"Precision: {class_metrics['precision'][i]:.4f}")
            print(f"Recall: {class_metrics['recall'][i]:.4f}")
            print(f"F1-Score: {class_metrics['f1'][i]:.4f}")
            print(f"AUC: {class_metrics['auc'][i]:.4f}")

    return metrics, y_pred_proba, attn_weights

In [6]:
# Final training and testing
# Per-class corrections as {class index: factor}: every 10th epoch (after epoch 0)
# train_vuln_analyzer scales that class's output-layer weight row by (1 + factor * 0.1).
validator_feedback = {0: 0.5, 2: 0.3}
# Train on the oversampled training split using the screener's training probabilities.
final_model = train_vuln_analyzer(
    X_train, train_prob, y_train,
    threshold=0.7, epochs=200, lr=0.0001, dropout_rate=0.2, weight_decay=0.001,
    validator_feedback=validator_feedback
)
# Evaluate on the test split, which was not oversampled.
metrics, probabilities, attn_weights = test_vuln_analyzer(final_model, X_test, test_prob, y_test)

print("\nSample Probabilities (first 5):", probabilities[:5])
# VulnAnalyzer.forward returns None as its second value, so the fallback text is printed.
print("Attention Weights Shape:", attn_weights.shape if attn_weights is not None else "None (CNN)")

Epoch 0, Loss: 0.1940, Train F1: 0.2786, Threshold: 0.700
Epoch 10, Loss: 0.1093, Train F1: 0.4873, Threshold: 0.700
Epoch 20, Loss: 0.0762, Train F1: 0.6329, Threshold: 0.700
Epoch 30, Loss: 0.0597, Train F1: 0.7003, Threshold: 0.700
Epoch 40, Loss: 0.0486, Train F1: 0.7586, Threshold: 0.700
Epoch 50, Loss: 0.0410, Train F1: 0.8070, Threshold: 0.700
Epoch 60, Loss: 0.0349, Train F1: 0.8438, Threshold: 0.700
Epoch 70, Loss: 0.0305, Train F1: 0.8699, Threshold: 0.700
Epoch 80, Loss: 0.0265, Train F1: 0.8919, Threshold: 0.700
Epoch 90, Loss: 0.0234, Train F1: 0.9113, Threshold: 0.700
Epoch 100, Loss: 0.0213, Train F1: 0.9152, Threshold: 0.700
Epoch 110, Loss: 0.0195, Train F1: 0.9302, Threshold: 0.700
Epoch 120, Loss: 0.0184, Train F1: 0.9343, Threshold: 0.700
Epoch 130, Loss: 0.0172, Train F1: 0.9381, Threshold: 0.700
Epoch 140, Loss: 0.0166, Train F1: 0.9403, Threshold: 0.700
Epoch 150, Loss: 0.0162, Train F1: 0.9422, Threshold: 0.700
Epoch 160, Loss: 0.0156, Train F1: 0.9430, Threshol

In [7]:
# Persist the whole model object (torch.save pickles the module itself, not just a
# state_dict). Loading it back therefore needs the VulnAnalyzer class to be defined in
# the loading session and, as with the other loads in this notebook, weights_only=False.
torch.save(final_model, '../CodeT5/vuln_analyzer_model.pth')