In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
import os
import ast
import numpy as np
import itertools
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, roc_curve, roc_auc_score, precision_recall_curve, confusion_matrix, precision_score, f1_score, recall_score

print(os.getcwd())

# Check for MPS availability (Apple Silicon GPU)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Training on device: {device}")

X_train = torch.load("../codebert/X_train.pt", weights_only=False).to(device)
X_test = torch.load("../codebert/X_test.pt", weights_only=False).to(device)
y_train = torch.load("../codebert/y_train.pt", weights_only=False).to(device)
y_test = torch.load("../codebert/y_test.pt", weights_only=False).to(device)

# Verify shapes
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# Load VulnScreener and get probabilities
class VulnScreener(nn.Module):
    def __init__(self):
        super(VulnScreener, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(768, 256),  # Input layer to Hidden Layer 1
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),  # Hidden Layer 1 to Hidden Layer 2
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),    # Hidden Layer 2 to Output Layer
            nn.Sigmoid()          # Probability output
        )
    
    def forward(self, x):
        return self.mlp(x)  # Forward pass through the network

class VulnAnalyzer(nn.Module):
    def __init__(self, dropout_rate=0.2):
        super(VulnAnalyzer, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2)  # 769 -> 384
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2)  # 384 -> 192
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(2)  # 192 -> 96
        )
        # Adjust residual path to match output size of conv3 (256 channels, 96 length)
        self.residual = nn.Sequential(
            nn.Conv1d(1, 256, kernel_size=1),
            nn.AvgPool1d(kernel_size=8, stride=8)  # Downsample 769 to ~96
        )
        self.fc_layers = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 96, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 9),
            nn.Sigmoid()
        )
    
    def forward(self, x, p_s):
        if p_s.dim() == 1:
            p_s = p_s.unsqueeze(1)
        x = torch.cat((x, p_s), dim=1).unsqueeze(1)  # [batch_size, 1, 769]
        residual = self.residual(x)  # [batch_size, 256, 96]
        x = self.conv1(x)  # [batch_size, 64, 384]
        x = self.conv2(x)  # [batch_size, 128, 192]
        x = self.conv3(x)  # [batch_size, 256, 96]
        # Ensure residual matches x’s size
        if residual.size(2) != x.size(2):
            residual = nn.functional.interpolate(residual, size=x.size(2), mode='nearest')
        x = x + residual  # Residual connection
        x = x.view(x.size(0), -1)  # [batch_size, 256 * 96]
        x = self.fc_layers(x)
        return x, None

screener = VulnScreener().to(device)
analyzer = VulnAnalyzer().to(device)

screener = torch.load('../codebert/vuln_screener_model.pth', weights_only=False).to(device)
analyzer = torch.load('../codebert/vuln_analyzer_model.pth', weights_only=False).to(device)
screener.eval()
analyzer.eval()

# 串接流程
with torch.no_grad():
    train_prob = screener(X_train)
    test_prob = screener(X_test)
    analyzer_train_prob = analyzer(X_train, train_prob)
    analyzer_test_prob = analyzer(X_test, test_prob)

/Users/rita/Documents/9309_ML/Smart_Contract_Vulnerabilities_Project/Model Training
Training on device: mps
X_train shape: torch.Size([4294, 768])
y_train shape: torch.Size([4294, 9])
X_test shape: torch.Size([1074, 768])
y_test shape: torch.Size([1074, 9])


In [2]:
# Define VulnValidator without attention weights
class VulnValidator:
    def __init__(self, n_trees=100, max_depth=10, pca_components=50):
        self.rf = RandomForestClassifier(
            n_estimators=n_trees, 
            max_depth=max_depth,
            random_state=42
        )
        self.pca = PCA(n_components=pca_components)
        self.feature_importance = None
        
    def prepare_features(self, X, p_a, p_s):
        # Move tensors to CPU and convert to NumPy
        X = X.cpu().detach().numpy() if torch.is_tensor(X) else X
        p_a = p_a.cpu().detach().numpy() if torch.is_tensor(p_a) else p_a
        p_s = p_s.cpu().detach().numpy() if torch.is_tensor(p_s) else p_s
        
        # Compute stats on NumPy arrays (already on CPU)
        stats = np.hstack([X.mean(axis=1, keepdims=True), 
                          X.var(axis=1, keepdims=True)])
        features = np.hstack([p_a, p_s, stats])
        return features
    
    def fit(self, X, p_a, p_s, y_train):
        features = self.prepare_features(X, p_a, p_s)
        y_train = y_train.cpu().detach().numpy() if torch.is_tensor(y_train) else y_train
        self.rf.fit(features, y_train)
        self.feature_importance = self.rf.feature_importances_
    
    def predict(self, X, p_a, p_s):
        features = self.prepare_features(X, p_a, p_s)
        p_v = self.rf.predict_proba(features)[:, 1]  # Adjust if multi-label
        return p_v
    
    def generate_validation_report(self, p_a, p_v, threshold=0.2):
        report = {"anomalies": [], "corrections": []}
        p_a = p_a.cpu().detach().numpy() if torch.is_tensor(p_a) else p_a
        
        for i in range(p_a.shape[1]):
            diff = np.abs(p_a[:, i] - p_v[:, i])
            mask_anomaly = (diff > threshold) & (p_a[:, i] > 0.5) & (p_v[:, i] < 0.5)
            mask_correction = (diff > threshold) & (p_a[:, i] < 0.5) & (p_v[:, i] > 0.5)
            
            if np.any(mask_anomaly):
                report["anomalies"].append(f"Vuln {i}")
            if np.any(mask_correction):
                report["corrections"].append(f"Vuln {i}")
        return report

In [3]:
def fuse_outputs(p_a, p_v):
    p_f = 0.7 * p_a + 0.3 * p_v
    p_f0 = (p_f.max(dim=1)[0] > 0.5).float().unsqueeze(1)
    return torch.cat([p_f0, p_f], dim=1)

# Validation
def evaluate_results(p_f, y_true, split="Test"):
    y_pred = (p_f[:, 1:] > 0.5).float().cpu().numpy()
    y_true = y_true[:, 1:].cpu().numpy()
    
    print(f"\n{split} Set Results:")
    accuracy_per_vuln = [accuracy_score(y_true[:, i], y_pred[:, i]) for i in range(8)]
    for i, acc in enumerate(accuracy_per_vuln):
        print(f"Vuln {i}: Accuracy = {acc:.4f}")
    print(f"Overall Accuracy: {accuracy_score(y_true.flatten(), y_pred.flatten()):.4f}")
    
    # Generate and print validation report
    report = validator.generate_validation_report(analyzer_test_prob if split == "Test" else analyzer_train_prob, 
                                                p_v_test if split == "Test" else p_v_train)
    print(f"\nValidation Report ({split}):")
    print(f"Anomalies: {report['anomalies']}")
    print(f"Corrections: {report['corrections']}")

In [4]:
print(f"X_train device: {X_train.device}")
print(f"train_prob device: {train_prob.device}")
#print(f"analyzer_train_prob device: {analyzer_train_prob.device}")

validator = VulnValidator()
validator.fit(X_train, analyzer_train_prob, train_prob, y_train)

# Get validator predictions
p_v_train = torch.tensor(validator.predict(X_train, analyzer_train_prob, train_prob))
p_v_test = torch.tensor(validator.predict(X_test, analyzer_test_prob, test_prob))

# Fuse outputs
p_f_train = fuse_outputs(analyzer_train_prob, p_v_train)
p_f_test = fuse_outputs(analyzer_test_prob, p_v_test)

evaluate_results(p_f_train, y_train, "Train")
evaluate_results(p_f_test, y_test, "Test")

X_train device: mps:0
train_prob device: mps:0


TypeError: can't convert mps:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.