In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
import os
import ast
import numpy as np
import itertools
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, classification_report, roc_curve, roc_auc_score, precision_recall_curve, confusion_matrix, precision_score, f1_score, recall_score

print(os.getcwd())

# Set device to CPU explicitly
device = torch.device("cpu")
print(f"Training on device: {device}")

# Load data
df_train = pd.read_csv("../codebert/train_data.csv")
df_test = pd.read_csv("../codebert/test_data.csv")

# Load tensors directly onto `device`.
# Without map_location, torch.load restores each tensor to the device it was
# saved from, so files written on another device would not end up on the CPU.
# NOTE: weights_only=False unpickles arbitrary Python objects; only load
# files from a trusted source.
X_train = torch.load("../codebert/X_train.pt", map_location=device, weights_only=False)
X_test = torch.load("../codebert/X_test.pt", map_location=device, weights_only=False)
y_train = torch.load("../codebert/y_train.pt", map_location=device, weights_only=False)
y_test = torch.load("../codebert/y_test.pt", map_location=device, weights_only=False)

# Verify shapes
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# Load VulnScreener and get probabilities
class VulnScreener(nn.Module):
    """Three-layer MLP mapping a 768-feature input to one sigmoid output.

    Layout: 768 -> 256 -> 128 -> 1, with ReLU and Dropout(0.3) after each of
    the two hidden linear layers and a Sigmoid on the final output.
    """

    def __init__(self):
        super().__init__()
        # The position of every layer inside the Sequential is preserved, so
        # parameter names stay mlp.0.*, mlp.3.* and mlp.6.*.
        stages = [
            nn.Linear(768, 256),   # input -> hidden 1
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),   # hidden 1 -> hidden 2
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),     # hidden 2 -> output
            nn.Sigmoid(),          # squash to a probability
        ]
        self.mlp = nn.Sequential(*stages)

    def forward(self, x):
        """Run `x` through the MLP and return the sigmoid output."""
        probabilities = self.mlp(x)
        return probabilities

# Define VulnAnalyzer
class VulnAnalyzer(nn.Module):
    """1-D CNN with a pooled shortcut branch and a 9-way sigmoid head.

    The forward pass appends the screener probability to the feature vector,
    treats the result as a single-channel sequence, runs it through three
    conv/batch-norm/ReLU/max-pool stages, adds a 1x1-conv + average-pool
    shortcut, and classifies the flattened sum with two linear layers.
    """

    def __init__(self, dropout_rate=0.2):
        super().__init__()
        # Sub-modules are created in the same order and under the same
        # attribute names as before, so parameter names are unchanged.
        self.conv1 = self._conv_stage(1, 64)      # length 769 -> 384
        self.conv2 = self._conv_stage(64, 128)    # length 384 -> 192
        self.conv3 = self._conv_stage(128, 256)   # length 192 -> 96

        # Shortcut branch: lift 1 channel to 256 and pool 769 -> 96 so it can
        # be added to the conv3 output.
        shortcut_layers = [
            nn.Conv1d(1, 256, kernel_size=1),
            nn.AvgPool1d(kernel_size=8, stride=8),
        ]
        self.residual = nn.Sequential(*shortcut_layers)

        head_layers = [
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 96, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 9),
            nn.Sigmoid(),
        ]
        self.fc_layers = nn.Sequential(*head_layers)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Conv1d(k=3, pad=1) -> BatchNorm1d -> ReLU -> MaxPool1d(2)."""
        return nn.Sequential(
            nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm1d(out_channels),
            nn.ReLU(),
            nn.MaxPool1d(2),
        )

    def forward(self, x, p_s):
        """Return ``(probabilities, None)`` for features `x` and screener output `p_s`.

        `p_s` may be 1-D (one value per sample) or already a column; it is
        concatenated to `x` along dim 1 before the convolutions.
        """
        screener_col = p_s.unsqueeze(1) if p_s.dim() == 1 else p_s
        signal = torch.cat((x, screener_col), dim=1).unsqueeze(1)  # [batch, 1, features + 1]

        shortcut = self.residual(signal)
        features = self.conv3(self.conv2(self.conv1(signal)))

        # Fall back to nearest-neighbour resizing if the two branches disagree in length.
        target_len = features.size(2)
        if shortcut.size(2) != target_len:
            shortcut = nn.functional.interpolate(shortcut, size=target_len, mode='nearest')

        merged = features + shortcut  # residual connection
        flat = merged.view(merged.size(0), -1)
        return self.fc_layers(flat), None

# Load pre-trained models and pin them to `device`.
# The loaded objects are used directly as modules below (.eval() and forward
# calls), so the VulnScreener / VulnAnalyzer classes only need to be defined
# above; building fresh instances first would just be overwritten.
# Without map_location, torch.load restores weights to the device they were
# saved from, which can leave the model and the CPU inputs on different devices.
# NOTE: weights_only=False unpickles arbitrary Python objects; only load
# checkpoints from a trusted source.
screener = torch.load('../codebert/vuln_screener_model.pth', map_location=device, weights_only=False)
analyzer = torch.load('../codebert/vuln_analyzer_model.pth', map_location=device, weights_only=False)
screener.eval()
analyzer.eval()

# Inference with no gradient computation
with torch.no_grad():
    # .to(device) is a no-op when the tensors are already on `device`.
    train_inputs = X_train.to(device)
    test_inputs = X_test.to(device)
    train_prob = screener(train_inputs)
    test_prob = screener(test_inputs)
    analyzer_train_prob, _ = analyzer(train_inputs, train_prob)
    analyzer_test_prob, _ = analyzer(test_inputs, test_prob)

/Users/rita/Documents/9309_ML/Smart_Contract_Vulnerabilities_Project/Model Training
Training on device: cpu
X_train shape: torch.Size([4294, 768])
y_train shape: torch.Size([4294, 9])
X_test shape: torch.Size([1074, 768])
y_test shape: torch.Size([1074, 9])


RuntimeError: Mismatched Tensor types in NNPack convolutionOutput

In [None]:
# Define VulnValidator
class VulnValidator:
    """Random-forest cross-check built on summary features of upstream outputs.

    Per-sample features are, in order: the analyzer probabilities, the
    screener probability, the attention values averaged over their last axis,
    and the mean and variance of the input row.
    """

    def __init__(self, n_trees=100, max_depth=10, pca_components=50):
        self.rf = RandomForestClassifier(
            n_estimators=n_trees,
            max_depth=max_depth,
            random_state=42
        )
        # NOTE(review): this PCA object is constructed but not used by any
        # method of the class.
        self.pca = PCA(n_components=pca_components)
        self.feature_importance = None

    @staticmethod
    def _to_numpy(value):
        """Return a NumPy view of a tensor; pass non-tensors through unchanged."""
        return value.cpu().detach().numpy() if torch.is_tensor(value) else value

    @staticmethod
    def _as_column(array):
        """Turn a 1-D array into an (n, 1) column; leave other ranks untouched."""
        array = np.asarray(array)
        return array.reshape(-1, 1) if array.ndim == 1 else array

    def prepare_features(self, X, p_a, p_s, attn):
        """Build the 2-D feature matrix fed to the random forest.

        Args:
            X: input rows, tensor or array of shape (n_samples, n_features).
            p_a: analyzer probabilities, (n_samples, k) or 1-D.
            p_s: screener probabilities, (n_samples, 1) or 1-D.
            attn: attention values; averaged over the last axis.

        Returns:
            np.ndarray with one row per sample.
        """
        X = self._to_numpy(X)
        p_a = self._as_column(self._to_numpy(p_a))
        p_s = self._as_column(self._to_numpy(p_s))
        attn = self._to_numpy(attn)

        # For 2-D attention the mean is 1-D; np.hstack cannot mix 1-D and 2-D
        # inputs, so promote it to a column. Higher-rank attention keeps the
        # shape produced by the mean.
        attn_reduced = self._as_column(attn.mean(axis=-1))
        stats = np.hstack([X.mean(axis=1, keepdims=True),
                           X.var(axis=1, keepdims=True)])
        return np.hstack([p_a, p_s, attn_reduced, stats])

    def fit(self, X, p_a, p_s, attn, y_train):
        """Fit the forest on the prepared features and store its feature importances."""
        features = self.prepare_features(X, p_a, p_s, attn)
        y_train = self._to_numpy(y_train)
        self.rf.fit(features, y_train)
        self.feature_importance = self.rf.feature_importances_

    @staticmethod
    def _positive_class_proba(proba, classes):
        """Pick the probability column for label 1, or zeros if 1 was never seen."""
        proba = np.asarray(proba)
        hits = np.flatnonzero(np.asarray(classes) == 1)
        if hits.size == 0:
            return np.zeros(proba.shape[0])
        return proba[:, hits[0]]

    def predict(self, X, p_a, p_s, attn):
        """Return positive-class probabilities from the forest.

        For a multi-output forest, predict_proba returns one array per output;
        the result is then an (n_samples, n_outputs) matrix. For a
        single-output forest the result is the 1-D column at index 1.
        """
        features = self.prepare_features(X, p_a, p_s, attn)
        proba = self.rf.predict_proba(features)
        if isinstance(proba, list):
            # Multi-output: classes_ is a parallel list with one entry per output.
            columns = [
                self._positive_class_proba(out_proba, out_classes)
                for out_proba, out_classes in zip(proba, self.rf.classes_)
            ]
            return np.column_stack(columns)
        return proba[:, 1]

    def generate_validation_report(self, p_a, p_v, threshold=0.2):
        """List the outputs where analyzer and validator disagree across 0.5.

        An output index is an "anomaly" if any sample has p_a > 0.5 and
        p_v < 0.5 with |p_a - p_v| > threshold, and a "correction" if any
        sample has p_a < 0.5 and p_v > 0.5 with the same gap.

        Returns:
            dict with keys "anomalies" and "corrections", each a list of
            "Vuln {i}" strings.
        """
        report = {"anomalies": [], "corrections": []}
        p_a = self._to_numpy(p_a)
        # Convert p_v as well so both operands are NumPy arrays.
        p_v = self._to_numpy(p_v)

        for i in range(p_a.shape[1]):
            diff = np.abs(p_a[:, i] - p_v[:, i])
            mask_anomaly = (diff > threshold) & (p_a[:, i] > 0.5) & (p_v[:, i] < 0.5)
            mask_correction = (diff > threshold) & (p_a[:, i] < 0.5) & (p_v[:, i] > 0.5)

            if np.any(mask_anomaly):
                report["anomalies"].append(f"Vuln {i}")
            if np.any(mask_correction):
                report["corrections"].append(f"Vuln {i}")
        return report

In [None]:
def fuse_outputs(p_a, p_v, weight_a=0.7, weight_v=0.3, threshold=0.5):
    """Blend analyzer and validator probabilities and prepend an overall flag.

    Args:
        p_a: analyzer probabilities, tensor of shape (n_samples, k).
        p_v: validator probabilities, tensor broadcastable to `p_a`.
        weight_a: weight applied to `p_a` (default 0.7).
        weight_v: weight applied to `p_v` (default 0.3).
        threshold: a row is flagged when its largest fused probability
            exceeds this value (default 0.5).

    Returns:
        Tensor of shape (n_samples, k + 1): column 0 is the 0/1 flag, the
        remaining columns are the fused probabilities.
    """
    p_f = weight_a * p_a + weight_v * p_v
    # max(dim=1) returns (values, indices); only the values are needed.
    p_f0 = (p_f.max(dim=1)[0] > threshold).float().unsqueeze(1)
    return torch.cat([p_f0, p_f], dim=1)

def evaluate_results(p_f, y_true, split="Test"):
    """Print per-output and overall accuracy, then the validator's report.

    Args:
        p_f: fused probability tensor (as produced by fuse_outputs).
        y_true: label tensor; its first column is skipped.
        split: label used in the printed headings. "Test" selects the
            test-set globals for the report, any other value the train-set ones.

    Reads the module-level names `validator`, `analyzer_test_prob`,
    `analyzer_train_prob`, `p_v_test` and `p_v_train`.
    """
    y_true = y_true[:, 1:].cpu().numpy()
    n_vuln = y_true.shape[1]

    # Align predictions with labels from the right so both arrays have the same
    # number of columns even when p_f carries more leading columns than y_true.
    # When p_f and y_true have the same width this is exactly p_f[:, 1:].
    # NOTE(review): assumes the per-vulnerability columns are the trailing
    # columns of both tensors -- confirm against the label layout.
    first_col = p_f.shape[1] - n_vuln
    if first_col < 0:
        raise ValueError(
            f"p_f has {p_f.shape[1]} columns but {n_vuln} label columns are expected"
        )
    y_pred = (p_f[:, first_col:] > 0.5).float().cpu().numpy()

    print(f"\n{split} Set Results:")
    accuracy_per_vuln = [accuracy_score(y_true[:, i], y_pred[:, i]) for i in range(n_vuln)]
    for i, acc in enumerate(accuracy_per_vuln):
        print(f"Vuln {i}: Accuracy = {acc:.4f}")
    print(f"Overall Accuracy: {accuracy_score(y_true.flatten(), y_pred.flatten()):.4f}")

    is_test = split == "Test"
    report = validator.generate_validation_report(
        analyzer_test_prob if is_test else analyzer_train_prob,
        p_v_test if is_test else p_v_train,
    )
    print(f"\nValidation Report ({split}):")
    print(f"Anomalies: {report['anomalies']}")
    print(f"Corrections: {report['corrections']}")

In [None]:
# Initialize and train VulnValidator
# Use reduced attention size to avoid memory issues.
# The attention inputs are random placeholders; draw them from a seeded
# generator so repeated runs produce the same features (the forest itself is
# already seeded with random_state=42).
attn_rng = torch.Generator().manual_seed(42)
attn_train = torch.rand(X_train.shape[0], 768, generator=attn_rng)  # [batch_size, 768] instead of [batch_size, 768, 768]
attn_test = torch.rand(X_test.shape[0], 768, generator=attn_rng)

validator = VulnValidator()
validator.fit(X_train, analyzer_train_prob, train_prob, attn_train, y_train)

# Get validator predictions
p_v_train = torch.tensor(validator.predict(X_train, analyzer_train_prob, train_prob, attn_train))
p_v_test = torch.tensor(validator.predict(X_test, analyzer_test_prob, test_prob, attn_test))

# Fuse outputs
p_f_train = fuse_outputs(analyzer_train_prob, p_v_train)
p_f_test = fuse_outputs(analyzer_test_prob, p_v_test)

evaluate_results(p_f_train, y_train, "Train")
evaluate_results(p_f_test, y_test, "Test")