In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
import seaborn as sns
import os
import ast
import numpy as np
import itertools
from xgboost import XGBClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.multioutput import MultiOutputClassifier
from collections import Counter
from sklearn.decomposition import PCA
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, classification_report, roc_curve, roc_auc_score, precision_recall_curve, confusion_matrix, precision_score, f1_score, recall_score

print(os.getcwd())

# Check for MPS availability (Apple Silicon GPU)
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Training on device: {device}")

X_train = torch.load("../codebert/X_train.pt", weights_only=False).to(device)
X_test = torch.load("../codebert/X_test.pt", weights_only=False).to(device)
y_train = torch.load("../codebert/y_train.pt", weights_only=False).to(device)
y_test = torch.load("../codebert/y_test.pt", weights_only=False).to(device)

# Verify shapes
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# Load VulnScreener and get probabilities
class VulnScreener(nn.Module):
    def __init__(self):
        super(VulnScreener, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(768, 256),  # Input layer to Hidden Layer 1
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 128),  # Hidden Layer 1 to Hidden Layer 2
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),    # Hidden Layer 2 to Output Layer
            nn.Sigmoid()          # Probability output
        )
    
    def forward(self, x):
        return self.mlp(x)  # Forward pass through the network

class VulnAnalyzer(nn.Module):
    def __init__(self, dropout_rate=0.2):
        super(VulnAnalyzer, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv1d(1, 64, kernel_size=3, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.MaxPool1d(2)  # 769 -> 384
        )
        self.conv2 = nn.Sequential(
            nn.Conv1d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.MaxPool1d(2)  # 384 -> 192
        )
        self.conv3 = nn.Sequential(
            nn.Conv1d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.MaxPool1d(2)  # 192 -> 96
        )
        # Adjust residual path to match output size of conv3 (256 channels, 96 length)
        self.residual = nn.Sequential(
            nn.Conv1d(1, 256, kernel_size=1),
            nn.AvgPool1d(kernel_size=8, stride=8)  # Downsample 769 to ~96
        )
        self.fc_layers = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(256 * 96, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 9),
            nn.Sigmoid()
        )
    
    def forward(self, x, p_s):
        if p_s.dim() == 1:
            p_s = p_s.unsqueeze(1)
        x = torch.cat((x, p_s), dim=1).unsqueeze(1)  # [batch_size, 1, 769]
        residual = self.residual(x)  # [batch_size, 256, 96]
        x = self.conv1(x)  # [batch_size, 64, 384]
        x = self.conv2(x)  # [batch_size, 128, 192]
        x = self.conv3(x)  # [batch_size, 256, 96]
        # Ensure residual matches x’s size
        if residual.size(2) != x.size(2):
            residual = nn.functional.interpolate(residual, size=x.size(2), mode='nearest')
        x = x + residual  # Residual connection
        x = x.view(x.size(0), -1)  # [batch_size, 256 * 96]
        x = self.fc_layers(x)
        return x, None

screener = VulnScreener().to(device)
analyzer = VulnAnalyzer().to(device)

screener = torch.load('../codebert/vuln_screener_model.pth', weights_only=False).to(device)
analyzer = torch.load('../codebert/vuln_analyzer_model.pth', weights_only=False).to(device)
screener.eval()
analyzer.eval()

# 串接流程
with torch.no_grad():
    screener_train_prob = screener(X_train).to(device)
    screener_test_prob = screener(X_test).to(device)
    analyzer_train_prob,_ = analyzer(X_train, screener_train_prob)
    analyzer_test_prob,_ = analyzer(X_test, screener_test_prob)
    print(f"screener_train_prob: {screener_train_prob.shape}") 
    print(f"screener_test_prob: {screener_test_prob.shape}")
    print(f"analyzer_train_prob: {analyzer_train_prob.shape}")
    print(f"analyzer_test_prob: {analyzer_test_prob.shape}")
    

/Users/rita/Documents/9309_ML/Smart_Contract_Vulnerabilities_Project/Model Training
Training on device: mps
X_train shape: torch.Size([4294, 768])
y_train shape: torch.Size([4294, 9])
X_test shape: torch.Size([1074, 768])
y_test shape: torch.Size([1074, 9])
screener_train_prob: torch.Size([4294, 1])
screener_test_prob: torch.Size([1074, 1])
analyzer_train_prob: torch.Size([4294, 9])
analyzer_test_prob: torch.Size([1074, 9])


In [2]:
class VulnValidator:
    def __init__(self, n_classes=9, random_state=42):
        self.n_classes = n_classes
        self.random_state = random_state
        # 優化 XGBoost 參數
        self.model = MultiOutputClassifier(XGBClassifier(
            objective='binary:logistic',
            eval_metric='logloss',
            random_state=self.random_state,
            max_depth=6,           # 增加樹深度
            learning_rate=0.1,     # 降低學習率
            n_estimators=200,      # 增加樹數量
            scale_pos_weight=5     # 增加正樣本權重，應對不平衡
        ))
        self.thresholds = None

    def _concatenate_features(self, X, analyzer_prob, screener_prob):
        X_np = X.cpu().numpy().reshape(X.shape[0], -1)
        analyzer_np = analyzer_prob.cpu().numpy().reshape(analyzer_prob.shape[0], -1)
        screener_np = screener_prob.cpu().numpy().reshape(screener_prob.shape[0], -1)
        return np.concatenate([X_np, analyzer_np, screener_np], axis=1)

    def balance_data_per_class(self, X, y, min_samples=500):
        """
        逐類應用 SMOTE，並設置最小樣本數
        """
        smote = SMOTE(random_state=self.random_state, k_neighbors=5)
        X_balanced_list = []
        y_balanced_list = []

        for i in range(self.n_classes):
            y_class = y[:, i]
            if y_class.sum() < 5:  # 如果正樣本太少，跳過 SMOTE
                X_balanced_list.append(X)
                y_balanced_list.append(y_class)
                continue
            X_bal, y_bal = smote.fit_resample(X, y_class)
            X_balanced_list.append(X_bal)
            y_balanced_list.append(y_bal)

        max_samples = max(max(X.shape[0], min_samples) for X in X_balanced_list)
        X_balanced_final = np.zeros((max_samples, X_balanced_list[0].shape[1]))
        y_balanced_final = np.zeros((max_samples, self.n_classes))

        for i in range(self.n_classes):
            X_bal = X_balanced_list[i]
            y_bal = y_balanced_list[i]
            indices = np.random.choice(X_bal.shape[0], max_samples, replace=True)
            X_balanced_final = X_bal[indices] if i == 0 else X_balanced_final
            y_balanced_final[:, i] = y_bal[indices]

        return X_balanced_final, y_balanced_final

    def fit(self, X_train, analyzer_train_prob, screener_train_prob, y_train):
        X_combined = self._concatenate_features(X_train, analyzer_train_prob, screener_train_prob)
        y_train_np = y_train.cpu().numpy()

        X_balanced, y_balanced = self.balance_data_per_class(X_combined, y_train_np)
        print(f"Balanced X shape: {X_balanced.shape}, Balanced y shape: {y_balanced.shape}")

        self.model.fit(X_balanced, y_balanced)
        print("XGBoost model training completed.")

    def predict_proba(self, X, analyzer_prob, screener_prob):
        X_combined = self._concatenate_features(X, analyzer_prob, screener_prob)
        return self.model.predict_proba(X_combined)

    def optimize_thresholds(self, X_val, analyzer_val_prob, screener_val_prob, y_val, 
                          threshold_range=np.arange(0.1, 0.9, 0.05)):
        val_probs = self.predict_proba(X_val, analyzer_val_prob, screener_val_prob)
        y_val_np = y_val.cpu().numpy()
        self.thresholds = np.zeros(self.n_classes)

        for i in range(self.n_classes):
            best_f1 = 0
            best_threshold = 0.5
            y_true = y_val_np[:, i]
            y_prob = val_probs[i][:, 1]

            for threshold in threshold_range:
                y_pred = (y_prob >= threshold).astype(int)
                f1 = f1_score(y_true, y_pred)
                recall = recall_score(y_true, y_pred)
                if f1 > best_f1 and recall > 0.1:  # 提高 Recall 下限
                    best_f1 = f1
                    best_threshold = threshold

            self.thresholds[i] = best_threshold
            print(f"Class {i}: Best Threshold = {best_threshold:.2f}, F1-score = {best_f1:.4f}")

        return self.thresholds

    def predict(self, X, analyzer_prob, screener_prob, thresholds=None):
        if thresholds is None:
            thresholds = self.thresholds if self.thresholds is not None else [0.5] * self.n_classes

        probs = self.predict_proba(X, analyzer_prob, screener_prob)
        y_pred = np.zeros((X.shape[0], self.n_classes))

        for i in range(self.n_classes):
            y_pred[:, i] = (probs[i][:, 1] >= thresholds[i]).astype(int)

        return y_pred

In [3]:
def fuse_outputs(p_a, p_v):
    p_v = p_v.to(p_a.device)
    p_v = p_v.expand(-1, p_a.shape[1])
    
    p_f = 0.3 * p_a + 0.7 * p_v
    
    return p_f
    
def evaluate_predictions(y_true, y_pred):
    y_true_np = y_true.cpu().numpy()
    y_pred_np = y_pred.cpu().numpy()

    if y_pred_np.max() <= 1.0 and y_pred_np.min() >= 0.0:  # 假設是概率
        y_pred_np = (y_pred_np >= 0.5).astype(int)

    print("Classification Report:")
    print(classification_report(y_true_np, y_pred_np))

    for i in range(y_true_np.shape[1]):
        print(f"\nClass {i} Metrics:")
        print(f"Precision: {precision_score(y_true_np[:, i], y_pred_np[:, i]):.4f}")
        print(f"Recall: {recall_score(y_true_np[:, i], y_pred_np[:, i]):.4f}")
        print(f"F1-score: {f1_score(y_true_np[:, i], y_pred_np[:, i]):.4f}")

In [4]:
validator = VulnValidator()
validator.fit(X_train, analyzer_train_prob, screener_train_prob, y_train)

# Get validator predictions
validator_test_prob = torch.from_numpy(validator.predict(X_test, analyzer_test_prob, screener_test_prob))
validator_test_prob = validator_test_prob.clone().detach().to(device, dtype=torch.float32)
evaluate_predictions(validator_test_prob, y_test)

# Fuse outputs
fuse_test = fuse_outputs(analyzer_test_prob, validator_test_prob)
evaluate_predictions(y_test, fuse_test)

Balanced X shape: (8470, 778), Balanced y shape: (8470, 9)
XGBoost model training completed.
Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.87      0.87       562
           1       0.92      0.07      0.13       978
           2       1.00      0.02      0.04      1003
           3       0.78      0.01      0.03      1005
           4       0.83      0.05      0.09      1004
           5       0.95      0.10      0.19      1001
           6       0.94      0.19      0.32       983
           7       0.87      0.05      0.09       979
           8       0.93      0.19      0.31       992

   micro avg       0.90      0.14      0.24      8507
   macro avg       0.90      0.17      0.23      8507
weighted avg       0.90      0.14      0.20      8507
 samples avg       0.89      0.14      0.24      8507


Class 0 Metrics:
Precision: 0.8703
Recall: 0.8719
F1-score: 0.8711

Class 1 Metrics:
Precision: 0.9167
Recall: 0.0675
F1-sco