In [1]:
import os
import re
import tokenize
from io import BytesIO

from sklearn.feature_extraction.text import TfidfVectorizer
import torch
import torch.nn as nn
import torch.optim as optim
from skorch import NeuralNetClassifier
from skorch.callbacks import EarlyStopping
from sklearn.base import BaseEstimator
import pickle
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import joblib
from sklearn.ensemble import RandomForestClassifier
import numpy as np


In [37]:
# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Preprocessing

In [2]:
# Preprocessing functions
def tokenize_code(code, file_path):
    tokens = []
    reader = BytesIO(code.encode('utf-8')).readline
    try:
        for toknum, tokval, _, _, _ in tokenize.tokenize(reader):
            if toknum != tokenize.ENCODING:
                tokens.append(tokval)
    except tokenize.TokenError as e:
        print("Error tokenizing code in file:", file_path)
    return tokens

def normalize_code(code):
    code = re.sub(r'//.*', '', code)
    code = re.sub(r'/\*.*?\*/', '', code, flags=re.DOTALL)
    code = re.sub(r'\s+', ' ', code).strip()
    return code

def preprocess_code(code, file_path):
    normalized_code = normalize_code(code)
    tokens = tokenize_code(normalized_code, file_path)
    return ' '.join(tokens)

# Load data from directories

def load_data_from_directory(directory, label):
    data = []
    labels = []
    for filename in os.listdir(directory):
        if filename.endswith(".sol"):
            filepath = os.path.join(directory, filename)
            with open(filepath, 'r', encoding='utf-8') as file:
                try:
                    code = file.read()
                    preprocessed_code = preprocess_code(code, filepath)
                    data.append(preprocessed_code)
                    labels.append(label)
                except Exception as e:
                    print("Error processing file:", filepath)
    return data, labels

In [4]:
#Only Run this code on first time, then save the data and labels so next time can be loaded directly

# Paths to the directories
vulnerable_dir = './Contracts for training/Re-entrancy'
non_vulnerable_dir = './Contracts for training/Verified'

# Load and label the data
vulnerable_data, vulnerable_labels = load_data_from_directory(vulnerable_dir, 1)
non_vulnerable_data, non_vulnerable_labels = load_data_from_directory(non_vulnerable_dir, 0)

# Combine the data and labels
data = vulnerable_data + non_vulnerable_data
labels = vulnerable_labels + non_vulnerable_labels

## Save Preprocessed Data

In [5]:
# Paths to save the data and labels
data_path = 'preprocessed_data.pkl' #File too huge, cannot be uploaded to github
labels_path = 'labels.pkl'

# Save the preprocessed data
with open(data_path, 'wb') as data_file:
    pickle.dump(data, data_file)

# Save the labels
with open(labels_path, 'wb') as labels_file:
    pickle.dump(labels, labels_file)

## Load Saved Preprocessed Data

In [3]:
from sklearn.model_selection import train_test_split

# Paths to the saved data and labels
data_path = 'preprocessed_data.pkl'
labels_path = 'labels.pkl'

# Load the preprocessed data
with open(data_path, 'rb') as data_file:
    data = pickle.load(data_file)

# Load the labels
with open(labels_path, 'rb') as labels_file:
    labels = pickle.load(labels_file)

# Reduce the dataset by 20%
data, _, labels, _ = train_test_split(data, labels, test_size=0.2, random_state=42)

## Vectorizer

In [4]:
vectorizer = TfidfVectorizer(max_features=1000)

In [None]:
# Save the fitted vectorizer
joblib.dump(vectorizer, 'tfidf_vectors.pkl')

## Neural Network Vectors

In [6]:
X_nn = vectorizer.fit_transform(data).toarray().astype('float32')
y_nn = torch.tensor(labels, dtype=torch.float32).unsqueeze(1)

## Tfid Vectors

In [8]:
X = vectorizer.fit_transform(data).toarray()
y = labels

# Feed Forward Neural Network

In [7]:
# Define the neural network model
class SmartContractVulnerabilityModel(nn.Module):
    def __init__(self, input_dim, hidden_dim1=256, hidden_dim2=128):
        super(SmartContractVulnerabilityModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [8]:
# Skorch wrapper for the PyTorch model
net = NeuralNetClassifier(
    SmartContractVulnerabilityModel,
    module__input_dim=1000,
    max_epochs=20,  # Higher number of epochs with early stopping
    lr=0.001,
    optimizer=optim.Adam,
    criterion=nn.BCEWithLogitsLoss,
    iterator_train__shuffle=True,
    callbacks=[EarlyStopping(patience=5)],  # Early stopping after 5 epochs without improvement
)

# Hyperparameter grid
params = {
    'lr': [0.001],
    'max_epochs': [20],
    'module__hidden_dim1': [256],
    'module__hidden_dim2': [128]
}

In [9]:
# Initialize GridSearchCV
#cv is for cross validation
#n_jobs=-1 means use all available cores
gs = GridSearchCV(net, params, refit=True, cv=5, scoring='accuracy')

# Perform grid search
gs.fit(X_nn, y_nn)

  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m0.1946[0m       [32m0.9242[0m        [35m0.1783[0m  25.6279
      2        [36m0.1701[0m       [32m0.9292[0m        [35m0.1678[0m  20.5374
      3        [36m0.1617[0m       [32m0.9307[0m        [35m0.1678[0m  21.8351
      4        [36m0.1565[0m       [32m0.9320[0m        [35m0.1642[0m  18.8975
      5        [36m0.1528[0m       [32m0.9334[0m        [35m0.1624[0m  22.9703
      6        [36m0.1496[0m       [32m0.9339[0m        0.1626  23.1551
      7        [36m0.1465[0m       0.9333        [35m0.1619[0m  23.9718
      8        [36m0.1438[0m       0.9296        0.1705  21.7549
      9        [36m0.1415[0m       0.9328        0.1641  24.3057
     10        [36m0.1393[0m       0.9333        0.1658  27.9347
     11        [36m0.1372[0m       [32m0.9340[0m        0.1667  30.2313
Stopping since valid_loss has 

In [None]:
# Print the best parameters and the best score
print("Best parameters found:", gs.best_params_)
print("Best score:", gs.best_score_)

Best parameters found: {'lr': 0.001, 'max_epochs': 20, 'module__hidden_dim1': 256, 'module__hidden_dim2': 128}
Best score: 0.9341476835048672


In [None]:
# Save the best model
torch.save(gs.best_estimator_.module_.state_dict(), 'neural_network_model.pth')

In [14]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Use the best model found by GridSearchCV
best_model = gs.best_estimator_

# Fit the best model on the entire training set
best_model.fit(X_train, y_train)

# Predict on the test set
y_pred = best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Score: {f1:.4f}")

# Print detailed classification report
print(classification_report(y_test, y_pred))


NameError: name 'gs' is not defined

In [12]:
import joblib
from sklearn.metrics import confusion_matrix
import numpy as np

# Load the saved model
model = joblib.load('neural_network_vectors.pkl')

# Assuming you have your test data and labels in X_test and y_test respectively
# X_test = ...
# y_test = ...

# Make predictions
y_pred = model.predict(X)

# Calculate the confusion matrix
cm = confusion_matrix(y_test, y_pred)

print("Confusion Matrix:")
print(cm)


AttributeError: 'TfidfVectorizer' object has no attribute 'predict'

# Random Forest

In [12]:
# Define the model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

In [13]:
params_rf = {
    'n_estimators': [50, 100],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5]
}

# Initialize GridSearchCV
gs_rf  = GridSearchCV(
    rf_model, 
    params_rf, 
    refit=True, 
    cv=3, 
    scoring='accuracy',
    verbose=2 
)

In [14]:
gs_rf.fit(X, y)

# Print the best parameters and the best score
print("Best parameters found:", gs_rf.best_params_)
print("Best score:", gs_rf.best_score_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 4.3min
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 3.8min
[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 3.9min
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=10.8min
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=10.2min
[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time=10.2min
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 5.0min
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 4.8min
[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 4.6min
[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 5.9min
[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 6.1min
[CV] END max_depth=None, min_sam

In [50]:
# Save the best model
joblib.dump(gs_rf.best_estimator_, 'random_forest_model.pkl')

['random_forest_model.pkl']

In [51]:
# Use the best model found by GridSearchCV for Random Forest
best_rf_model = gs_rf.best_estimator_
best_rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred_rf = best_rf_model.predict(X_test)

# Evaluate the Random Forest model
accuracy_rf = accuracy_score(y_test, y_pred_rf)
precision_rf = precision_score(y_test, y_pred_rf)
recall_rf = recall_score(y_test, y_pred_rf)
f1_rf = f1_score(y_test, y_pred_rf)

In [52]:
print(f"Random Forest - Test Accuracy: {accuracy_rf:.4f}")
print(f"Random Forest - Test Precision: {precision_rf:.4f}")
print(f"Random Forest - Test Recall: {recall_rf:.4f}")
print(f"Random Forest - Test F1 Score: {f1_rf:.4f}")

print(classification_report(y_test, y_pred_rf))

Random Forest - Test Accuracy: 0.9371
Random Forest - Test Precision: 0.8096
Random Forest - Test Recall: 0.9797
Random Forest - Test F1 Score: 0.8866
              precision    recall  f1-score   support

           0       0.99      0.92      0.96     75727
           1       0.81      0.98      0.89     25360

    accuracy                           0.94    101087
   macro avg       0.90      0.95      0.92    101087
weighted avg       0.95      0.94      0.94    101087



# Graph Neural Network

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float)
y_test = torch.tensor(y_test, dtype=torch.long)

In [16]:
class GCN(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim1)
        self.conv2 = GCNConv(hidden_dim1, hidden_dim2)
        self.conv3 = GCNConv(hidden_dim2, output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = torch.relu(x)
        x = self.conv2(x, edge_index)
        x = torch.relu(x)
        x = self.conv3(x, edge_index)
        return torch.log_softmax(x, dim=1)

# Wrap the GNN model for use with Skorch
class SkorchGCN(nn.Module):
    def __init__(self, input_dim, hidden_dim1, hidden_dim2, output_dim):
        super(SkorchGCN, self).__init__()
        self.model = GCN(input_dim, hidden_dim1, hidden_dim2, output_dim)
        
    def forward(self, x):
        edge_index = torch.tensor([[0, 1], [1, 0]], dtype=torch.long)
        data = Data(x=x, edge_index=edge_index)
        return self.model(data)

In [17]:
net = NeuralNetClassifier(
    SkorchGCN,
    module__input_dim=X.shape[1],
    module__hidden_dim1=64,
    module__hidden_dim2=32,
    module__output_dim=2,
    max_epochs=20,
    lr=0.01,
    optimizer=optim.Adam,
    criterion=nn.CrossEntropyLoss,
    iterator_train__shuffle=True,
    callbacks=[EarlyStopping(patience=5)],
)

# Hyperparameter grid
params_gnn = {
    'lr': [0.01, 0.001],
    'max_epochs': [10, 20],
    'module__hidden_dim1': [64,128],
    'module__hidden_dim2': [16, 32, 64],
}

In [18]:
# Initialize GridSearchCV
gs_gnn = GridSearchCV(net, params_gnn, refit=True, cv=3, scoring='accuracy', verbose=2)

In [19]:
# Perform grid search
gs_gnn.fit(X_train, y_train)

print("Best parameters found:", gs_gnn.best_params_)
print("Best score:", gs_gnn.best_score_)

Fitting 3 folds for each of 24 candidates, totalling 72 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.1941[0m       [32m0.9199[0m        [35m0.1828[0m  9.7510
      2        [36m0.1758[0m       [32m0.9232[0m        [35m0.1783[0m  9.7684
      3        [36m0.1686[0m       [32m0.9266[0m        [35m0.1746[0m  10.2583
      4        [36m0.1641[0m       0.9265        [35m0.1742[0m  15.5983
      5        [36m0.1608[0m       [32m0.9285[0m        [35m0.1723[0m  15.5799
      6        [36m0.1584[0m       [32m0.9297[0m        [35m0.1690[0m  14.1114
      7        [36m0.1563[0m       0.9288        0.1736  18.1097
      8        [36m0.1542[0m       0.9295        0.1692  16.4285
      9        [36m0.1527[0m       0.9286        0.1718  18.9845
     10        [36m0.1527[0m       0.9284        0.1738  17.9675
[CV] END lr=0.01, max_epochs=10, module__hidden_dim1=64, m

In [58]:
# Save the best model
torch.save(gs_gnn.best_estimator_.module_.state_dict(), 'gnn_best_model.pth')

In [59]:
# Use the best model found by GridSearchCV
gs_gnn_best_model = gs_gnn.best_estimator_

# Fit the best model on the entire training set
gs_gnn_best_model.fit(X_train, y_train)

# Predict on the test set
y_pred = gs_gnn_best_model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

Re-initializing module because the following parameters were re-set: hidden_dim1, hidden_dim2, input_dim, output_dim.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_acc    valid_loss      dur
-------  ------------  -----------  ------------  -------
      1        [36m0.1922[0m       [32m0.9242[0m        [35m0.1754[0m  20.6575
      2        [36m0.1711[0m       [32m0.9271[0m        [35m0.1699[0m  19.5834
      3        [36m0.1646[0m       [32m0.9309[0m        [35m0.1660[0m  20.2723
      4        [36m0.1608[0m       0.9308        0.1663  21.3791
      5        [36m0.1578[0m       [32m0.9311[0m        [35m0.1650[0m  19.7376
      6        [36m0.1554[0m       [32m0.9316[0m        [35m0.1644[0m  20.1370
      7        [36m0.1529[0m       [32m0.9327[0m        [35m0.1638[0m  20.6030
      8        [36m0.1510[0m       0.9324        0.1656  20.2046
      9        [36m0.1496[0m       0.9319        0.1650  20.2349
 

In [60]:
print(f"GNN - Test Accuracy: {accuracy:.4f}")
print(f"GNN - Test Precision: {precision:.4f}")
print(f"GNN - Test Recall: {recall:.4f}")
print(f"GNN - Test F1 Score: {f1:.4f}")
print(classification_report(y_test, y_pred))

GNN - Test Accuracy: 0.9319
GNN - Test Precision: 0.8131
GNN - Test Recall: 0.9457
GNN - Test F1 Score: 0.8744
              precision    recall  f1-score   support

           0       0.98      0.93      0.95     75727
           1       0.81      0.95      0.87     25360

    accuracy                           0.93    101087
   macro avg       0.90      0.94      0.91    101087
weighted avg       0.94      0.93      0.93    101087



# Back Propagation Neural Network

In [20]:
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32)

In [21]:
class BPNNContractVulnerabilityModel(nn.Module):
    def __init__(self, input_dim, hidden_dim1=256):
        super(BPNNContractVulnerabilityModel, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.sigmoid(x).squeeze(1)

In [22]:
input_dim = X.shape[1]
net = NeuralNetClassifier(
    module=BPNNContractVulnerabilityModel,
    module__input_dim=input_dim,
    max_epochs=20,
    lr=0.01,
    optimizer=optim.Adam,
    criterion=nn.BCELoss,
    iterator_train__shuffle=True,
    device='cuda' if torch.cuda.is_available() else 'cpu',
    callbacks=[EarlyStopping(patience=5)]
)

# Define parameter grid for grid search
params_bpnn = {
    'lr': [0.01, 0.001],
    'module__hidden_dim1': [128, 256, 512],
    'max_epochs': [10, 20]
}

In [23]:
gs_bpnn = GridSearchCV(net, params_bpnn, cv=3, scoring='accuracy', verbose=2)


In [24]:
gs_bpnn.fit(X_train, y_train)

# Use the best model found by GridSearchCV
best_model = gs_bpnn.best_estimator_

# Print best parameters and best score
print("Best parameters found:", gs_bpnn.best_params_)
print("Best score:", gs_bpnn.best_score_)

Fitting 3 folds for each of 12 candidates, totalling 36 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.1899[0m       [32m0.9221[0m        [35m0.1796[0m  9.3606
      2        [36m0.1717[0m       [32m0.9266[0m        [35m0.1742[0m  9.2465
      3        [36m0.1652[0m       [32m0.9283[0m        [35m0.1730[0m  9.7945
      4        [36m0.1609[0m       [32m0.9298[0m        [35m0.1688[0m  9.5130
      5        [36m0.1572[0m       0.9293        0.1746  9.8519
      6        [36m0.1545[0m       [32m0.9305[0m        0.1710  10.2799
      7        [36m0.1523[0m       [32m0.9314[0m        0.1703  9.9122
      8        [36m0.1500[0m       0.9282        0.1728  10.1737
Stopping since valid_loss has not improved in the last 5 epochs.
[CV] END ....lr=0.01, max_epochs=10, module__hidden_dim1=128; total time= 1.5min
  epoch    train_loss    valid_acc    valid_loss     dur
--

In [66]:
torch.save(gs_bpnn.best_estimator_.module_.state_dict(), 'bpnn_best_model.pth')

In [67]:
y_pred = best_model.predict(X_test)
y_pred_class = (y_pred > 0.5).astype(int)
acc = accuracy_score(y_test.numpy(), y_pred_class)
precision = precision_score(y_test.numpy(), y_pred_class)
recall = recall_score(y_test.numpy(), y_pred_class)
f1 = f1_score(y_test.numpy(), y_pred_class)

In [68]:
print(f"Back Propagation Neural Network - Test Accuracy: {acc:.4f}")
print(f"Back Propagation Neural Network - Test Precision: {precision:.4f}")
print(f"Back Propagation Neural Network - Test Recall: {recall:.4f}")
print(f"Back Propagation Neural Network - Test F1 Score: {f1:.4f}")
print(classification_report(y_test.numpy(), y_pred_class))

Back Propagation Neural Network - Test Accuracy: 0.9324
Back Propagation Neural Network - Test Precision: 0.8159
Back Propagation Neural Network - Test Recall: 0.9432
Back Propagation Neural Network - Test F1 Score: 0.8749
              precision    recall  f1-score   support

         0.0       0.98      0.93      0.95     75727
         1.0       0.82      0.94      0.87     25360

    accuracy                           0.93    101087
   macro avg       0.90      0.94      0.91    101087
weighted avg       0.94      0.93      0.93    101087

