In [5]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Concatenate, Layer
from tensorflow.keras.optimizers import RMSprop
from gensim.models import Word2Vec
from sklearn.metrics import classification_report
import tensorflow as tf


In [11]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool, GATConv
from torch_geometric.utils import dense_to_sparse
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

def preprocess_code_snippets(code_snippets):
    from nltk.tokenize import word_tokenize
    from gensim.models import Word2Vec

    tokenized = [word_tokenize(code) for code in code_snippets]
    model = Word2Vec(tokenized, vector_size=128, window=5, min_count=1, workers=4)

    embeddings = [
        np.mean([model.wv[token] for token in tokens if token in model.wv], axis=0)
        for tokens in tokenized
    ]

    return np.array(embeddings)

def generate_pdg(feature_matrix):
    num_nodes = feature_matrix.shape[0]
    edge_index = torch.combinations(torch.arange(num_nodes), r=2).T
    edge_index = torch.cat([edge_index, edge_index.flip(0)], dim=1)
    return edge_index

def prepare_graph_data(features, labels):
    data_list = []
    for i in range(len(features)):
        x = torch.tensor(features[i], dtype=torch.float).unsqueeze(0)
        edge_index = generate_pdg(x)
        y = torch.tensor([labels[i]], dtype=torch.long)
        data = Data(x=x, edge_index=edge_index, y=y)
        data_list.append(data)
    return data_list

class VulnerabilityGCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(VulnerabilityGCN, self).__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=4, concat=True)
        self.conv2 = GATConv(hidden_dim * 4, hidden_dim, heads=1, concat=False)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index).relu()
        x = self.conv2(x, edge_index).relu()
        x = global_mean_pool(x, data.batch)
        x = self.fc(x)
        return x

def load_data(train_path, test_path):
    train_df = pd.read_csv(train_path)
    test_df = pd.read_csv(test_path)

    X_train = train_df['functionSource']
    y_train = train_df['numeric']

    X_test = test_df['functionSource']
    y_test = test_df['numeric']

    train_embeddings = preprocess_code_snippets(X_train)
    test_embeddings = preprocess_code_snippets(X_test)

    train_data = prepare_graph_data(train_embeddings, y_train)
    test_data = prepare_graph_data(test_embeddings, y_test)

    return train_data, test_data

def train(model, train_loader, optimizer, criterion):
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

def evaluate(model, loader):
    model.eval()
    correct = 0
    total = 0
    y_true, y_pred = [], []

    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data)
            pred = out.argmax(dim=1)
            correct += (pred == data.y).sum().item()
            total += data.num_graphs
            y_true.extend(data.y.cpu().numpy())
            y_pred.extend(pred.cpu().numpy())

    accuracy = correct / total
    print(classification_report(y_true, y_pred))
    return accuracy



train_path ='/Users/akter/Documents/MSR update/ICSME version/Train-Test daatset/Splited dataset/train_label_dataset.csv'
test_path= '/Users/akter/Documents/MSR update/ICSME version/Train-Test daatset/Splited dataset/test_label_dataset.csv'

train_data, test_data = load_data(train_path, test_path)

train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32)

model = VulnerabilityGCN(input_dim=128, hidden_dim=64, output_dim=5)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

for epoch in range(1, 21):
    loss = train(model, train_loader, optimizer, criterion)
    print(f"Epoch {epoch}, Loss: {loss:.4f}")

print("Performance on Test Set:")
evaluate(model, test_loader)




Epoch 1, Loss: 1.3084
Epoch 2, Loss: 1.1952
Epoch 3, Loss: 1.1525
Epoch 4, Loss: 1.1247
Epoch 5, Loss: 1.1032
Epoch 6, Loss: 1.0930
Epoch 7, Loss: 1.0764
Epoch 8, Loss: 1.0640
Epoch 9, Loss: 1.0568
Epoch 10, Loss: 1.0465
Epoch 11, Loss: 1.0355
Epoch 12, Loss: 1.0253
Epoch 13, Loss: 1.0212
Epoch 14, Loss: 1.0141
Epoch 15, Loss: 1.0099
Epoch 16, Loss: 1.0005
Epoch 17, Loss: 0.9973
Epoch 18, Loss: 0.9867
Epoch 19, Loss: 0.9850
Epoch 20, Loss: 0.9810
Performance on Test Set:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       934
           1       0.24      0.35      0.29       860
           2       0.00      0.00      0.00       918
           3       0.29      0.12      0.17       909
           4       0.18      0.58      0.27       879

    accuracy                           0.20      4500
   macro avg       0.14      0.21      0.15      4500
weighted avg       0.14      0.20      0.14      4500



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


0.20466666666666666

In [13]:
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
    cohen_kappa_score,
    roc_auc_score,
    mean_squared_error,
    mean_absolute_error,
)

Y_pred = []
Y_true = []

model.eval()
with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        predictions = output.argmax(dim=1).cpu().numpy()  
        Y_pred.extend(predictions)
        Y_true.extend(data.y.cpu().numpy())  
        
Y_pred = np.array(Y_pred)
Y_true = np.array(Y_true)

print("Classification Report:")
print(classification_report(Y_true, Y_pred, zero_division=0))

conf_matrix = confusion_matrix(Y_true, Y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)


accuracy = accuracy_score(Y_true, Y_pred)
print(f"\nAccuracy: {accuracy}")

precision_macro = precision_score(Y_true, Y_pred, average="macro", zero_division=0)
recall_macro = recall_score(Y_true, Y_pred, average="macro", zero_division=0)
f1_macro = f1_score(Y_true, Y_pred, average="macro", zero_division=0)

precision_weighted = precision_score(Y_true, Y_pred, average="weighted", zero_division=0)
recall_weighted = recall_score(Y_true, Y_pred, average="weighted", zero_division=0)
f1_weighted = f1_score(Y_true, Y_pred, average="weighted", zero_division=0)

print(f"\nPrecision (Macro): {precision_macro}")
print(f"Recall (Macro): {recall_macro}")
print(f"F1 Score (Macro): {f1_macro}")

print(f"\nPrecision (Weighted): {precision_weighted}")
print(f"Recall (Weighted): {recall_weighted}")
print(f"F1 Score (Weighted): {f1_weighted}")

mcc = matthews_corrcoef(Y_true, Y_pred)
print(f"\nMatthews Correlation Coefficient: {mcc}")

kappa = cohen_kappa_score(Y_true, Y_pred)
print(f"\nCohen's Kappa Score: {kappa}")

try:
    roc_auc = roc_auc_score(pd.get_dummies(Y_true), pd.get_dummies(Y_pred), multi_class="ovr", average="macro")
    print(f"\nROC AUC Score (Macro): {roc_auc}")
except ValueError:
    print("\nROC AUC Score could not be computed due to label imbalance or insufficient classes.")

mse = mean_squared_error(Y_true, Y_pred)
mae = mean_absolute_error(Y_true, Y_pred)

print(f"\nMean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")


metrics_summary = {
    "Accuracy": accuracy,
    "Precision (Macro)": precision_macro,
    "Recall (Macro)": recall_macro,
    "F1 Score (Macro)": f1_macro,
    "Precision (Weighted)": precision_weighted,
    "Recall (Weighted)": recall_weighted,
    "F1 Score (Weighted)": f1_weighted,
    "Matthews Correlation Coefficient (MCC)": mcc,
    "Cohen's Kappa": kappa,
    "Mean Squared Error (MSE)": mse,
    "Mean Absolute Error (MAE)": mae,
}

if "roc_auc" in locals():
    metrics_summary["ROC AUC Score (Macro)"] = roc_auc

print("\nMetrics Summary:")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value}")


Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       934
           1       0.24      0.35      0.29       860
           2       0.00      0.00      0.00       918
           3       0.29      0.12      0.17       909
           4       0.18      0.58      0.27       879

    accuracy                           0.20      4500
   macro avg       0.14      0.21      0.15      4500
weighted avg       0.14      0.20      0.14      4500


Confusion Matrix:
[[  0 260   0  84 590]
 [  0 303   0  51 506]
 [  0 171   0  81 666]
 [  0 206   0 110 593]
 [  0 312   0  59 508]]

Accuracy: 0.20466666666666666

Precision (Macro): 0.1410326641885732
Recall (Macro): 0.21025342958138976
F1 Score (Macro): 0.14569196685212654

Precision (Weighted): 0.1386248321930458
Recall (Weighted): 0.20466666666666666
F1 Score (Weighted): 0.14221443409959064

Matthews Correlation Coefficient: 0.015552240694647222

Cohen's Kappa Score: 0.0123407

In [15]:

metrics_df = pd.DataFrame([metrics_summary])
metrics_df.to_csv("/Users/akter/Documents/MSR update/ICSME version//All Embeddings dataset/SOTA extracted dataset/SOTA_performances/IVDet.csv", index=False)