In [3]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Concatenate, Layer
from tensorflow.keras.optimizers import RMSprop
from gensim.models import Word2Vec
from sklearn.metrics import classification_report
import tensorflow as tf

import pandas as pd

import re
def remo(code):
    # Check if input is a string
    if not isinstance(code, str):
        return code
        
    code = re.sub(r'/\.?\*/', '', code, flags=re.DOTALL)
    code = re.sub(r'//.*?$', '', code, flags=re.MULTILINE)
    code = re.sub(r'^\s*[\n\r]', '', code, flags=re.MULTILINE)
    return code.strip()

# Apply the function to the 'func' column (or whatever your code column is named)

train = pd.read_csv("/Users/user01/fahim/icsme/train_label_dataset.csv")
test = pd.read_csv("/Users/user01/fahim/icsme/test_label_dataset.csv")
train['functionSource'] = train['functionSource'].apply(remo)
test['functionSource'] = test['functionSource'].apply(remo)

train = train[['functionSource', 'numeric']]
test = test[['functionSource', 'numeric']]


train.reset_index(drop=True, inplace=True)
test.reset_index(drop=True, inplace=True)

In [20]:
train.head()

Unnamed: 0,functionSource,label,numeric
0,"ng_mix_init(struct ng_devstate *dev, char *dev...",CWE-other,4
1,"execdotcmd(const char *cmd, char *defcmd, cons...",CWE-476,3
2,"setBlockIndent(QTextBlock block, int indent)\n...",CWE-119,0
3,efi_snp_notify ( struct net_device *netdev ) {...,CWE-119,0
4,"dir_ctrl(X509_LOOKUP *ctx, int cmd, const char...",CWE-other,4


In [5]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Concatenate, Layer
from tensorflow.keras.optimizers import RMSprop
from gensim.models import Word2Vec
from sklearn.metrics import classification_report
import tensorflow as tf

LEARNING_RATE = 0.001
BATCH_SIZE = 64
EPOCHS_GLOBAL_LOCAL = 60
EPOCHS_FUSION = 10
DROPOUT_RATE = 0.5

GLOBAL_NODES = 300
LOCAL_NODES = 200
FUSION_NODES = 500
VECTOR_DIM = 50
MAX_GLOBAL_LENGTH = 300
MAX_LOCAL_LENGTH = 100

def parse_code_gadgets(source_code):
    return [["int", "main", "(", ")", "{"], ["return", "0", ";", "}"]]

def parse_code_attention(code_gadgets):
    return [["main", "{"], ["return", "0"]]

class CustomAttention(Layer):
    def call(self, inputs):
        query, value = inputs, inputs
        scores = tf.matmul(query, value, transpose_b=True)
        distribution = tf.nn.softmax(scores, axis=-1)
        attention_output = tf.matmul(distribution, value)
        return attention_output

def convert_to_vectors(gadgets, word2vec_model, max_length):
    vectors = []
    for gadget in gadgets:
        gadget_vectors = [
            word2vec_model.wv[word] if word in word2vec_model.wv else np.zeros(word2vec_model.vector_size)
            for word in gadget
        ]
        if len(gadget_vectors) < max_length:
            gadget_vectors += [np.zeros(word2vec_model.vector_size)] * (max_length - len(gadget_vectors))
        else:
            gadget_vectors = gadget_vectors[:max_length]
        vectors.append(gadget_vectors)
    return np.array(vectors)

def global_feature_model():
    global_input = Input(shape=(MAX_GLOBAL_LENGTH, VECTOR_DIM), name="global_input")
    lstm_layer = LSTM(GLOBAL_NODES, activation="tanh", return_sequences=True)(global_input)
    attention_layer = CustomAttention()(lstm_layer)
    lstm_output = LSTM(GLOBAL_NODES, activation="tanh")(attention_layer)
    return Model(global_input, lstm_output, name="global_model")

def local_feature_model():
    local_input = Input(shape=(MAX_LOCAL_LENGTH, VECTOR_DIM), name="local_input")
    lstm_layer = LSTM(LOCAL_NODES, activation="tanh", return_sequences=True)(local_input)
    attention_layer = CustomAttention()(lstm_layer)
    lstm_output = LSTM(LOCAL_NODES, activation="tanh")(attention_layer)
    return Model(local_input, lstm_output, name="local_model")

def feature_fusion_model(global_model, local_model):
    global_input = global_model.input
    local_input = local_model.input
    merged_features = Concatenate()([global_model.output, local_model.output])
    dense_layer = Dense(FUSION_NODES, activation="tanh")(merged_features)
    dropout_layer = Dropout(DROPOUT_RATE)(dense_layer)
    output_layer = Dense(5, activation="softmax")(dropout_layer)
    return Model([global_input, local_input], output_layer, name="fusion_model")

def process_code_samples(df, column_name):
    source_code_samples = df[column_name].tolist()
    code_gadgets = [parse_code_gadgets(code) for code in source_code_samples]
    code_attentions = [parse_code_attention(gadgets) for gadgets in code_gadgets]
    corpus = [token for gadgets in code_gadgets for token in gadgets]
    word2vec_model = Word2Vec(sentences=corpus, vector_size=VECTOR_DIM, window=5, min_count=1, workers=4)
    X_global = np.array([convert_to_vectors(gadgets, word2vec_model, MAX_GLOBAL_LENGTH)[0] for gadgets in code_gadgets])
    X_local = np.array([convert_to_vectors(attentions, word2vec_model, MAX_LOCAL_LENGTH)[0] for attentions in code_attentions])
    return X_global, X_local, word2vec_model

X_train_global, X_train_local, word2vec_model = process_code_samples(train, "functionSource")
Y_train = train["numeric"].values
X_test_global, X_test_local, _ = process_code_samples(test, "functionSource")
Y_test = test["numeric"].values

global_model = global_feature_model()
local_model = local_feature_model()

global_model.compile(optimizer=RMSprop(LEARNING_RATE), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
global_model.fit(X_train_global, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS_GLOBAL_LOCAL)

local_model.compile(optimizer=RMSprop(LEARNING_RATE), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
local_model.fit(X_train_local, Y_train, batch_size=BATCH_SIZE, epochs=EPOCHS_GLOBAL_LOCAL)

fusion_model = feature_fusion_model(global_model, local_model)
fusion_model.compile(optimizer=RMSprop(LEARNING_RATE), loss="sparse_categorical_crossentropy", metrics=["accuracy"])
fusion_model.fit([X_train_global, X_train_local], Y_train, batch_size=BATCH_SIZEfcdvdvv, epochs=EPOCHS_FUSION)

Y_pred = fusion_model.predict([X_test_global, X_test_local])
Y_pred_classes = np.argmax(Y_pred, axis=1)
print(classification_report(Y_test, Y_pred_classes))



Epoch 1/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 1s/step - accuracy: 0.1993 - loss: 1.8955
Epoch 2/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m350s[0m 1s/step - accuracy: 0.1945 - loss: 1.6099
Epoch 3/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m351s[0m 1s/step - accuracy: 0.2054 - loss: 1.6096
Epoch 4/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m356s[0m 1s/step - accuracy: 0.1922 - loss: 1.6095
Epoch 5/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m331s[0m 1s/step - accuracy: 0.1982 - loss: 1.6095
Epoch 6/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m347s[0m 1s/step - accuracy: 0.2024 - loss: 1.6094
Epoch 7/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m346s[0m 1s/step - accuracy: 0.2018 - loss: 1.6095
Epoch 8/60
[1m282/282[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 1s/step - accuracy: 0.1974 - loss: 1.6095
Epoch 9/60
[1m282/282[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [6]:
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
    cohen_kappa_score,
    roc_auc_score,
    mean_squared_error,
    mean_absolute_error,
)

Y_pred = fusion_model.predict([X_test_global, X_test_local])
Y_pred_classes = np.argmax(Y_pred, axis=1)

print("Classification Report:")
print(classification_report(Y_test, Y_pred_classes))

conf_matrix = confusion_matrix(Y_test, Y_pred_classes)
print("\nConfusion Matrix:")
print(conf_matrix)


accuracy = accuracy_score(Y_test, Y_pred_classes)
print(f"\nAccuracy: {accuracy}")

precision_macro = precision_score(Y_test, Y_pred_classes, average="macro", zero_division=0)
recall_macro = recall_score(Y_test, Y_pred_classes, average="macro", zero_division=0)
f1_macro = f1_score(Y_test, Y_pred_classes, average="macro", zero_division=0)

precision_weighted = precision_score(Y_test, Y_pred_classes, average="weighted", zero_division=0)
recall_weighted = recall_score(Y_test, Y_pred_classes, average="weighted", zero_division=0)
f1_weighted = f1_score(Y_test, Y_pred_classes, average="weighted", zero_division=0)

print(f"\nPrecision (Macro): {precision_macro}")
print(f"Recall (Macro): {recall_macro}")
print(f"F1 Score (Macro): {f1_macro}")

print(f"\nPrecision (Weighted): {precision_weighted}")
print(f"Recall (Weighted): {recall_weighted}")
print(f"F1 Score (Weighted): {f1_weighted}")

mcc = matthews_corrcoef(Y_test, Y_pred_classes)
print(f"\nMatthews Correlation Coefficient: {mcc}")

kappa = cohen_kappa_score(Y_test, Y_pred_classes)
print(f"\nCohen's Kappa Score: {kappa}")

try:
    roc_auc = roc_auc_score(pd.get_dummies(Y_test), Y_pred, multi_class="ovr", average="macro")
    print(f"\nROC AUC Score (Macro): {roc_auc}")
except ValueError:
    print("\nROC AUC Score could not be computed due to label imbalance or insufficient classes.")

# Error Metrics
mse = mean_squared_error(Y_test, Y_pred_classes)
mae = mean_absolute_error(Y_test, Y_pred_classes)

print(f"\nMean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")

# All Computed Metrics Summary
metrics_summary = {
    "Accuracy": accuracy,
    "Precision (Macro)": precision_macro,
    "Recall (Macro)": recall_macro,
    "F1 Score (Macro)": f1_macro,
    "Precision (Weighted)": precision_weighted,
    "Recall (Weighted)": recall_weighted,
    "F1 Score (Weighted)": f1_weighted,
    "Matthews Correlation Coefficient (MCC)": mcc,
    "Cohen's Kappa": kappa,
    "Mean Squared Error (MSE)": mse,
    "Mean Absolute Error (MAE)": mae,
}

if "roc_auc" in locals():
    metrics_summary["ROC AUC Score (Macro)"] = roc_auc

print("\nMetrics Summary:")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value}")


[1m141/141[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 244ms/step
Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       934
           1       0.00      0.00      0.00       860
           2       0.00      0.00      0.00       918
           3       0.00      0.00      0.00       909
           4       0.20      1.00      0.33       879

    accuracy                           0.20      4500
   macro avg       0.04      0.20      0.07      4500
weighted avg       0.04      0.20      0.06      4500


Confusion Matrix:
[[  0   0   0   0 934]
 [  0   0   0   0 860]
 [  0   0   0   0 918]
 [  0   0   0   0 909]
 [  0   0   0   0 879]]

Accuracy: 0.19533333333333333

Precision (Macro): 0.039066666666666666
Recall (Macro): 0.2
F1 Score (Macro): 0.06536530953708868

Precision (Weighted): 0.03815511111111111
Recall (Weighted): 0.19533333333333333
F1 Score (Weighted): 0.06384011898122327

Matthews Correlation 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
import numpy as np
import pandas as pd

# Compute confusion matrix
conf_matrix = confusion_matrix(Y_test, Y_pred_classes)
num_classes = conf_matrix.shape[0]

# Initialize lists to store per-class metrics
TP = np.diag(conf_matrix)  # True Positives for each class
FP = np.sum(conf_matrix, axis=0) - TP  # False Positives for each class
FN = np.sum(conf_matrix, axis=1) - TP  # False Negatives for each class
TN = np.sum(conf_matrix) - (TP + FP + FN)  # True Negatives for each class

# Compute sensitivity (recall) and specificity for each class
sensitivity_per_class = TP / (TP + FN + 1e-10)  # Avoid division by zero
specificity_per_class = TN / (TN + FP + 1e-10)

# Compute overall (macro-average) sensitivity and specificity
overall_sensitivity = np.mean(sensitivity_per_class)
overall_specificity = np.mean(specificity_per_class)

# Print per-class metrics
for i in range(num_classes):
    print(f"Class {i}:")
    print(f"  TP: {TP[i]}, FP: {FP[i]}, FN: {FN[i]}, TN: {TN[i]}")
    print(f"  Sensitivity (Recall): {sensitivity_per_class[i]}")
    print(f"  Specificity: {specificity_per_class[i]}\n")

# Print overall metrics
print(f"\nOverall Sensitivity (Recall): {overall_sensitivity}")
print(f"Overall Specificity: {overall_specificity}")


Class 0:
  TP: 0, FP: 0, FN: 934, TN: 3566
  Sensitivity (Recall): 0.0
  Specificity: 0.9999999999999719

Class 1:
  TP: 0, FP: 0, FN: 860, TN: 3640
  Sensitivity (Recall): 0.0
  Specificity: 0.9999999999999725

Class 2:
  TP: 0, FP: 0, FN: 918, TN: 3582
  Sensitivity (Recall): 0.0
  Specificity: 0.999999999999972

Class 3:
  TP: 0, FP: 0, FN: 909, TN: 3591
  Sensitivity (Recall): 0.0
  Specificity: 0.9999999999999721

Class 4:
  TP: 879, FP: 3621, FN: 0, TN: 0
  Sensitivity (Recall): 0.9999999999998862
  Specificity: 0.0


Overall Sensitivity (Recall): 0.19999999999997725
Overall Specificity: 0.7999999999999777
