In [33]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, Concatenate, Layer
from tensorflow.keras.optimizers import RMSprop
from gensim.models import Word2Vec
from sklearn.metrics import classification_report
import tensorflow as tf
# train = pd.read_csv('/Users/akter/Documents/MSR update//ICSME version/Test cases/Deviing quemu/Devign_quemu_train.csv')
# test = pd.read_csv('/Users/akter/Documents/MSR update/ICSME version/Train-Test daatset/Splited dataset/test_label_dataset.csv')

### on devign data

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, cohen_kappa_score, mean_squared_error, mean_absolute_error
from gensim.models import Word2Vec

class GGGNN(nn.Module):
    def __init__(self, feature_dim_size, hidden_size, num_GNN_layers, num_classes, dropout):
        super(GGGNN, self).__init__()
        self.fc = nn.Linear(feature_dim_size, hidden_size)
        self.dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList([nn.Linear(hidden_size, hidden_size) for _ in range(num_GNN_layers)])
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, x, adj, adj_mask):
        h = self.fc(x)
        for layer in self.layers:
            h = torch.relu(layer(h))
        h = self.dropout(h)
        return self.classifier(h.mean(dim=1))

class DevignModel(nn.Module):
    def __init__(self, config, args):
        super(DevignModel, self).__init__()
        self.args = args
        self.w_embeddings = torch.randn(args.vocab_size, args.feature_dim_size)
        self.gnn = GGGNN(feature_dim_size=args.feature_dim_size, hidden_size=args.hidden_size, num_GNN_layers=args.num_GNN_layers, num_classes=args.hidden_size, dropout=config.hidden_dropout_prob)
        self.conv1 = nn.Conv1d(args.hidden_size, args.hidden_size, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv1d(args.hidden_size, args.hidden_size, kernel_size=1)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc = nn.Linear(args.hidden_size, args.num_classes)

    def forward(self, input_ids, labels=None):
        batch_size, seq_len = input_ids.shape
        adj, x_feature = build_graph(input_ids.cpu().numpy(), self.w_embeddings.numpy())
        adj, adj_mask = preprocess_adj(adj)
        adj_feature = preprocess_features(x_feature)
        adj = torch.from_numpy(adj).float().to(input_ids.device)
        adj_mask = torch.from_numpy(adj_mask).float().to(input_ids.device)
        adj_feature = torch.from_numpy(adj_feature).float().to(input_ids.device)
        outputs = self.gnn(adj_feature, adj, adj_mask)
        outputs = outputs.unsqueeze(2)
        if outputs.size(2) >= 3:
            outputs = self.pool1(torch.relu(self.conv1(outputs))).squeeze(2)
            if outputs.size(2) >= 2:
                outputs = self.pool2(torch.relu(self.conv2(outputs))).squeeze(2)
        else:
            outputs = outputs.squeeze(2)
        logits = self.fc(outputs)
        prob = torch.softmax(logits, dim=1)
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
            return loss, prob
        return prob

class CodeDataset(Dataset):
    def __init__(self, csv_file, tokenizer, max_len, word2vec_model):
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.word2vec = word2vec_model

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        code = self.data.iloc[idx]['input']
        label = int(self.data.iloc[idx]['output'])
        tokens = self.tokenizer(code, max_length=self.max_len, truncation=True, padding='max_length', return_tensors="pt")
        input_ids = tokens['input_ids'].squeeze(0)
        return input_ids, torch.tensor(label, dtype=torch.long)

def build_graph(input_ids, w_embeddings):
    batch_size, seq_len = input_ids.shape
    vocab_size, feature_dim_size = w_embeddings.shape
    x_feature = w_embeddings[input_ids]
    num_nodes = seq_len
    adj = np.ones((batch_size, num_nodes, num_nodes))
    return adj, x_feature

def preprocess_adj(adj):
    adj_mask = np.ones_like(adj)
    return adj, adj_mask

def preprocess_features(features):
    return features

class Config:
    hidden_dropout_prob = 0.1

class Args:
    vocab_size = 10000
    feature_dim_size = 128
    hidden_size = 200
    num_GNN_layers = 6
    num_classes = 2
    max_len = 128
    batch_size = 128
    learning_rate = 1e-4
    epochs = 20

if __name__ == "__main__":
    config = Config()
    args = Args()
    word2vec_model = Word2Vec(vector_size=args.feature_dim_size, min_count=1)
    tokenizer = lambda x, max_length, truncation, padding, return_tensors: {
        'input_ids': torch.randint(0, args.vocab_size, (1, max_length))
    }
    train_dataset = CodeDataset(csv_file="/Users/user01/fahim/icsme/Devign/devign_vultest.csv", tokenizer=tokenizer, max_len=args.max_len, word2vec_model=word2vec_model)
    test_dataset = CodeDataset(csv_file="/Users/user01/fahim/icsme/Devign/devign_vultest.csv", tokenizer=tokenizer, max_len=args.max_len, word2vec_model=word2vec_model)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    model = DevignModel(config, args).float()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in train_loader:
            input_ids, labels = batch
            input_ids = input_ids.to(torch.long).to(model.w_embeddings.device)
            labels = labels.to(torch.long).to(model.w_embeddings.device)
            optimizer.zero_grad()
            loss, _ = model(input_ids, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}/{args.epochs}, Loss: {total_loss / len(train_loader):.4f}")



Epoch 1/20, Loss: 0.6909
Epoch 2/20, Loss: 0.6893
Epoch 3/20, Loss: 0.6892
Epoch 4/20, Loss: 0.6893
Epoch 5/20, Loss: 0.6892
Epoch 6/20, Loss: 0.6887
Epoch 7/20, Loss: 0.6892
Epoch 8/20, Loss: 0.6893
Epoch 9/20, Loss: 0.6891
Epoch 10/20, Loss: 0.6895
Epoch 11/20, Loss: 0.6894
Epoch 12/20, Loss: 0.6887
Epoch 13/20, Loss: 0.6893
Epoch 14/20, Loss: 0.6892
Epoch 15/20, Loss: 0.6887
Epoch 16/20, Loss: 0.6890
Epoch 17/20, Loss: 0.6895
Epoch 18/20, Loss: 0.6888
Epoch 19/20, Loss: 0.6893
Epoch 20/20, Loss: 0.6889


In [12]:
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
    cohen_kappa_score,
    roc_auc_score,
    mean_squared_error,
    mean_absolute_error,
)

Y_pred = []
Y_true = []

model.eval()

with torch.no_grad():
    for data in test_loader:
        input_ids, labels = data
        input_ids = input_ids.to(torch.long).to(model.w_embeddings.device)
        labels = labels.to(torch.long).to(model.w_embeddings.device)

        output = model(input_ids)
        predictions = output.argmax(dim=1).cpu().numpy()  
        Y_pred.extend(predictions)
        Y_true.extend(labels.cpu().numpy())  

Y_pred = np.array(Y_pred)
Y_true = np.array(Y_true)

print("Classification Report:")
print(classification_report(Y_true, Y_pred, zero_division=0))

conf_matrix = confusion_matrix(Y_true, Y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

accuracy = accuracy_score(Y_true, Y_pred)
print(f"\nAccuracy: {accuracy}")

precision_macro = precision_score(Y_true, Y_pred, average="macro", zero_division=0)
recall_macro = recall_score(Y_true, Y_pred, average="macro", zero_division=0)
f1_macro = f1_score(Y_true, Y_pred, average="macro", zero_division=0)

precision_weighted = precision_score(Y_true, Y_pred, average="weighted", zero_division=0)
recall_weighted = recall_score(Y_true, Y_pred, average="weighted", zero_division=0)
f1_weighted = f1_score(Y_true, Y_pred, average="weighted", zero_division=0)

print(f"\nPrecision (Macro): {precision_macro}")
print(f"Recall (Macro): {recall_macro}")
print(f"F1 Score (Macro): {f1_macro}")
print(f"\nPrecision (Weighted): {precision_weighted}")
print(f"Recall (Weighted): {recall_weighted}")
print(f"F1 Score (Weighted): {f1_weighted}")

mcc = matthews_corrcoef(Y_true, Y_pred)
print(f"\nMatthews Correlation Coefficient: {mcc}")

kappa = cohen_kappa_score(Y_true, Y_pred)
print(f"\nCohen's Kappa Score: {kappa}")

try:
    roc_auc = roc_auc_score(pd.get_dummies(Y_true), pd.get_dummies(Y_pred), multi_class="ovr", average="macro")
    print(f"\nROC AUC Score (Macro): {roc_auc}")
except ValueError:
    print("\nROC AUC Score could not be computed due to label imbalance or insufficient classes.")

mse = mean_squared_error(Y_true, Y_pred)
mae = mean_absolute_error(Y_true, Y_pred)

print(f"\nMean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")

metrics_summary = {
    "Accuracy": accuracy,
    "Precision (Macro)": precision_macro,
    "Recall (Macro)": recall_macro,
    "F1 Score (Macro)": f1_macro,
    "Precision (Weighted)": precision_weighted,
    "Recall (Weighted)": recall_weighted,
    "F1 Score (Weighted)": f1_weighted,
    "Matthews Correlation Coefficient (MCC)": mcc,
    "Cohen's Kappa": kappa,
    "Mean Squared Error (MSE)": mse,
    "Mean Absolute Error (MAE)": mae,
}

if "roc_auc" in locals():
    metrics_summary["ROC AUC Score (Macro)"] = roc_auc

print("\nMetrics Summary:")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value}")



Classification Report:
              precision    recall  f1-score   support

           0       0.55      1.00      0.71      1021
           1       0.00      0.00      0.00       852

    accuracy                           0.55      1873
   macro avg       0.27      0.50      0.35      1873
weighted avg       0.30      0.55      0.38      1873


Confusion Matrix:
[[1021    0]
 [ 852    0]]

Accuracy: 0.5451147891083823

Precision (Macro): 0.27255739455419115
Recall (Macro): 0.5
F1 Score (Macro): 0.35279889426399447

Precision (Weighted): 0.2971501333046761
Recall (Weighted): 0.5451147891083823
F1 Score (Weighted): 0.3846317896887756

Matthews Correlation Coefficient: 0.0

Cohen's Kappa Score: 0.0

ROC AUC Score (Macro): 0.5

Mean Squared Error (MSE): 0.4548852108916177
Mean Absolute Error (MAE): 0.4548852108916177

Metrics Summary:
Accuracy: 0.5451147891083823
Precision (Macro): 0.27255739455419115
Recall (Macro): 0.5
F1 Score (Macro): 0.35279889426399447
Precision (Weighted): 0.297

### on draper data

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, cohen_kappa_score, mean_squared_error, mean_absolute_error
from gensim.models import Word2Vec

class GGGNN(nn.Module):
    def __init__(self, feature_dim_size, hidden_size, num_GNN_layers, num_classes, dropout):
        super(GGGNN, self).__init__()
        self.fc = nn.Linear(feature_dim_size, hidden_size)
        self.dropout = nn.Dropout(dropout)
        self.layers = nn.ModuleList([nn.Linear(hidden_size, hidden_size) for _ in range(num_GNN_layers)])
        self.classifier = nn.Linear(hidden_size, num_classes)

    def forward(self, x, adj, adj_mask):
        h = self.fc(x)
        for layer in self.layers:
            h = torch.relu(layer(h))
        h = self.dropout(h)
        return self.classifier(h.mean(dim=1))

class DevignModel(nn.Module):
    def __init__(self, config, args):
        super(DevignModel, self).__init__()
        self.args = args
        self.w_embeddings = torch.randn(args.vocab_size, args.feature_dim_size)
        self.gnn = GGGNN(feature_dim_size=args.feature_dim_size, hidden_size=args.hidden_size, num_GNN_layers=args.num_GNN_layers, num_classes=args.hidden_size, dropout=config.hidden_dropout_prob)
        self.conv1 = nn.Conv1d(args.hidden_size, args.hidden_size, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv1d(args.hidden_size, args.hidden_size, kernel_size=1)
        self.pool2 = nn.MaxPool1d(kernel_size=2, stride=2)
        self.fc = nn.Linear(args.hidden_size, args.num_classes)

    def forward(self, input_ids, labels=None):
        batch_size, seq_len = input_ids.shape
        adj, x_feature = build_graph(input_ids.cpu().numpy(), self.w_embeddings.numpy())
        adj, adj_mask = preprocess_adj(adj)
        adj_feature = preprocess_features(x_feature)
        adj = torch.from_numpy(adj).float().to(input_ids.device)
        adj_mask = torch.from_numpy(adj_mask).float().to(input_ids.device)
        adj_feature = torch.from_numpy(adj_feature).float().to(input_ids.device)
        outputs = self.gnn(adj_feature, adj, adj_mask)
        outputs = outputs.unsqueeze(2)
        if outputs.size(2) >= 3:
            outputs = self.pool1(torch.relu(self.conv1(outputs))).squeeze(2)
            if outputs.size(2) >= 2:
                outputs = self.pool2(torch.relu(self.conv2(outputs))).squeeze(2)
        else:
            outputs = outputs.squeeze(2)
        logits = self.fc(outputs)
        prob = torch.softmax(logits, dim=1)
        if labels is not None:
            loss = nn.CrossEntropyLoss()(logits, labels)
            return loss, prob
        return prob

class CodeDataset(Dataset):
    def __init__(self, csv_file, tokenizer, max_len, word2vec_model):
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.word2vec = word2vec_model

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        code = self.data.iloc[idx]['functionSource']
        label = int(self.data.iloc[idx]['numeric'])
        tokens = self.tokenizer(code, max_length=self.max_len, truncation=True, padding='max_length', return_tensors="pt")
        input_ids = tokens['input_ids'].squeeze(0)
        return input_ids, torch.tensor(label, dtype=torch.long)

def build_graph(input_ids, w_embeddings):
    batch_size, seq_len = input_ids.shape
    vocab_size, feature_dim_size = w_embeddings.shape
    x_feature = w_embeddings[input_ids]
    num_nodes = seq_len
    adj = np.ones((batch_size, num_nodes, num_nodes))
    return adj, x_feature

def preprocess_adj(adj):
    adj_mask = np.ones_like(adj)
    return adj, adj_mask

def preprocess_features(features):
    return features

class Config:
    hidden_dropout_prob = 0.1

class Args:
    vocab_size = 10000
    feature_dim_size = 128
    hidden_size = 200
    num_GNN_layers = 6
    num_classes = 5
    max_len = 128
    batch_size = 128
    learning_rate = 1e-4
    epochs = 20

if __name__ == "__main__":
    config = Config()
    args = Args()
    word2vec_model = Word2Vec(vector_size=args.feature_dim_size, min_count=1)
    tokenizer = lambda x, max_length, truncation, padding, return_tensors: {
        'input_ids': torch.randint(0, args.vocab_size, (1, max_length))
    }
    train_dataset = CodeDataset(csv_file="/Users/user01/fahim/icsme/train_label_dataset.csv", tokenizer=tokenizer, max_len=args.max_len, word2vec_model=word2vec_model)
    test_dataset = CodeDataset(csv_file="/Users/user01/fahim/icsme/test_label_dataset.csv", tokenizer=tokenizer, max_len=args.max_len, word2vec_model=word2vec_model)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)
    model = DevignModel(config, args).float()
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    for epoch in range(args.epochs):
        total_loss = 0
        model.train()
        for batch in train_loader:
            input_ids, labels = batch
            input_ids = input_ids.to(torch.long).to(model.w_embeddings.device)
            labels = labels.to(torch.long).to(model.w_embeddings.device)
            optimizer.zero_grad()
            loss, _ = model(input_ids, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {epoch + 1}/{args.epochs}, Loss: {total_loss / len(train_loader):.4f}")


Epoch 1/20, Loss: 1.6098
Epoch 2/20, Loss: 1.6096
Epoch 3/20, Loss: 1.6096
Epoch 4/20, Loss: 1.6096
Epoch 5/20, Loss: 1.6097
Epoch 6/20, Loss: 1.6096
Epoch 7/20, Loss: 1.6096
Epoch 8/20, Loss: 1.6096
Epoch 9/20, Loss: 1.6096
Epoch 10/20, Loss: 1.6096
Epoch 11/20, Loss: 1.6096
Epoch 12/20, Loss: 1.6096
Epoch 13/20, Loss: 1.6096
Epoch 14/20, Loss: 1.6095
Epoch 15/20, Loss: 1.6096
Epoch 16/20, Loss: 1.6096
Epoch 17/20, Loss: 1.6096
Epoch 18/20, Loss: 1.6095
Epoch 19/20, Loss: 1.6096
Epoch 20/20, Loss: 1.6095


In [29]:
import numpy as np
import pandas as pd
import torch
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    matthews_corrcoef,
    cohen_kappa_score,
    roc_auc_score,
    mean_squared_error,
    mean_absolute_error,
)

# Storing predictions and ground truths
Y_pred = []
Y_true = []
Y_prob = []

model.eval()

with torch.no_grad():
    for data in test_loader:
        input_ids, labels = data
        input_ids = input_ids.to(torch.long).to(model.w_embeddings.device)
        labels = labels.to(torch.long).to(model.w_embeddings.device)

        output = model(input_ids)  # Raw logits
        probabilities = torch.nn.functional.softmax(output, dim=1)  # Convert logits to probabilities
        predictions = output.argmax(dim=1).cpu().numpy()

        Y_pred.extend(predictions)
        Y_true.extend(labels.cpu().numpy())
        Y_prob.extend(probabilities.cpu().numpy())  # Store probability scores

# Convert to NumPy arrays
Y_pred = np.array(Y_pred)
Y_true = np.array(Y_true)
Y_prob = np.array(Y_prob)

# Classification report
print("Classification Report:")
print(classification_report(Y_true, Y_pred, zero_division=0))

# Confusion matrix
conf_matrix = confusion_matrix(Y_true, Y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Extracting TP, TN, FP, FN for binary classification
if conf_matrix.shape == (2, 2):  
    TN, FP, FN, TP = conf_matrix.ravel()
    
    # Sensitivity (Recall) - True Positive Rate (TPR)
    sensitivity = TP / (TP + FN) if (TP + FN) != 0 else 0
    
    # Specificity - True Negative Rate (TNR)
    specificity = TN / (TN + FP) if (TN + FP) != 0 else 0
    
    # False Positive Rate (FPR)
    fpr = FP / (FP + TN) if (FP + TN) != 0 else 0

    # False Negative Rate (FNR)
    fnr = FN / (FN + TP) if (FN + TP) != 0 else 0

    print(f"\nTrue Positives (TP): {TP}")
    print(f"True Negatives (TN): {TN}")
    print(f"False Positives (FP): {FP}")
    print(f"False Negatives (FN): {FN}")
    print(f"\nSensitivity (Recall / TPR): {sensitivity}")
    print(f"Specificity (TNR): {specificity}")
    print(f"False Positive Rate (FPR): {fpr}")
    print(f"False Negative Rate (FNR): {fnr}")

# Accuracy
accuracy = accuracy_score(Y_true, Y_pred)
print(f"\nAccuracy: {accuracy}")

# Precision, Recall, F1-score (Macro & Weighted)
precision_macro = precision_score(Y_true, Y_pred, average="macro", zero_division=0)
recall_macro = recall_score(Y_true, Y_pred, average="macro", zero_division=0)
f1_macro = f1_score(Y_true, Y_pred, average="macro", zero_division=0)

precision_weighted = precision_score(Y_true, Y_pred, average="weighted", zero_division=0)
recall_weighted = recall_score(Y_true, Y_pred, average="weighted", zero_division=0)
f1_weighted = f1_score(Y_true, Y_pred, average="weighted", zero_division=0)

print(f"\nPrecision (Macro): {precision_macro}")
print(f"Recall (Macro): {recall_macro}")
print(f"F1 Score (Macro): {f1_macro}")
print(f"\nPrecision (Weighted): {precision_weighted}")
print(f"Recall (Weighted): {recall_weighted}")
print(f"F1 Score (Weighted): {f1_weighted}")

# MCC & Cohen's Kappa
mcc = matthews_corrcoef(Y_true, Y_pred)
print(f"\nMatthews Correlation Coefficient: {mcc}")

kappa = cohen_kappa_score(Y_true, Y_pred)
print(f"\nCohen's Kappa Score: {kappa}")

# ROC AUC Score (Multiclass)
try:
    roc_auc = roc_auc_score(pd.get_dummies(Y_true), Y_prob, multi_class="ovr", average="macro")
    print(f"\nROC AUC Score (Macro): {roc_auc}")
except ValueError as e:
    print(f"\nROC AUC Score could not be computed: {e}")

# Mean Squared Error & Mean Absolute Error
mse = mean_squared_error(Y_true, Y_pred)
mae = mean_absolute_error(Y_true, Y_pred)

print(f"\nMean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")

# Summary of all metrics
metrics_summary = {
    "Accuracy": accuracy,
    "Precision (Macro)": precision_macro,
    "Recall (Macro)": recall_macro,
    "F1 Score (Macro)": f1_macro,
    "Precision (Weighted)": precision_weighted,
    "Recall (Weighted)": recall_weighted,
    "F1 Score (Weighted)": f1_weighted,
    "Matthews Correlation Coefficient (MCC)": mcc,
    "Cohen's Kappa": kappa,
    "Mean Squared Error (MSE)": mse,
    "Mean Absolute Error (MAE)": mae,
}

if "roc_auc" in locals():
    metrics_summary["ROC AUC Score (Macro)"] = roc_auc

if conf_matrix.shape == (2, 2):  # Include these metrics only for binary classification
    metrics_summary.update({
        "True Positives (TP)": TP,
        "True Negatives (TN)": TN,
        "False Positives (FP)": FP,
        "False Negatives (FN)": FN,
        "Sensitivity (Recall / TPR)": sensitivity,
        "Specificity (TNR)": specificity,
        "False Positive Rate (FPR)": fpr,
        "False Negative Rate (FNR)": fnr,
    })

print("\nMetrics Summary:")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value}")


Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       934
           1       0.00      0.00      0.00       860
           2       0.00      0.00      0.00       918
           3       0.00      0.00      0.00       909
           4       0.20      1.00      0.33       879

    accuracy                           0.20      4500
   macro avg       0.04      0.20      0.07      4500
weighted avg       0.04      0.20      0.06      4500


Confusion Matrix:
[[  0   0   0   0 934]
 [  0   0   0   0 860]
 [  0   0   0   0 918]
 [  0   0   0   0 909]
 [  0   0   0   0 879]]

Accuracy: 0.19533333333333333

Precision (Macro): 0.039066666666666666
Recall (Macro): 0.2
F1 Score (Macro): 0.06536530953708868

Precision (Weighted): 0.03815511111111111
Recall (Weighted): 0.19533333333333333
F1 Score (Weighted): 0.06384011898122327

Matthews Correlation Coefficient: 0.0

Cohen's Kappa Score: 0.0

ROC AUC Score (Macro): 0.502125026

In [31]:
# Compute confusion matrix
conf_matrix = confusion_matrix(Y_true, Y_pred)

# Aggregate TP, FP, FN, TN across all classes
TP = np.diag(conf_matrix).sum()  # Sum of diagonal elements (True Positives)
FP = conf_matrix.sum(axis=0) - np.diag(conf_matrix)  # Column-wise sum minus TP
FN = conf_matrix.sum(axis=1) - np.diag(conf_matrix)  # Row-wise sum minus TP
TN = conf_matrix.sum() - (FP + FN + TP)  # Total elements minus others

# Compute overall sensitivity (Recall) & specificity
SP = TN.sum() / (TN.sum() + FP.sum())  # Specificity
SN = TP / (TP + FN).sum()  # Sensitivity (Recall)

# Display results
print(f"\nOverall True Positives (TP): {TP}")
print(f"Overall False Positives (FP): {FP.sum()}")
print(f"Overall False Negatives (FN): {FN.sum()}")
print(f"Overall True Negatives (TN): {TN.sum()}")
print(f"\nOverall Sensitivity (Recall): {SN}")
print(f"Overall Specificity: {SP}")

# Add these metrics to the summary
metrics_summary.update({
    "True Positives (TP)": TP,
    "False Positives (FP)": FP.sum(),
    "False Negatives (FN)": FN.sum(),
    "True Negatives (TN)": TN.sum(),
    "Sensitivity (Recall)": SN,
    "Specificity": SP,
})

print("\nUpdated Metrics Summary:")
for metric, value in metrics_summary.items():
    print(f"{metric}: {value}")



Overall True Positives (TP): 879
Overall False Positives (FP): 3621
Overall False Negatives (FN): 3621
Overall True Negatives (TN): 10863

Overall Sensitivity (Recall): 0.10965568862275449
Overall Specificity: 0.75

Updated Metrics Summary:
Accuracy: 0.19533333333333333
Precision (Macro): 0.039066666666666666
Recall (Macro): 0.2
F1 Score (Macro): 0.06536530953708868
Precision (Weighted): 0.03815511111111111
Recall (Weighted): 0.19533333333333333
F1 Score (Weighted): 0.06384011898122327
Matthews Correlation Coefficient (MCC): 0.0
Cohen's Kappa: 0.0
Mean Squared Error (MSE): 6.058888888888889
Mean Absolute Error (MAE): 2.0135555555555555
ROC AUC Score (Macro): 0.5021250261672968
True Positives (TP): 879
False Positives (FP): 3621
False Negatives (FN): 3621
True Negatives (TN): 10863
Sensitivity (Recall): 0.10965568862275449
Specificity: 0.75
