In [None]:
import pandas as pd
# Import required libraries for Random Forest feature selection and XGBoost
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import xgboost as xgb
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from sklearn.utils.class_weight import compute_class_weight
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from sklearn.metrics import matthews_corrcoef, make_scorer
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score, cross_validate
from sklearn.metrics import make_scorer
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import label_binarize
from sklearn.metrics import (
    confusion_matrix, precision_recall_curve, average_precision_score,
    precision_score, recall_score, f1_score, matthews_corrcoef
)
import time




In [47]:
#step1: Load the lightweight dataset "D:\AI-code\Personal-AI-Projects\PEMalwareClassification\Dataset\wine.csv"
main_df = pd.read_csv(r"D:\AI-code\Personal-AI-Projects\PEMalwareClassification\Dataset\wine.csv")

In [None]:
# Explore the data structure
print("Dataset shape:", main_df.shape)
print("\nColumns:", main_df.columns.tolist())
print("\nData types:")
print(main_df.dtypes.value_counts())

# Identify the target column (should be 'class' for wine dataset)
target_col = 'class' if 'class' in main_df.columns else 'Type'
print(f"\nTarget variable distribution (column: {target_col}):")
print(main_df[target_col].value_counts())
print("\nFirst few rows:")
display(main_df.head())

# Check for missing values
print(f"\nMissing values per column:")
missing_values = main_df.isnull().sum()
print(f"Total columns with missing values: {(missing_values > 0).sum()}")
print(f"Total missing values: {missing_values.sum()}")

# Show columns with most missing values (top 10)
if missing_values.sum() > 0:
    print("\nTop 10 columns with missing values:")
    print(missing_values[missing_values > 0].sort_values(ascending=False).head(10))

# Class distribution percentage plot on bar chart
class_distribution = main_df[target_col].value_counts(normalize=True) * 100
fig = go.Figure(data=[go.Bar(x=class_distribution.index, y=class_distribution.values)])
fig.update_layout(title='Class Distribution (%)', xaxis_title='Class', yaxis_title='Percentage')
fig.show()

# Separate features and target (exclude SHA256 if it exists)
cols_to_drop = [target_col]
if 'class' in main_df.columns:
    cols_to_drop.append('class')
    
X = main_df.drop(columns=cols_to_drop)
y = main_df[target_col]

print(f"\nOriginal data shape: {X.shape}")
print(f"Missing values in X: {X.isnull().sum().sum()}")

# Handle missing values - fill with 0 (appropriate for malware features)
X_clean = X.fillna(0)
print(f"Missing values after cleaning: {X_clean.isnull().sum().sum()}")

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
print(f"Encoded classes: {list(label_encoder.classes_)}")

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_clean, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")


Dataset shape: (178, 14)

Columns: ['class', ' col1', ' col2', ' col3', ' col4', ' col5', ' col6', ' col7', ' col8', ' col9', ' col10', ' col11', ' col12', ' col13']

Data types:
float64    11
int64       3
Name: count, dtype: int64

Target variable distribution (column: class):
class
2    71
1    59
3    48
Name: count, dtype: int64

First few rows:
   class   col1   col2   col3   col4   col5   col6   col7   col8   col9  \
0      1  14.23   1.71   2.43   15.6    127   2.80   3.06   0.28   2.29   
1      1  13.20   1.78   2.14   11.2    100   2.65   2.76   0.26   1.28   
2      1  13.16   2.36   2.67   18.6    101   2.80   3.24   0.30   2.81   
3      1  14.37   1.95   2.50   16.8    113   3.85   3.49   0.24   2.18   
4      1  13.24   2.59   2.87   21.0    118   2.80   2.69   0.39   1.82   

    col10   col11   col12   col13  
0    5.64    1.04    3.92    1065  
1    4.38    1.05    3.40    1050  
2    5.68    1.03    3.17    1185  
3    7.80    0.86    3.45    1480  
4    4.32    1.0


Original data shape: (178, 13)
Missing values in X: 0
Missing values after cleaning: 0
Encoded classes: [1, 2, 3]
Training data shape: (142, 13)
Testing data shape: (36, 13)


In [None]:



print("Implementing XGBoost with class weight balancing...")
print(f"Target class distribution:")
unique_classes, counts = np.unique(y_train, return_counts=True)
for cls, count in zip(unique_classes, counts):
    print(f"  Class {cls} ({label_encoder.classes_[cls]}): {count} samples ({count/len(y_train)*100:.1f}%)")

# Compute per-class weights (balanced)
classes = np.unique(y_train)
print(f"Unique classes: {classes}")
cw = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight = dict(zip(classes, cw))

print(f"\nComputed class weights:")
for cls, weight in class_weight.items():
    print(f"  Class {cls} ({label_encoder.classes_[cls]}): {weight:.3f}")

# Create sample weights for training data
sample_weight_train = np.array([class_weight[label] for label in y_train])

print(f"\nSample weight statistics:")
print(f"  Min weight: {sample_weight_train.min():.3f}")
print(f"  Max weight: {sample_weight_train.max():.3f}")
print(f"  Mean weight: {sample_weight_train.mean():.3f}")
num_classes = len(classes)

Implementing XGBoost with class weight balancing...
Target class distribution:
  Class 0 (1): 47 samples (33.1%)
  Class 1 (2): 57 samples (40.1%)
  Class 2 (3): 38 samples (26.8%)
Unique classes: [0 1 2]

Computed class weights:
  Class 0 (1): 1.007
  Class 1 (2): 0.830
  Class 2 (3): 1.246

Sample weight statistics:
  Min weight: 0.830
  Max weight: 1.246
  Mean weight: 1.000


In [None]:
# XGBoost for Feature Selection
print("Training XGBoost for feature importance...")

# Train XGBoost with optimal parameters for feature selection
xgb_feature_selector = xgb.XGBClassifier(
    objective='multi:softprob',  # For multiclass probability output
    num_class=num_classes,
    eval_metric='mlogloss',
    n_estimators=200,
    max_depth=12,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    reg_lambda=0.5,
    reg_alpha = 0.5,
    random_state=42,
    n_jobs=-1,
    #early_stopping_rounds=50,
    verbosity=1
)

# Handle class imbalance for feature selection
classes = np.unique(y_train)
cw = compute_class_weight(class_weight='balanced', classes=classes, y=y_train)
class_weight = dict(zip(classes, cw))
sample_weight_train = np.array([class_weight[label] for label in y_train])

xgb_feature_selector.fit(X_train, y_train, sample_weight=sample_weight_train)

# Get feature importances
feature_names = X.columns.tolist()
feature_importances = xgb_feature_selector.feature_importances_

# Create a DataFrame for better visualization
feature_importance_df = pd.DataFrame({
    'feature': feature_names,
    'importance': feature_importances
}).sort_values('importance', ascending=False)
print("\nFeature importances obtained from XGBoost.")
display(feature_importance_df)

# Create a single combined plot with individual and cumulative importance
# Calculate cumulative importance
cumsum_importance = np.cumsum(feature_importance_df['importance'].values)

# Create figure with single y-axis
fig = go.Figure()

# Add bar chart for individual feature importances
fig.add_trace(
    go.Bar(
        x=feature_importance_df['feature'].values,
        y=feature_importance_df['importance'].values,
        name='Individual feature importance',
        marker=dict(color='lightblue', line=dict(color='darkblue', width=0.5)),
        opacity=0.7,
        text=[f'{val:.4f}' for val in feature_importance_df['importance'].values],
        textposition='inside',
        textfont=dict(size=10, color='black')
    )
)

# Add line chart for cumulative importance
fig.add_trace(
    go.Scatter(
        x=feature_importance_df['feature'].values,
        y=cumsum_importance,
        mode='lines+markers',
        name='Cumulative feature importance',
        line=dict(color='red', width=3),
        marker=dict(size=6, color='darkred')
    )
)

# Update axes labels
fig.update_xaxes(title_text="Feature name (ranked by importance)")
fig.update_yaxes(title_text="Feature importance")

# Update layout
fig.update_layout(
    title_text="XGBoost Feature Importance Analysis - Individual and Cumulative",
    height=600,
    width=900,
    hovermode='x unified',
    legend=dict(
        orientation="v",
        yanchor="middle",
        y=0.5,
        xanchor="right",
        x=0.99
    )
)

fig.show()

# Print statistics about feature importance
print(f"\nTotal number of features: {len(feature_importance_df)}")
print("\nCumulative importance by number of features:")
for i in range(1, 21):
    if i <= len(feature_importance_df):
        cumsum = feature_importance_df.head(i)['importance'].sum()
        print(f"  Top {i:2d} features contribute: {cumsum:.4f} ({cumsum*100:.1f}%)")


Training XGBoost for feature importance...



Feature importances obtained from XGBoost.


Unnamed: 0,feature,importance
11,col12,0.189688
6,col7,0.170083
12,col13,0.152786
9,col10,0.144206
4,col5,0.065869
10,col11,0.065428
5,col6,0.057504
0,col1,0.051896
1,col2,0.042084
8,col9,0.026242



Total number of features: 13

Cumulative importance by number of features:
  Top  1 features contribute: 0.1897 (19.0%)
  Top  2 features contribute: 0.3598 (36.0%)
  Top  3 features contribute: 0.5126 (51.3%)
  Top  4 features contribute: 0.6568 (65.7%)
  Top  5 features contribute: 0.7226 (72.3%)
  Top  6 features contribute: 0.7881 (78.8%)
  Top  7 features contribute: 0.8456 (84.6%)
  Top  8 features contribute: 0.8975 (89.7%)
  Top  9 features contribute: 0.9395 (94.0%)
  Top 10 features contribute: 0.9658 (96.6%)
  Top 11 features contribute: 0.9789 (97.9%)
  Top 12 features contribute: 0.9898 (99.0%)
  Top 13 features contribute: 1.0000 (100.0%)


In [None]:
# Select top features based on XGBoost importance
# Let's use top n_features
n_features = 12
top_features = feature_importance_df.head(n_features)['feature'].tolist()

print(f"Selected top {n_features} features for XGBoost training")
print("Top 10 selected features:")
for i, feature in enumerate(top_features[:10]):
    importance = feature_importance_df[feature_importance_df['feature'] == feature]['importance'].values[0]
    print(f"{i+1:2d}. {feature}: {importance:.4f}")

# Create reduced feature sets
X_train_selected = X_train[top_features]
X_test_selected = X_test[top_features]

print(f"\nOriginal feature set: {X_train.shape[1]} features")
print(f"Selected feature set: {X_train_selected.shape[1]} features")
print(f"Reduction: {(1 - X_train_selected.shape[1]/X_train.shape[1])*100:.1f}%")

Selected top 12 features for XGBoost training
Top 10 selected features:
 1.  col12: 0.1897
 2.  col7: 0.1701
 3.  col13: 0.1528
 4.  col10: 0.1442
 5.  col5: 0.0659
 6.  col11: 0.0654
 7.  col6: 0.0575
 8.  col1: 0.0519
 9.  col2: 0.0421
10.  col9: 0.0262

Original feature set: 13 features
Selected feature set: 12 features
Reduction: 7.7%


In [None]:
# Bayesian Optimization for XGBoost Hyperparameters with MCC Optimization
print("="*80)
print("BAYESIAN OPTIMIZATION FOR XGBOOST HYPERPARAMETERS")
print("Optimization Metric: Matthews Correlation Coefficient (MCC)")
print("="*80)



# Define MCC scorer for optimization
mcc_scorer = make_scorer(matthews_corrcoef)

# Define the search space for Bayesian Optimization
search_spaces = {
    'n_estimators': Integer(50, 300),
    'max_depth': Integer(3, 15),
    'learning_rate': Real(0.01, 0.3, prior='log-uniform'),
    'subsample': Real(0.5, 1.0),
    'colsample_bytree': Real(0.5, 1.0),
    'reg_lambda': Real(0.01, 2.0, prior='log-uniform'),
    'reg_alpha': Real(0.01, 2.0, prior='log-uniform')
}

print("\nSearch Space for Bayesian Optimization:")
for param, space in search_spaces.items():
    print(f"  {param}: {space}")

# Create the base XGBoost model
base_xgb = xgb.XGBClassifier(
    objective='multi:softprob',
    num_class=num_classes,
    eval_metric='mlogloss',
    random_state=42,
    n_jobs=-1,
    verbosity=0
)

# Define 5-fold stratified cross-validation
cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

print("\nCross-Validation Strategy: 5-Fold Stratified CV")
print(f"Number of optimization iterations: 30")

# Bayesian Optimization with BayesSearchCV
bayes_search = BayesSearchCV(
    estimator=base_xgb,
    search_spaces=search_spaces,
    n_iter=30,  # Number of parameter settings sampled
    cv=cv_strategy,
    scoring=mcc_scorer,
    n_jobs=-1,
    verbose=2,
    random_state=42,
    return_train_score=True
)

print("\nStarting Bayesian Optimization...")
print("This may take several minutes...")
start_time = time.time()

# Fit with sample weights
bayes_search.fit(X_train_selected, y_train, sample_weight=sample_weight_train)

optimization_time = time.time() - start_time
print(f"\nOptimization completed in {optimization_time:.2f} seconds ({optimization_time/60:.2f} minutes)")

# Get best parameters and score
best_params = bayes_search.best_params_
best_score = bayes_search.best_score_

print("\n" + "="*80)
print("OPTIMIZATION RESULTS")
print("="*80)
print(f"\nBest MCC Score (CV): {best_score:.4f}")
print(f"\nBest Hyperparameters:")
for param, value in best_params.items():
    print(f"  {param}: {value}")

# Get the best estimator
best_model = bayes_search.best_estimator_

print("\n" + "="*80)
print("BEST MODEL SUMMARY")
print("="*80)
print(f"Best XGBoost Model Configuration:")
print(best_model)

# Show top 5 parameter combinations
print("\n" + "="*80)
print("TOP 5 PARAMETER COMBINATIONS")
print("="*80)
cv_results = pd.DataFrame(bayes_search.cv_results_)
top_5 = cv_results.nlargest(5, 'mean_test_score')[['params', 'mean_test_score', 'std_test_score', 'rank_test_score']]
for idx, row in top_5.iterrows():
    print(f"\nRank {int(row['rank_test_score'])}:")
    print(f"  MCC Score: {row['mean_test_score']:.4f} (+/- {row['std_test_score']:.4f})")
    print(f"  Parameters: {row['params']}")

BAYESIAN OPTIMIZATION FOR XGBOOST HYPERPARAMETERS
Optimization Metric: Matthews Correlation Coefficient (MCC)

Search Space for Bayesian Optimization:
  n_estimators: Integer(low=50, high=300, prior='uniform', transform='identity')
  max_depth: Integer(low=3, high=15, prior='uniform', transform='identity')
  learning_rate: Real(low=0.01, high=0.3, prior='log-uniform', transform='identity')
  subsample: Real(low=0.5, high=1.0, prior='uniform', transform='identity')
  colsample_bytree: Real(low=0.5, high=1.0, prior='uniform', transform='identity')
  reg_lambda: Real(low=0.01, high=2.0, prior='log-uniform', transform='identity')
  reg_alpha: Real(low=0.01, high=2.0, prior='log-uniform', transform='identity')

Cross-Validation Strategy: 5-Fold Stratified CV
Number of optimization iterations: 30

Starting Bayesian Optimization...
This may take several minutes...
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 fo

In [None]:
# 5-Fold Cross-Validation with Best Model
print("\n" + "="*80)
print("5-FOLD CROSS-VALIDATION WITH BEST MODEL")
print("="*80)



# Define custom scoring metrics for cross-validation
scoring_metrics = {
    'mcc': mcc_scorer,
    'accuracy': 'accuracy',
    'precision_macro': 'precision_macro',
    'recall_macro': 'recall_macro',
    'f1_macro': 'f1_macro'
}

print("\nPerforming 5-fold cross-validation with best model...")
cv_results_best = cross_validate(
    best_model, 
    X_train_selected, 
    y_train,
    cv=cv_strategy,
    scoring=scoring_metrics,
    return_train_score=True,
    n_jobs=-1
)

print("\nCross-Validation Results (5-Fold):")
print("-"*60)
for metric in ['mcc', 'accuracy', 'precision_macro', 'recall_macro', 'f1_macro']:
    test_scores = cv_results_best[f'test_{metric}']
    train_scores = cv_results_best[f'train_{metric}']
    print(f"{metric.upper()}:")
    print(f"  Test:  {test_scores.mean():.4f} (+/- {test_scores.std():.4f})")
    print(f"  Train: {train_scores.mean():.4f} (+/- {train_scores.std():.4f})")

# Retrain best model on full training data
print("\n" + "="*80)
print("TRAINING BEST MODEL ON FULL TRAINING SET")
print("="*80)

start_time = time.time()
best_model.fit(X_train_selected, y_train, sample_weight=sample_weight_train)
training_time = time.time() - start_time

print(f"Training completed in {training_time:.2f} seconds")

# Make predictions
y_pred = best_model.predict(X_test_selected)
y_pred_proba = best_model.predict_proba(X_test_selected)

print("Predictions completed!")


5-FOLD CROSS-VALIDATION WITH BEST MODEL

Performing 5-fold cross-validation with best model...

Cross-Validation Results (5-Fold):
------------------------------------------------------------
MCC:
  Test:  0.9588 (+/- 0.0598)
  Train: 1.0000 (+/- 0.0000)
ACCURACY:
  Test:  0.9722 (+/- 0.0403)
  Train: 1.0000 (+/- 0.0000)
PRECISION_MACRO:
  Test:  0.9747 (+/- 0.0362)
  Train: 1.0000 (+/- 0.0000)
RECALL_MACRO:
  Test:  0.9751 (+/- 0.0367)
  Train: 1.0000 (+/- 0.0000)
F1_MACRO:
  Test:  0.9735 (+/- 0.0386)
  Train: 1.0000 (+/- 0.0000)

TRAINING BEST MODEL ON FULL TRAINING SET

Cross-Validation Results (5-Fold):
------------------------------------------------------------
MCC:
  Test:  0.9588 (+/- 0.0598)
  Train: 1.0000 (+/- 0.0000)
ACCURACY:
  Test:  0.9722 (+/- 0.0403)
  Train: 1.0000 (+/- 0.0000)
PRECISION_MACRO:
  Test:  0.9747 (+/- 0.0362)
  Train: 1.0000 (+/- 0.0000)
RECALL_MACRO:
  Test:  0.9751 (+/- 0.0367)
  Train: 1.0000 (+/- 0.0000)
F1_MACRO:
  Test:  0.9735 (+/- 0.0386)
  Tra

In [None]:
# Comprehensive Metrics Calculation for Final Report
print("\n" + "="*80)
print("FINAL MODEL EVALUATION REPORT")
print("="*80)


# 1. Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
print("\n1. CONFUSION MATRIX")
print("-"*60)
print(cm)

# 2. Calculate all required metrics
class_names = label_encoder.classes_
class_names_str = [str(name) for name in class_names]
n_classes = len(class_names)

# Binarize the output for multi-class metrics
y_test_binarized = label_binarize(y_test, classes=list(range(n_classes)))

# Per-class metrics
precision_per_class = []
recall_per_class = []
f1_per_class = []
tnr_per_class = []  # True Negative Rate (Specificity)
npv_per_class = []  # Negative Predictive Value
fpr_per_class = []  # False Positive Rate
fnr_per_class = []  # False Negative Rate

print("\n" + "="*80)
print("PER-CLASS METRICS CALCULATION")
print("="*80)

for i in range(n_classes):
    # True Positives, False Positives, True Negatives, False Negatives
    tp = cm[i, i]
    fp = cm[:, i].sum() - tp
    fn = cm[i, :].sum() - tp
    tn = cm.sum() - tp - fp - fn
    
    # Precision (Positive Predictive Value)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    precision_per_class.append(precision)
    
    # Recall (True Positive Rate / Sensitivity)
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    recall_per_class.append(recall)
    
    # F1 Score
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    f1_per_class.append(f1)
    
    # True Negative Rate (Specificity)
    tnr = tn / (tn + fp) if (tn + fp) > 0 else 0
    tnr_per_class.append(tnr)
    
    # Negative Predictive Value
    npv = tn / (tn + fn) if (tn + fn) > 0 else 0
    npv_per_class.append(npv)
    
    # False Positive Rate
    fpr = fp / (fp + tn) if (fp + tn) > 0 else 0
    fpr_per_class.append(fpr)
    
    # False Negative Rate
    fnr = fn / (fn + tp) if (fn + tp) > 0 else 0
    fnr_per_class.append(fnr)

# Calculate macro averages
macro_precision = np.mean(precision_per_class)
macro_recall = np.mean(recall_per_class)  # Macro TPR
macro_f1 = np.mean(f1_per_class)
macro_tnr = np.mean(tnr_per_class)
macro_npv = np.mean(npv_per_class)
macro_fpr = np.mean(fpr_per_class)
macro_fnr = np.mean(fnr_per_class)

# Calculate MCC (Matthews Correlation Coefficient)
mcc_value = matthews_corrcoef(y_test, y_pred)

# Calculate Precision-Recall curves and AUC-PR for each class
pr_curves_data = []
pr_auc_scores = []

for i in range(n_classes):
    precision_curve, recall_curve, _ = precision_recall_curve(
        y_test_binarized[:, i], y_pred_proba[:, i]
    )
    pr_auc = average_precision_score(y_test_binarized[:, i], y_pred_proba[:, i])
    pr_auc_scores.append(pr_auc)
    pr_curves_data.append({
        'class': class_names_str[i],
        'precision': precision_curve,
        'recall': recall_curve,
        'auc': pr_auc
    })

# Macro-average AUC-PR
macro_pr_auc = np.mean(pr_auc_scores)

print("\nMetrics calculated successfully!")
print(f"Number of test samples: {len(y_test)}")
print(f"Number of classes: {n_classes}")


FINAL MODEL EVALUATION REPORT

1. CONFUSION MATRIX
------------------------------------------------------------
[[12  0  0]
 [ 0 14  0]
 [ 0  0 10]]

PER-CLASS METRICS CALCULATION

Metrics calculated successfully!
Number of test samples: 36
Number of classes: 3


In [None]:
# FINAL REPORT - Visualizations and Metrics
print("\n" + "="*80)
print("FINAL EVALUATION REPORT")
print("="*80)


# 1. CONFUSION MATRIX HEATMAP
print("\n1. Confusion Matrix")
print("-"*60)
print(cm)

fig_cm = go.Figure(data=go.Heatmap(
    z=cm,
    x=[f'{class_names_str[i]}' for i in range(n_classes)],
    y=[f'{class_names_str[i]}' for i in range(n_classes)],
    colorscale='Blues',
    text=cm,
    texttemplate="%{text}",
    textfont={"size": 14},
    hovertemplate='True: %{y}<br>Predicted: %{x}<br>Count: %{z}<extra></extra>'
))

fig_cm.update_layout(
    title='Confusion Matrix',
    xaxis_title='Predicted Class',
    yaxis_title='True Class',
    width=600,
    height=500,
    font=dict(size=12)
)

fig_cm.show()

# 2. PRECISION-RECALL CURVES
print("\n2. Precision-Recall Curves")
print("-"*60)

fig_pr = go.Figure()

# Color palette for classes
colors = ['red', 'blue', 'green', 'orange', 'purple', 'brown', 'pink', 'gray', 'cyan', 'magenta']

# Plot PR curve for each class
for i, pr_data in enumerate(pr_curves_data):
    fig_pr.add_trace(go.Scatter(
        x=pr_data['recall'],
        y=pr_data['precision'],
        mode='lines',
        name=f"{pr_data['class']} (AUC={pr_data['auc']:.3f})",
        line=dict(color=colors[i % len(colors)], width=2)
    ))

# Add macro-average line
fig_pr.add_trace(go.Scatter(
    x=[0, 1],
    y=[macro_pr_auc, macro_pr_auc],
    mode='lines',
    name=f'Macro Avg AUC-PR={macro_pr_auc:.3f}',
    line=dict(color='black', width=3, dash='dash')
))

fig_pr.update_layout(
    title='Precision-Recall Curves (All Classes)',
    xaxis_title='Recall',
    yaxis_title='Precision',
    width=900,
    height=600,
    legend=dict(x=0.02, y=0.02),
    xaxis=dict(range=[0, 1]),
    yaxis=dict(range=[0, 1])
)

fig_pr.show()

print(f"Macro-Average AUC-PR: {macro_pr_auc:.4f}")
for i, pr_data in enumerate(pr_curves_data):
    print(f"  {pr_data['class']}: {pr_data['auc']:.4f}")

# 3. FINAL METRICS SUMMARY
print("\n" + "="*80)
print("FINAL METRICS SUMMARY (Macro Averages)")
print("="*80)

metrics_summary = {
    '3. Macro-Average Area Under Precision-Recall Curve': macro_pr_auc,
    '4. Macro TPR (True Positive Rate / Recall / Sensitivity)': macro_recall,
    '5. Macro Precision': macro_precision,
    '6. Macro Positive F1-Score': macro_f1,
    '7. Macro TNR (True Negative Rate / Specificity)': macro_tnr,
    '8. Macro NPV (Negative Predictive Value)': macro_npv,
    '9. Macro FPR (False Positive Rate)': macro_fpr,
    '10. Macro FNR (False Negative Rate)': macro_fnr,
    '11. MCC (Matthews Correlation Coefficient)': mcc_value
}

for idx, (metric_name, value) in enumerate(metrics_summary.items(), 1):
    print(f"{metric_name}: {value:.4f}")

# Create a comprehensive summary table
print("\n" + "="*80)
print("COMPREHENSIVE METRICS TABLE")
print("="*80)
print(f"{'Metric':<50} {'Value':<10}")
print("="*80)
for metric_name, value in metrics_summary.items():
    print(f"{metric_name:<50} {value:<10.4f}")
print("="*80)

# Per-class breakdown (additional information)
print("\n" + "="*80)
print("PER-CLASS METRICS BREAKDOWN")
print("="*80)
print(f"{'Class':<15} {'Precision':<12} {'Recall/TPR':<12} {'F1':<10} {'TNR':<10} {'NPV':<10} {'FPR':<10} {'FNR':<10}")
print("-"*100)
for i in range(n_classes):
    print(f"{class_names_str[i]:<15} {precision_per_class[i]:<12.4f} {recall_per_class[i]:<12.4f} "
          f"{f1_per_class[i]:<10.4f} {tnr_per_class[i]:<10.4f} {npv_per_class[i]:<10.4f} "
          f"{fpr_per_class[i]:<10.4f} {fnr_per_class[i]:<10.4f}")
print("-"*100)
print(f"{'MACRO AVG':<15} {macro_precision:<12.4f} {macro_recall:<12.4f} "
      f"{macro_f1:<10.4f} {macro_tnr:<10.4f} {macro_npv:<10.4f} "
      f"{macro_fpr:<10.4f} {macro_fnr:<10.4f}")
print("="*100)


FINAL EVALUATION REPORT

1. Confusion Matrix
------------------------------------------------------------
[[12  0  0]
 [ 0 14  0]
 [ 0  0 10]]



2. Precision-Recall Curves
------------------------------------------------------------


Macro-Average AUC-PR: 1.0000
  1: 1.0000
  2: 1.0000
  3: 1.0000

FINAL METRICS SUMMARY (Macro Averages)
3. Macro-Average Area Under Precision-Recall Curve: 1.0000
4. Macro TPR (True Positive Rate / Recall / Sensitivity): 1.0000
5. Macro Precision: 1.0000
6. Macro Positive F1-Score: 1.0000
7. Macro TNR (True Negative Rate / Specificity): 1.0000
8. Macro NPV (Negative Predictive Value): 1.0000
9. Macro FPR (False Positive Rate): 0.0000
10. Macro FNR (False Negative Rate): 0.0000
11. MCC (Matthews Correlation Coefficient): 1.0000

COMPREHENSIVE METRICS TABLE
Metric                                             Value     
3. Macro-Average Area Under Precision-Recall Curve 1.0000    
4. Macro TPR (True Positive Rate / Recall / Sensitivity) 1.0000    
5. Macro Precision                                 1.0000    
6. Macro Positive F1-Score                         1.0000    
7. Macro TNR (True Negative Rate / Specificity)    1.0000    
8. Macro NPV (Negative Predictive Value)           1.0000  

In [56]:
# Final Summary Visualization - Metrics Comparison
print("\n" + "="*80)
print("VISUAL SUMMARY OF KEY METRICS")
print("="*80)

# Create a bar chart for key macro metrics
fig_summary = go.Figure()

metric_names = [
    'Macro AUC-PR',
    'Macro TPR',
    'Macro Precision', 
    'Macro F1',
    'Macro TNR',
    'Macro NPV',
    'MCC'
]

metric_values = [
    macro_pr_auc,
    macro_recall,
    macro_precision,
    macro_f1,
    macro_tnr,
    macro_npv,
    mcc_value
]

# Create color coding: green for good, yellow for moderate, red for poor
colors_bar = []
for val in metric_values:
    if val >= 0.8:
        colors_bar.append('green')
    elif val >= 0.6:
        colors_bar.append('orange')
    else:
        colors_bar.append('red')

fig_summary.add_trace(go.Bar(
    x=metric_names,
    y=metric_values,
    marker_color=colors_bar,
    text=[f'{v:.4f}' for v in metric_values],
    textposition='auto',
    textfont=dict(size=14, color='white')
))

fig_summary.update_layout(
    title='Key Performance Metrics Summary',
    xaxis_title='Metric',
    yaxis_title='Score',
    yaxis=dict(range=[0, 1]),
    width=900,
    height=500,
    font=dict(size=12)
)

fig_summary.show()

# Final Report Summary
print("\n" + "="*80)
print("OPTIMIZATION AND EVALUATION SUMMARY")
print("="*80)
print(f"\nOptimization Method: Bayesian Optimization")
print(f"Optimization Metric: Matthews Correlation Coefficient (MCC)")
print(f"Cross-Validation: 5-Fold Stratified CV")
print(f"Number of Features: {len(top_features)}")
print(f"Number of Classes: {n_classes}")
print(f"Test Set Size: {len(y_test)}")
print(f"\nBest Hyperparameters Found:")
for param, value in best_params.items():
    print(f"  {param}: {value}")
print(f"\nFinal Test Performance:")
print(f"  Matthews Correlation Coefficient (MCC): {mcc_value:.4f}")
print(f"  Macro-Average AUC-PR: {macro_pr_auc:.4f}")
print(f"  Macro F1-Score: {macro_f1:.4f}")
print(f"  Macro Precision: {macro_precision:.4f}")
print(f"  Macro Recall (TPR): {macro_recall:.4f}")
print(f"  Macro TNR (Specificity): {macro_tnr:.4f}")
print(f"  Macro NPV: {macro_npv:.4f}")
print(f"  Macro FPR: {macro_fpr:.4f}")
print(f"  Macro FNR: {macro_fnr:.4f}")

print("\n" + "="*80)
print("REPORT COMPLETE")
print("="*80)


VISUAL SUMMARY OF KEY METRICS



OPTIMIZATION AND EVALUATION SUMMARY

Optimization Method: Bayesian Optimization
Optimization Metric: Matthews Correlation Coefficient (MCC)
Cross-Validation: 5-Fold Stratified CV
Number of Features: 12
Number of Classes: 3
Test Set Size: 36

Best Hyperparameters Found:
  colsample_bytree: 0.7224162561505759
  learning_rate: 0.22754356809600707
  max_depth: 4
  n_estimators: 158
  reg_alpha: 0.02707014123621835
  reg_lambda: 0.11063627982877337
  subsample: 0.5777240270252717

Final Test Performance:
  Matthews Correlation Coefficient (MCC): 1.0000
  Macro-Average AUC-PR: 1.0000
  Macro F1-Score: 1.0000
  Macro Precision: 1.0000
  Macro Recall (TPR): 1.0000
  Macro TNR (Specificity): 1.0000
  Macro NPV: 1.0000
  Macro FPR: 0.0000
  Macro FNR: 0.0000

REPORT COMPLETE
