In [2]:
# uses my original code; only added the roc-auc part for the binary task.
from typing import Tuple
import pandas as pd
import numpy as np

from cbr import CBR  # import the class from the separate file

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


# Label column of the binary (progression / no progression) dataset.
binary_target: str = "CKD_Progression"
# Label column of the multiclass (CKD stage) dataset.
multi_target: str = "CKD_Stage"

def _iter_neighbours(model, X_test, k):
    """Yield the retrieved neighbours for every row of ``X_test``, in row order.

    Each query is a dict with the positional row index as ``'id'`` and the
    row's values as a plain list under ``'features'``; it is passed to
    ``model.retrieve_similar_cases`` together with ``k``.
    """
    for i in range(len(X_test)):
        query_case = {
            'id': i,
            'features': X_test.iloc[i].to_list(),
        }
        yield model.retrieve_similar_cases(query_case, k=k)


def _positive_vote_fraction(similar_cases):
    """Return the fraction of retrieved neighbours whose stored solution is 1.

    ``similar_cases`` is iterated as pairs whose first element is a case dict
    with a ``'solution'`` key; the second element is ignored here.
    """
    positives = sum(1 for case, _ in similar_cases if case['solution'] == 1)
    # max(1, ...) avoids a ZeroDivisionError when no neighbours are returned.
    return positives / max(1, len(similar_cases))


def main(binary_csv='Binary_data.csv', multi_csv='Multi_data.csv'):
    """Train and evaluate the binary and multiclass CBR classifiers.

    Loads both datasets, makes a stratified 70/30 train/test split, builds a
    case base from each training set, tunes the feature weights on the first
    200 training rows, classifies every test row by majority vote over its 10
    retrieved neighbours, and prints a classification report and confusion
    matrix for each task plus ROC-AUC for the binary task.

    Args:
        binary_csv: Path of the CSV holding the binary dataset; it must
            contain a ``CKD_Progression`` column.
        multi_csv: Path of the CSV holding the multiclass dataset; it must
            contain a ``CKD_Stage`` column.
    """
    df_binary = pd.read_csv(binary_csv)
    df_multi = pd.read_csv(multi_csv)

    # Binary features and labels
    X_bin = df_binary.drop('CKD_Progression', axis=1)
    y_bin = df_binary['CKD_Progression'].astype(int)
    b_num_cols = ['Age', 'Systolic_Pressure', 'BMI', 'Hemoglobin', 'Albumin',
                  'Creatinine', 'eGFR', 'Protein_Creatinine_Ratio']
    # Every column not listed as continuous is treated as categorical.
    b_cat_cols = [c for c in X_bin if c not in b_num_cols]

    # Multiclass features and labels
    X_mul = df_multi.drop('CKD_Stage', axis=1)
    y_mul = df_multi['CKD_Stage'].astype(int)
    # NOTE(review): 'eGFR' is not listed here, so if the multiclass CSV has an
    # eGFR column it ends up in the categorical list -- confirm this is intended.
    m_num_cols = ['Age', 'Systolic_Pressure', 'BMI', 'Hemoglobin', 'Albumin',
                  'Creatinine', 'Protein_Creatinine_Ratio']
    m_cat_cols = [c for c in X_mul if c not in m_num_cols]

    # Split each dataset 70/30 into train and test sets, stratified on the label
    X_train_bin, X_test_bin, y_train_bin, y_test_bin = train_test_split(
        X_bin, y_bin, test_size=0.30, stratify=y_bin, random_state=42)
    X_train_mul, X_test_mul, y_train_mul, y_test_mul = train_test_split(
        X_mul, y_mul, test_size=0.30, stratify=y_mul, random_state=42)

    Binary_CBR = CBR(
        continuous_features=b_num_cols,
        categorical_features=b_cat_cols,
        target_names={0: 'No Progression', 1: 'Progression'},
    )

    Multi_CBR = CBR(
        continuous_features=m_num_cols,
        categorical_features=m_cat_cols,
        target_names={1: 'Stage 1', 2: 'Stage 2', 3: 'Stage 3', 4: 'Stage 4', 5: 'Stage 5'},
    )

    Binary_CBR.create_case_base(X_train_bin, y_train_bin)
    Multi_CBR.create_case_base(X_train_mul, y_train_mul)

    # Weights are tuned on the first 200 training rows only.
    # NOTE(review): tuning uses k=5 while evaluation below retrieves k=10.
    Binary_CBR.optimize_weights_gradient_descent(
        X_train_bin.iloc[:200], y_train_bin.iloc[:200],
        learning_rate=0.1, epochs=300, k=5)
    Multi_CBR.optimize_weights_gradient_descent(
        X_train_mul.iloc[:200], y_train_mul.iloc[:200],
        learning_rate=0.1, epochs=300, k=5)

    yhat_bin = []
    # Simple probability for y=1 taken from the neighbour votes (for ROC-AUC).
    p_bin = []
    for similar_cases in _iter_neighbours(Binary_CBR, X_test_bin, k=10):
        yhat_bin.append(Binary_CBR.reuse_solution(similar_cases, voting_method='majority'))
        p_bin.append(_positive_vote_fraction(similar_cases))

    yhat_mul = [
        Multi_CBR.reuse_solution(similar_cases, voting_method='majority')
        for similar_cases in _iter_neighbours(Multi_CBR, X_test_mul, k=10)
    ]

    print("Binary Classification Report:")
    print(classification_report(y_test_bin, yhat_bin))
    print("Confusion Matrix")
    print(confusion_matrix(y_test_bin, yhat_bin))

    print("ROC-AUC (Binary):", roc_auc_score(y_test_bin, p_bin))

    print("Multiclass Classification report")
    print(classification_report(y_test_mul, yhat_mul))
    print("Confusion Matrix")
    print(confusion_matrix(y_test_mul, yhat_mul))

    # TODO: plot both confusion matrices as heatmaps; this needs plotting
    # imports (e.g. matplotlib / seaborn) that this file does not have yet.
    
# Run the full train/evaluate pipeline only when executed as a script
# (or notebook cell), not when this module is imported.
if __name__ == "__main__":
    main()

Epoch   0/300: Loss = 0.8580, Accuracy = 0.8100
Epoch  10/300: Loss = 0.8577, Accuracy = 0.8100
Epoch  20/300: Loss = 0.8601, Accuracy = 0.8050
Epoch  30/300: Loss = 0.8597, Accuracy = 0.8050
Epoch  40/300: Loss = 0.8573, Accuracy = 0.8050
Epoch  50/300: Loss = 0.8646, Accuracy = 0.8050
Epoch  60/300: Loss = 0.8617, Accuracy = 0.8050
Epoch  70/300: Loss = 0.8643, Accuracy = 0.8050
Epoch  80/300: Loss = 0.8640, Accuracy = 0.8050
Epoch  90/300: Loss = 0.8636, Accuracy = 0.8050
Epoch 100/300: Loss = 0.8659, Accuracy = 0.8050
Epoch 110/300: Loss = 0.8630, Accuracy = 0.8100
Epoch 120/300: Loss = 0.8651, Accuracy = 0.8050
Epoch 130/300: Loss = 0.8624, Accuracy = 0.8050
Epoch 140/300: Loss = 0.8622, Accuracy = 0.8050
Epoch 150/300: Loss = 0.8619, Accuracy = 0.8050
Epoch 160/300: Loss = 0.8647, Accuracy = 0.8050
Epoch 170/300: Loss = 0.8619, Accuracy = 0.8050
Epoch 180/300: Loss = 0.8594, Accuracy = 0.8100
Epoch 190/300: Loss = 0.8589, Accuracy = 0.8100
Epoch 200/300: Loss = 0.8588, Accuracy =