In [None]:
# Mount Google Drive at /content/drive so the data folder used by the next cell is reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Make the multiclass data folder the working directory; later cells open their files by bare name.
%cd /content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Multiclass
# List the folder contents.
!ls

In [None]:
!pip install torchmetrics
!pip install pycm
!pip install tensorflow_addons
!pip install evaluate
!pip install mlxtend
!pip install pytorch-ignite
!pip install statsmodels

In [5]:
import pandas as pd
import numpy as np
from decimal import getcontext, Decimal
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
import json
from sklearn.metrics import (classification_report, accuracy_score, precision_score, recall_score, f1_score,
                             log_loss, cohen_kappa_score, roc_curve, matthews_corrcoef,
                             balanced_accuracy_score, confusion_matrix, jaccard_score, fbeta_score,
                             precision_recall_curve, roc_auc_score, hamming_loss, zero_one_loss, average_precision_score, top_k_accuracy_score)
from pycm import ConfusionMatrix
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from torchmetrics import Accuracy, Precision, Recall, F1Score, MatthewsCorrCoef, CohenKappa, FBetaScore, JaccardIndex, ConfusionMatrix
import torch
import evaluate
from mlxtend.evaluate import confusion_matrix as mlxtend_confusion_matrix, accuracy_score as mlxtend_accuracy_score
from nltk.metrics import precision, recall, f_measure, ConfusionMatrix
import statsmodels.api as sm


# Load Data

In [6]:
# Setting
# Show 25 significant decimals everywhere so tiny differences between libraries stay visible.
_float_fmt = '{:.25f}'.format
getcontext().prec = 25
np.set_printoptions(precision=25)
pd.set_option('display.float_format', _float_fmt)

# Result containers for the multiclass and binary experiments.
results_multi, results_bi = {}, {}

In [None]:
# Data 1
# Load the radiomics feature tables and the UCLA lesion labels from the working directory.
mri_features = pd.read_excel('MRI_Radiomics Features.xlsx')
us_mri_features = pd.read_excel('US_MRI_Radiomics Features.xlsx')  # only its shape is reported; it is not merged below
ucla_data = pd.read_excel('UCLA.xlsx')

print(f'MRI_Features: {mri_features.shape}')
print(f'us_mri_features: {us_mri_features.shape}')
print(f'ucla_data: {ucla_data.shape}')

target_col = 'UCLA Score (Similar to PIRADS v2)_Multi-class'
excluded_scores = [0, 1, 2]  # rows with these scores are left out of the multiclass task

# Strip the ultrasound suffix so lesion names line up with the PatientID key of the MRI table.
ucla_data['lesion_name'] = ucla_data['lesion_name'].str.replace('_US_Prostate', '')
merged_data = ucla_data.merge(mri_features, left_on='lesion_name', right_on='PatientID')
filtered_data = merged_data[~merged_data[target_col].isin(excluded_scores)]

# Drop identifiers and every label column from the feature matrix. The multiclass
# target itself must be dropped too, otherwise the model is trained on its own label.
features_filtered = filtered_data.drop(
    columns=['lesion_name', 'PatientID', 'Labels_Binary', 'Label', target_col]
)
assert target_col not in features_filtered.columns, 'target column leaked into the features'
y_true_multi_filtered = filtered_data[target_col]
y_true_bi_filtered = filtered_data['Labels_Binary']

X_train, X_test, y_train, y_test = train_test_split(
    features_filtered, y_true_multi_filtered, test_size=0.2, random_state=42
)
y_true_1 = np.array(y_test)

# Fit the scaler on the training split only, then apply the same transform to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [7]:
# Data 2
# Ground-truth / prediction pairs stored as two columns of one CSV file.
data_multi = pd.read_csv('y_true_y_pred_multi.csv')
y_true_2, y_pred_2 = (np.array(data_multi[column]) for column in ('y_true', 'y_pred'))

In [10]:
# Model
# Random forest with a fixed seed; fit() returns the estimator itself, so it can be chained.
model_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Hard class predictions and per-class probabilities for the held-out split, as numpy arrays.
y_pred_1 = np.array(model_rf.predict(X_test))
y_pred_proba_1 = np.array(model_rf.predict_proba(X_test))

In [11]:
# Fresh container for the per-library results, plus the label/prediction
# vectors (Data 1) that every library section below evaluates.
results = dict()
y_true, y_pred, y_pred_proba = y_true_1, y_pred_1, y_pred_proba_1

# **Scikit-learn Library:**

In [12]:
# Every label-based scikit-learn metric below is evaluated on the same (y_true, y_pred) pair.
_sk_pair = (y_true, y_pred)

results['Scikit-learn'] = {
    'accuracy': accuracy_score(*_sk_pair),

    # Precision: per-class values first, then the three averaged variants.
    'precision': precision_score(*_sk_pair, average=None),
    'precision_macro': precision_score(*_sk_pair, average='macro'),
    'precision_micro': precision_score(*_sk_pair, average='micro'),
    'precision_weighted': precision_score(*_sk_pair, average='weighted'),

    # Recall, same layout.
    'recall': recall_score(*_sk_pair, average=None),
    'recall_macro': recall_score(*_sk_pair, average='macro'),
    'recall_micro': recall_score(*_sk_pair, average='micro'),
    'recall_weighted': recall_score(*_sk_pair, average='weighted'),

    # F1, same layout.
    'f1': f1_score(*_sk_pair, average=None),
    'f1_macro': f1_score(*_sk_pair, average='macro'),
    'f1_micro': f1_score(*_sk_pair, average='micro'),
    'f1_weighted': f1_score(*_sk_pair, average='weighted'),

    # log_loss is the only entry that consumes the predicted probabilities.
    'log_loss': log_loss(y_true, y_pred_proba),
    'kappa': cohen_kappa_score(*_sk_pair),
    'mcc': matthews_corrcoef(*_sk_pair),
    'balanced_accuracy': balanced_accuracy_score(*_sk_pair),
    'f beta_score': fbeta_score(*_sk_pair, beta=0.5, average='micro'),
    'jaccard_index_weight': jaccard_score(*_sk_pair, average='weighted'),
    'jaccard_index_macro': jaccard_score(*_sk_pair, average='macro'),
    'jaccard_index_micro': jaccard_score(*_sk_pair, average='micro'),
    'jaccard_index': jaccard_score(*_sk_pair, average=None),
    'confusion_matrix': confusion_matrix(*_sk_pair),
}

# **Pycm Library:**

In [21]:
# Re-import: the import cell binds the name ConfusionMatrix to torchmetrics and then nltk
# after pycm, so without this line the wrong class would be used here.
from pycm import ConfusionMatrix

cm = ConfusionMatrix(actual_vector=y_true, predict_vector=y_pred)

# Add to the shared `results` dict. It must NOT be re-created here: doing so
# discards the 'Scikit-learn' entry stored by the previous section.
results['PyCM'] = {
    'accuracy': cm.Overall_ACC,
    'precision': cm.PPV,
    'recall': cm.TPR,
    'f1': cm.F1,
    'precision_macro': cm.PPV_Macro,
    'precision_micro': cm.PPV_Micro,
    'recall_macro': cm.TPR_Macro,
    'recall_micro': cm.TPR_Micro,
    'f1_macro': cm.F1_Macro,
    'f1_micro': cm.F1_Micro,
    'kappa': cm.Kappa,
    'mcc_OVER': cm.Overall_MCC,
    'mcc': cm.MCC,
    'jaccard_index_OVER': cm.Overall_J,
    'jaccard_index': cm.J
}

# **TensorFlow Library:**

In [24]:
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import cohen_kappa_score, matthews_corrcoef

# One-hot encode labels and hard predictions with one shared category order.
one_hot_encoder = OneHotEncoder(sparse_output=False)
y_true_one_hot = one_hot_encoder.fit_transform(y_true.reshape(-1, 1))
y_pred_one_hot = one_hot_encoder.transform(y_pred.reshape(-1, 1))
tf_num_classes = len(one_hot_encoder.categories_[0])

y_true_tensor = tf.convert_to_tensor(y_true_one_hot, dtype=tf.float32)
y_pred_tensor = tf.convert_to_tensor(y_pred_one_hot, dtype=tf.float32)

# Class indices in 0..n-1, recovered from the one-hot rows.
y_true_idx = tf.argmax(y_true_tensor, axis=1)
y_pred_idx = tf.argmax(y_pred_tensor, axis=1)

# Metric objects use a tf_ prefix so they do not rebind `accuracy`, `precision`,
# `recall`, `f1` or sklearn's `log_loss`, which other cells rely on.
tf_accuracy = tf.keras.metrics.Accuracy()
tf_precision = tf.keras.metrics.Precision()
tf_recall = tf.keras.metrics.Recall()
tf_categorical_accuracy = tf.keras.metrics.CategoricalAccuracy()
tf_jaccard = tf.keras.metrics.MeanIoU(num_classes=tf_num_classes)

# Cross-entropy for log loss.
# NOTE(review): assumes the columns of y_pred_proba follow the same class order as the
# encoder categories fitted on y_true - confirm when a class is missing from y_true.
y_pred_proba_tensor = tf.convert_to_tensor(y_pred_proba, dtype=tf.float32)
tf_log_loss = tf.reduce_mean(
    tf.keras.losses.categorical_crossentropy(y_true_tensor, y_pred_proba_tensor)
)

tf_accuracy.update_state(y_true, y_pred)
tf_precision.update_state(y_true_tensor, y_pred_tensor)
tf_recall.update_state(y_true_tensor, y_pred_tensor)
tf_categorical_accuracy.update_state(y_true_tensor, y_pred_tensor)
# MeanIoU works on integer class indices; feeding it one-hot matrices would
# score the 0/1 entries of the encoding instead of the classes.
tf_jaccard.update_state(y_true_idx, y_pred_idx)

# Balanced accuracy = mean per-class recall over the classes that occur in y_true,
# taken from the confusion matrix (CategoricalAccuracy alone is just plain accuracy).
tf_confusion = tf.math.confusion_matrix(
    y_true_idx, y_pred_idx, num_classes=tf_num_classes, dtype=tf.float32
)
tf_support = tf.reduce_sum(tf_confusion, axis=1)
tf_per_class_recall = tf.math.divide_no_nan(tf.linalg.diag_part(tf_confusion), tf_support)
tf_balanced_accuracy = tf.math.divide_no_nan(
    tf.reduce_sum(tf_per_class_recall),
    tf.math.count_nonzero(tf_support, dtype=tf.float32),
)

# Kappa and MCC are computed by scikit-learn on the class indices (no Keras metric is used for them here).
tf_cohen_kappa = cohen_kappa_score(y_true_idx.numpy(), y_pred_idx.numpy())
tf_mcc = matthews_corrcoef(y_true_idx.numpy(), y_pred_idx.numpy())

# F1 from the Keras precision/recall; guard the 0/0 case instead of producing NaN.
tf_p = tf_precision.result().numpy()
tf_r = tf_recall.result().numpy()
tf_f1 = 2 * (tf_p * tf_r) / (tf_p + tf_r) if (tf_p + tf_r) > 0 else np.float32(0.0)

results['TensorFlow'] = {
    'accuracy': tf_accuracy.result().numpy(),
    'precision': tf_p,
    'recall': tf_r,
    'f1': tf_f1,
    'balanced_accuracy': tf_balanced_accuracy.numpy(),
    'log_loss': tf_log_loss.numpy(),
    'jaccard_index': tf_jaccard.result().numpy(),
    'cohen_kappa': tf_cohen_kappa,
    'mcc': tf_mcc,
}

# **TorchMetrics Library:**

In [27]:
# Aliased import: the bare name ConfusionMatrix is also bound by the pycm and nltk imports.
from torchmetrics import ConfusionMatrix as TorchConfusionMatrix

# torchmetrics needs class indices in 0..n-1. Build the mapping from every label seen in
# either vector instead of subtracting a hard-coded offset, so it keeps working when the
# label set changes or a class appears only in the predictions.
tm_labels = np.unique(np.concatenate([y_true, y_pred]))
y_true_adjusted = np.searchsorted(tm_labels, y_true)
y_pred_adjusted = np.searchsorted(tm_labels, y_pred)

y_true_tensor = torch.tensor(y_true_adjusted)
y_pred_tensor = torch.tensor(y_pred_adjusted)
num_classes = len(tm_labels)

# Arguments shared by every metric object.
tm_common = dict(task='multiclass', num_classes=num_classes)

# Metric objects live in a dict so they do not rebind sklearn's `matthews_corrcoef`
# and `confusion_matrix`, which other cells call. Entries created without `average`
# use the library's own default averaging.
tm_metrics = {
    'accuracy': Accuracy(**tm_common),
    'precision': Precision(**tm_common),
    'precision_macro': Precision(average='macro', **tm_common),
    'precision_micro': Precision(average='micro', **tm_common),
    'precision_weighted': Precision(average='weighted', **tm_common),
    'recall': Recall(**tm_common),
    'recall_macro': Recall(average='macro', **tm_common),
    'recall_micro': Recall(average='micro', **tm_common),
    'f1': F1Score(**tm_common),
    'f1_macro': F1Score(average='macro', **tm_common),
    'f1_micro': F1Score(average='micro', **tm_common),
    'f1_weighted': F1Score(average='weighted', **tm_common),
    'mcc': MatthewsCorrCoef(**tm_common),
    'kappa': CohenKappa(**tm_common),
    'f_beta_macro': FBetaScore(beta=0.5, average='macro', **tm_common),
    'f beta_score': FBetaScore(beta=0.5, average='micro', **tm_common),
    'jaccard_index': JaccardIndex(**tm_common),
    'jaccard_index_macro': JaccardIndex(average='macro', **tm_common),
    'jaccard_index_micro': JaccardIndex(average='micro', **tm_common),
    'jaccard_index_weight': JaccardIndex(average='weighted', **tm_common),
}

# torchmetrics call order is (preds, target).
results['TorchMetrics'] = {
    metric_name: metric(y_pred_tensor, y_true_tensor).item()
    for metric_name, metric in tm_metrics.items()
}
results['TorchMetrics']['confusion_matrix'] = (
    TorchConfusionMatrix(**tm_common)(y_pred_tensor, y_true_tensor).cpu().numpy()
)


# **evaluate Library:**

In [None]:
# Load every Hugging Face `evaluate` metric used in this section up front.
accuracy_metric = evaluate.load("accuracy")
precision_metric = evaluate.load("precision")
recall_metric = evaluate.load("recall")
f1_metric = evaluate.load("f1")
confusion_matrix_metric = evaluate.load("confusion_matrix")
mcc_metric = evaluate.load("matthews_correlation") # Load MCC metric

# Same references/predictions for every compute() call.
_eval_inputs = dict(references=y_true, predictions=y_pred)

accuracy_result = accuracy_metric.compute(**_eval_inputs)

# Precision: per-class (average=None) and the three averaged variants.
precision_result = precision_metric.compute(average=None, **_eval_inputs)
precision_result_macro = precision_metric.compute(average='macro', **_eval_inputs)
precision_result_micro = precision_metric.compute(average='micro', **_eval_inputs)
precision_result_weighted = precision_metric.compute(average='weighted', **_eval_inputs)

# Recall, same layout.
recall_result = recall_metric.compute(average=None, **_eval_inputs)
recall_result_macro = recall_metric.compute(average='macro', **_eval_inputs)
recall_result_micro = recall_metric.compute(average='micro', **_eval_inputs)
recall_result_weighted = recall_metric.compute(average='weighted', **_eval_inputs)

# F1, same layout.
f1_result = f1_metric.compute(average=None, **_eval_inputs)
f1_result_macro = f1_metric.compute(average='macro', **_eval_inputs)
f1_result_micro = f1_metric.compute(average='micro', **_eval_inputs)
f1_result_weighted = f1_metric.compute(average='weighted', **_eval_inputs)

confusion_matrix_result = confusion_matrix_metric.compute(**_eval_inputs)
mcc_result = mcc_metric.compute(**_eval_inputs)

# Each compute() call returns a dict keyed by the metric name; unwrap the values.
results['evaluate'] = {
    'accuracy': accuracy_result['accuracy'],
    'precision': precision_result['precision'],
    'precision_macro': precision_result_macro['precision'],
    'precision_micro': precision_result_micro['precision'],
    'precision_weighted': precision_result_weighted['precision'],
    'recall': recall_result['recall'],
    'recall_macro': recall_result_macro['recall'],
    'recall_micro': recall_result_micro['recall'],
    'recall_weighted': recall_result_weighted['recall'],
    'f1': f1_result['f1'],
    'f1_macro': f1_result_macro['f1'],
    'f1_micro': f1_result_micro['f1'],
    'f1_weighted': f1_result_weighted['f1'],
    'confusion_matrix': confusion_matrix_result['confusion_matrix'],
    'mcc': mcc_result['matthews_correlation'],
}

# **MLxtend Library:**

In [29]:
# MLxtend contributes two entries: accuracy and the confusion matrix.
_mlx_accuracy = mlxtend_accuracy_score(y_true, y_pred)
_mlx_confusion = mlxtend_confusion_matrix(y_true, y_pred)

results['MLxtend'] = {'accuracy': _mlx_accuracy, 'confusion_matrix': _mlx_confusion}

# **nltk Library:**

In [31]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import numpy as np

def multi_class_metrics(y_true, y_pred, labels):
    """Macro-averaged precision, recall and F-measure computed with nltk's scorers.

    Each label is scored one-vs-rest: the set of sample positions whose true
    class is the label is the nltk "reference" set, and the set of positions
    predicted as the label is the "test" set.

    Args:
        y_true: iterable of ground-truth labels.
        y_pred: iterable of predicted labels, aligned with ``y_true``.
        labels: iterable of class labels to average over.

    Returns:
        Tuple ``(avg_precision, avg_recall, avg_f_measure)``: the unweighted
        mean of the per-label scores. ``(0.0, 0.0, 0.0)`` when ``labels`` is empty.
    """
    # Local, aliased import: the bare names `precision` / `recall` are rebound by other cells.
    from nltk.metrics import (precision as nltk_precision,
                              recall as nltk_recall,
                              f_measure as nltk_f_measure)

    labels = list(labels)
    if not labels:
        # Nothing to average; avoid dividing by zero.
        return 0.0, 0.0, 0.0

    precisions = []
    recalls = []
    f_measures = []

    for label in labels:
        # Positions of this class in the ground truth and in the predictions.
        reference = {i for i, y in enumerate(y_true) if y == label}
        predicted = {i for i, y in enumerate(y_pred) if y == label}

        # nltk returns None for an undefined score (empty set); count it as 0,
        # the same convention the previous zero_division=0 setting used.
        precisions.append(nltk_precision(reference, predicted) or 0.0)
        recalls.append(nltk_recall(reference, predicted) or 0.0)
        f_measures.append(nltk_f_measure(reference, predicted) or 0.0)

    avg_precision = sum(precisions) / len(labels)
    avg_recall = sum(recalls) / len(labels)
    avg_f_measure = sum(f_measures) / len(labels)

    return avg_precision, avg_recall, avg_f_measure

# Aliased import so the result filed under 'nltk' really comes from nltk and the
# generic name `accuracy` (reused for metric objects in other cells) is not rebound.
from nltk.metrics import accuracy as nltk_accuracy

# nltk.metrics.accuracy compares two equal-length sequences element by element.
nltk_acc = nltk_accuracy(list(y_true), list(y_pred))
labels_multi = np.unique(y_true)
avg_precision, avg_recall, avg_f_measure = multi_class_metrics(y_true, y_pred, labels_multi)

results['nltk'] = {
    'accuracy': nltk_acc,
    'precision': avg_precision,
    'recall': avg_recall,
    'f1': avg_f_measure,
}



# **Statsmodels Library:**

In [32]:
from statsmodels.stats.inter_rater import cohens_kappa

y_true_series = pd.Series(y_true)
y_pred_series = pd.Series(y_pred)

# Square Actual x Predicted count table over every label seen in either vector;
# classes that are never predicted (or never occur) get an explicit zero row/column.
sm_labels = np.union1d(y_true, y_pred)
crosstab = (
    pd.crosstab(y_true_series, y_pred_series, rownames=['Actual'], colnames=['Predicted'])
    .reindex(index=sm_labels, columns=sm_labels, fill_value=0)
)

# The counts are already tabulated, so wrap them with Table(...) directly.
# Table.from_data expects raw observations and would re-tabulate the first two
# columns of the count table. shift_zeros=False keeps the raw counts instead of
# adding 0.5 to every cell when a zero is present.
cm = sm.stats.Table(crosstab, shift_zeros=False)

results['statsmodels'] = {
    # Unweighted Cohen's kappa computed by statsmodels from the count table.
    'kappa': cohens_kappa(crosstab.to_numpy()).kappa,
    'confusion_matrix': cm.table
}

In [None]:
# One row per library, one column per metric; the library name moves from the
# index into an ordinary column and the columns-axis label is cleared.
results_df = (
    pd.DataFrame(results)
    .T
    .reset_index()
    .rename_axis(None, axis=1)
)
print(results_df)