In [None]:
!pip install pycm
!pip install torchmetrics
!pip install scipy
!pip install statsmodels
!pip install nltk
!pip install SimpleITK
!pip install evaluate
!pip install mlxtend
!pip install Pattern
!pip install ignite
!pip install pytorch-ignite

In [None]:
import numpy as np
import pandas as pd
from decimal import getcontext, Decimal
from sklearn.model_selection import train_test_split

In [None]:
# Colab-only step: mount Google Drive at /content/drive so the workbooks
# referenced below (all under /content/drive/MyDrive/...) can be read.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Change the working directory so the relative pd.read_excel(...) calls in the
# following cells resolve, then list the folder contents.
# NOTE(review): the two %cd targets are different absolute paths. Every other
# absolute path in this notebook uses the "Loss Function Evaluation" prefix of
# the first one - confirm which folder is intended and drop the other line.
%cd /content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/
%cd /content/drive/MyDrive/Data/Classification/Binary/
!ls

# Load Data

In [None]:
# Display every floating-point value with 25 digits (decimal context, NumPy
# reprs and pandas tables) so that small differences between the metric values
# reported by each library are not hidden by display rounding.
getcontext().prec = 25
np.set_printoptions(precision=25)
pd.set_option('display.float_format', '{:.25f}'.format)

**Data 1: US MRI Radiomics**

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# --- Load the source workbooks (paths are relative to the working directory) ---
mri_features = pd.read_excel('MRI_Radiomics Features.xlsx')
# NOTE(review): us_mri_features is only used for the shape print below; the
# merge uses mri_features. Confirm which feature table this section should use.
us_mri_features = pd.read_excel('US_MRI_Radiomics Features.xlsx')
ucla_data = pd.read_excel('UCLA.xlsx')

print(f'MRI_Features: {mri_features.shape}')
print(f'us_mri_features: {us_mri_features.shape}')
print(f'ucla_data: {ucla_data.shape}')

# Strip the '_US_Prostate' suffix so lesion names can be matched against PatientID.
ucla_data['lesion_name'] = ucla_data['lesion_name'].str.replace('_US_Prostate', '')

# Inner join: keep lesions present in both the label sheet and the feature sheet.
merged_data = ucla_data.merge(mri_features, left_on='lesion_name', right_on='PatientID')

# Keep only rows whose multi-class score is none of 0, 1 or 2.
# NOTE(review): class_to_remove is never read; the mask below removes three classes.
class_to_remove = 1
score_column = 'UCLA Score (Similar to PIRADS v2)_Multi-class'
score = merged_data[score_column]
filtered_data = merged_data[(score != 0) & (score != 1) & (score != 2)]

# Separate the predictors from the binary target.
# NOTE(review): the multi-class score column is not dropped and therefore stays
# among the features - confirm this is intended and not a label leak.
y_true_bi_filtered = filtered_data['Labels_Binary']
features_filtered = filtered_data.drop(columns=['lesion_name', 'PatientID', 'Labels_Binary', 'Label'])

# 80/20 split with a fixed seed; y_true_1 is the held-out target as a NumPy array.
X_train, X_test, y_train, y_test = train_test_split(
    features_filtered, y_true_bi_filtered, test_size=0.2, random_state=42
)
y_true_1 = np.array(y_test)

# Standardise using statistics estimated on the training split only.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Seeded random forest; keep both hard predictions and class probabilities.
model_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_1 = model_rf.predict(X_test)
y_pred_proba_1 = model_rf.predict_proba(X_test)

**Data 2: Random Data:**

In [None]:
# Second evaluation set: pre-computed labels and predictions stored as the CSV
# columns 'y_true' and 'y_pred', converted to NumPy arrays.
data_multi = pd.read_csv('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Others/y_true_y_pred_binary.csv')
y_true_2, y_pred_2 = (np.array(data_multi[column]) for column in ('y_true', 'y_pred'))

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from openpyxl import Workbook

# Function to process each dataset
def process_dataset(features, targets, random_state=42):
    """Train a random forest on one feature/target pair and return its test-set predictions.

    Args:
        features: DataFrame of predictors. Its first column is skipped and the
            remaining columns are used as model inputs. The frame is not modified.
        targets: DataFrame whose last column holds the class label. Rows with a
            missing label are dropped from both predictors and labels.
        random_state: Seed used for both the 50/50 train/test split and the
            random forest, so repeated runs return the same predictions.

    Returns:
        DataFrame with columns 'y_true', 'y_pred' and 'y_pred_proba' for the
        held-out half; 'y_pred_proba' is column 1 of ``predict_proba``.
    """
    # Work on a copy so the caller's DataFrame keeps its original column labels.
    feature_table = features.copy()
    feature_table.columns = feature_table.columns.astype(str)

    X = feature_table.iloc[:, 1:]
    y = targets.iloc[:, -1]

    # Keep only the rows that actually have a label.
    labelled = y.notna()
    X = X[labelled]
    y = y[labelled]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=random_state
    )

    # Seed the forest as well; a seeded split alone does not make results reproducible.
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    # Column 1 of predict_proba; adjust the index if the class of interest is elsewhere.
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    return pd.DataFrame({'y_true': y_test, 'y_pred': y_pred, 'y_pred_proba': y_pred_proba})

# Read the 'feature' and 'target' sheets of each head-and-neck workbook.
features_df1 = pd.read_excel('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/HC_DF_SPT_Head and Neck.xlsx', sheet_name='feature')
targets_df1 = pd.read_excel('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/HC_DF_SPT_Head and Neck.xlsx', sheet_name='target')

features_df2 = pd.read_excel('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/HC_RF_CT_Head and Neck.xlsx', sheet_name='feature')
targets_df2 = pd.read_excel('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/HC_RF_CT_Head and Neck.xlsx', sheet_name='target')

features_df3 = pd.read_excel('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/HC_RF_PT_Head and Neck.xlsx', sheet_name='feature')
targets_df3 = pd.read_excel('/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/HC_RF_PT_Head and Neck.xlsx', sheet_name='target')

# Train and predict on each dataset independently.
results_df1 = process_dataset(features_df1, targets_df1)
results_df2 = process_dataset(features_df2, targets_df2)
results_df3 = process_dataset(features_df3, targets_df3)

# One sheet per dataset; the same workbook is written twice: first to the
# current working directory, then to the Drive data folder.
sheets = {
    'HC_DF_SPT_Head and Neck': results_df1,
    'HC_RF_CT_Head and Neck': results_df2,
    'HC_RF_PT_Head and Neck': results_df3,
}
for output_path in (
    'classification_results.xlsx',
    '/content/drive/MyDrive/Loss Function Evaluation/Data/Classification/Binary/classification_results.xlsx',
):
    with pd.ExcelWriter(output_path) as writer:
        for sheet_name, frame in sheets.items():
            frame.to_excel(writer, sheet_name=sheet_name, index=False)


In [None]:
# Start an empty library -> metrics mapping and reload the predictions saved
# for the HC_DF_SPT dataset from the workbook in the working directory.
results = dict()
df = pd.read_excel('classification_results.xlsx', sheet_name='HC_DF_SPT_Head and Neck')
y_true, y_pred = (df[column].values for column in ('y_true', 'y_pred'))
# Double brackets keep the probabilities as a 2-D single-column array.
y_pred_proba = df[['y_pred_proba']].values


In [None]:
# Choose which dataset the metric cells below evaluate: the Data 2 vectors.
# NOTE(review): y_pred_proba is not reassigned here, so it still holds the
# HC_DF_SPT probabilities read from classification_results.xlsx. The log-loss
# computations further down therefore pair y_true from one dataset with
# probabilities from another - confirm the intended source before trusting them.
y_true, y_pred = y_true_2, y_pred_2
#y_pred_proba = y_pred_proba_1

**Scikit Learn Library:**

In [None]:
from sklearn.metrics import (classification_report, accuracy_score, precision_score, recall_score, f1_score,
                             log_loss, cohen_kappa_score, roc_curve, auc, matthews_corrcoef,
                             balanced_accuracy_score, confusion_matrix, jaccard_score, fbeta_score)
from sklearn.metrics import roc_curve, auc

# ROC curve and its area, computed from the hard predictions in y_pred.
fpr, tpr, thresholds = roc_curve(y_true, y_pred)
roc_auc = auc(fpr, tpr)

# Collect the scikit-learn values one by one; insertion order is preserved.
sklearn_scores = {}
sklearn_scores['accuracy'] = accuracy_score(y_true, y_pred)
sklearn_scores['precision'] = precision_score(y_true, y_pred)
sklearn_scores['recall'] = recall_score(y_true, y_pred)
sklearn_scores['f1'] = f1_score(y_true, y_pred)
# Log loss is the only entry computed from probabilities instead of labels.
sklearn_scores['log_loss'] = log_loss(y_true, y_pred_proba)
sklearn_scores['kappa'] = cohen_kappa_score(y_true, y_pred)
sklearn_scores['mcc'] = matthews_corrcoef(y_true, y_pred)
sklearn_scores['balanced_accuracy'] = balanced_accuracy_score(y_true, y_pred)
sklearn_scores['f beta_score'] = fbeta_score(y_true, y_pred, beta=0.5)
sklearn_scores['jaccard_index'] = jaccard_score(y_true, y_pred)
sklearn_scores['confusion_matrix'] = confusion_matrix(y_true, y_pred)
sklearn_scores['AUC'] = roc_auc

results['Scikit-learn'] = sklearn_scores

**Pycm Library:**

In [None]:
from pycm import ConfusionMatrix

# Make sure both vectors are NumPy arrays before handing them to PyCM.
y_true = np.array(y_true)
y_pred = np.array(y_pred)

cm = ConfusionMatrix(actual_vector=y_true, predict_vector=y_pred)

# Per-class statistics are read for class 1; overall statistics are taken as-is.
results['PyCM'] = dict(
    accuracy=cm.Overall_ACC,
    precision=cm.PPV[1],
    recall=cm.TPR[1],
    f1=cm.F1[1],
    kappa=cm.Kappa,
    mcc=cm.Overall_MCC,
    jaccard_index=cm.J[1],
    confusion_matrix=cm.to_array(),
)

**Tensorflow Library:**

In [None]:
import tensorflow as tf

# float32 tensors; labels, predictions and probabilities are reshaped to column vectors.
y_true_tensor = tf.reshape(tf.constant(y_true, dtype=tf.float32), [-1, 1])
y_pred_tensor = tf.reshape(tf.constant(y_pred, dtype=tf.float32), [-1, 1])
y_pred_prob_tensor = tf.constant(y_pred_proba, dtype=tf.float32)
y_prob_tensor = tf.reshape(y_pred_prob_tensor, [-1, 1])

# Metrics that compare labels with hard predictions.
accuracy = tf.keras.metrics.Accuracy()
precision = tf.keras.metrics.Precision()
recall_macro = tf.keras.metrics.Recall()
f1 = tf.keras.metrics.F1Score(average='weighted')  # adjust `average` as needed
fbeta = tf.keras.metrics.FBetaScore(beta=0.5, average=None)  # adjust `beta` as needed
# NOTE(review): target_class_ids=[0, 1] presumably reports the IoU over both
# classes, whereas the other libraries here read the class-1 Jaccard - confirm.
jaccard = tf.keras.metrics.BinaryIoU(target_class_ids=[0, 1])
for label_metric in (accuracy, precision, recall_macro, f1, fbeta, jaccard):
    label_metric.update_state(y_true_tensor, y_pred_tensor)

# Binary cross-entropy is the only metric fed with probabilities.
log_loss = tf.keras.metrics.BinaryCrossentropy(from_logits=False)
log_loss.update_state(y_true_tensor, y_prob_tensor)

# AUC is updated with the raw (un-reshaped) label and prediction arrays.
auc = tf.keras.metrics.AUC()
auc.update_state(y_true, y_pred)

tensorflow_scores = {}
tensorflow_scores['accuracy'] = accuracy.result().numpy()
tensorflow_scores['precision'] = precision.result().numpy()
tensorflow_scores['recall'] = recall_macro.result().numpy()
tensorflow_scores['f1'] = f1.result().numpy()
tensorflow_scores['log_loss'] = log_loss.result().numpy()
tensorflow_scores['f beta_score'] = fbeta.result().numpy()
tensorflow_scores['jaccard_index'] = jaccard.result().numpy()
tensorflow_scores['AUC'] = auc.result().numpy()
results['TensorFlow'] = tensorflow_scores

**Evaluate Library:**

In [None]:
import evaluate

# Load each metric module and compute it right away on the selected vectors.
accuracy_evaluate = evaluate.load("accuracy")
accuracy_result = accuracy_evaluate.compute(references=y_true, predictions=y_pred)

precision_evaluate = evaluate.load("precision")
precision_result = precision_evaluate.compute(references=y_true, predictions=y_pred)

recall_evaluate = evaluate.load("recall")
recall_result = recall_evaluate.compute(references=y_true, predictions=y_pred)

f1_evaluate = evaluate.load("f1")
f1_result = f1_evaluate.compute(references=y_true, predictions=y_pred)

mcc_evaluate = evaluate.load("matthews_correlation")
mcc_result = mcc_evaluate.compute(references=y_true, predictions=y_pred)

confusion_matrix_evaluate = evaluate.load("confusion_matrix")
confusion_matrix_result = confusion_matrix_evaluate.compute(references=y_true, predictions=y_pred)

# Each compute() call returns a dict; pull the scalar out under a common key.
# No AUC entry is recorded for this library.
results['evaluate'] = dict(
    accuracy=accuracy_result['accuracy'],
    precision=precision_result['precision'],
    recall=recall_result['recall'],
    f1=f1_result['f1'],
    mcc=mcc_result['matthews_correlation'],
    confusion_matrix=confusion_matrix_result['confusion_matrix'],
)

**Torch Metrics Library:**

In [None]:
import torch
from torchmetrics import (Accuracy, Precision, Recall, F1Score, MatthewsCorrCoef, CohenKappa,
                          FBetaScore, JaccardIndex, ConfusionMatrix)
import torch.nn.functional as F
from torchmetrics.classification import BinaryAUROC
# Convert your true and predicted values to PyTorch tensors
# Convert the selected label and prediction vectors to PyTorch tensors.
y_true_tensor = torch.tensor(y_true)
y_pred_tensor = torch.tensor(y_pred)

# Binary-task metric objects; each is called as metric(preds, target).
accuracy = Accuracy('binary')
precision = Precision('binary')
recall = Recall('binary')
f1_score = F1Score('binary')
matthews_corrcoef = MatthewsCorrCoef('binary')
cohen_kappa = CohenKappa('binary')
fbeta_score = FBetaScore('binary', beta=0.5)
jaccard_index = JaccardIndex('binary')
confusion_matrix = ConfusionMatrix('binary')

# BinaryAUROC validates its inputs: `preds` must be a floating-point tensor and
# `target` an integer tensor. torch.tensor() inherits the NumPy dtype, so an
# integer label array would be rejected; cast explicitly so the call works for
# both integer and float inputs. 0/1 values are unchanged by the cast.
auroc = BinaryAUROC()
auc_value = auroc(y_pred_tensor.float(), y_true_tensor.long())

# Compute the metrics using TorchMetrics.
results['TorchMetrics'] = {
    'accuracy': accuracy(y_pred_tensor, y_true_tensor).item(),
    'precision': precision(y_pred_tensor, y_true_tensor).item(),
    'recall': recall(y_pred_tensor, y_true_tensor).item(),
    'f1': f1_score(y_pred_tensor, y_true_tensor).item(),
    'mcc': matthews_corrcoef(y_pred_tensor, y_true_tensor).item(),
    'kappa': cohen_kappa(y_pred_tensor, y_true_tensor).item(),
    'f beta_score': fbeta_score(y_pred_tensor, y_true_tensor).item(),
    'jaccard_index': jaccard_index(y_pred_tensor, y_true_tensor).item(),
    'confusion_matrix': confusion_matrix(y_pred_tensor, y_true_tensor).numpy(),
    'AUC': auc_value.item()
}

**Mlxtend Library:**

In [None]:
from mlxtend.evaluate import confusion_matrix as mlxtend_confusion_matrix, accuracy_score as mlxtend_accuracy_score

# MLxtend contributes only accuracy and the confusion matrix.
results['MLxtend'] = dict(
    accuracy=mlxtend_accuracy_score(y_true, y_pred),
    confusion_matrix=mlxtend_confusion_matrix(y_true, y_pred),
)

**NLTK Library:**

In [None]:
from nltk.metrics import accuracy, precision, recall, f_measure, ConfusionMatrix

def multi_class_metrics(y_true, y_pred, labels):
    """Average one-vs-rest precision, recall and F-measure over ``labels``.

    For every label, the positions where ``y_true`` / ``y_pred`` equal that
    label form the reference / test index sets passed to ``precision``,
    ``recall`` and ``f_measure``. Scores that come back as ``None`` are left
    out of the sum, but the divisor is always ``len(labels)``.

    Returns:
        Tuple ``(avg_precision, avg_recall, avg_f_measure)``.
    """
    def positions(values, wanted):
        # Indices at which `values` carries the label `wanted`.
        return {idx for idx, value in enumerate(values) if value == wanted}

    def mean_over_labels(scores):
        # None scores add nothing to the numerator; the denominator stays len(labels).
        return sum(score for score in scores if score is not None) / len(labels)

    per_label = []
    for label in labels:
        reference_idx = positions(y_true, label)
        predicted_idx = positions(y_pred, label)
        per_label.append((
            precision(reference_idx, predicted_idx),
            recall(reference_idx, predicted_idx),
            f_measure(reference_idx, predicted_idx),
        ))

    avg_precision = mean_over_labels([row[0] for row in per_label])
    avg_recall = mean_over_labels([row[1] for row in per_label])
    avg_f_measure = mean_over_labels([row[2] for row in per_label])
    return avg_precision, avg_recall, avg_f_measure

# NLTK's precision/recall/f_measure work on sets, so build the index sets of
# the positive class (label 1) once and reuse them for all three scores.
positive_reference = {idx for idx, label in enumerate(y_true) if label == 1}
positive_predicted = {idx for idx, label in enumerate(y_pred) if label == 1}

results['nltk'] = dict(
    accuracy=accuracy(y_true, y_pred),
    precision=precision(positive_reference, positive_predicted),
    recall=recall(positive_reference, positive_predicted),
    f1=f_measure(positive_reference, positive_predicted),
)

**Imbalanced-Learn Library:**

In [None]:
from imblearn.metrics import geometric_mean_score

# Imbalanced-learn contributes a single entry: the geometric mean score.
results['Imbalanced-learn'] = dict(
    geometric_mean=geometric_mean_score(y_true, y_pred),
)

**Ignite Library:**

In [None]:
from ignite.metrics import (Accuracy as IgniteAccuracy, Precision as IgnitePrecision, Recall as IgniteRecall,
                            Fbeta as IgniteFbeta, ConfusionMatrix as IgniteConfusionMatrix, CohenKappa as IgniteCohenKappa,
                            Loss as IgniteLoss)
from torch.nn.functional import binary_cross_entropy
from ignite.metrics import Loss
from ignite.metrics import ROC_AUC

# Imported here so this cell does not depend on the TorchMetrics cell having run.
import torch

ignite_accuracy = IgniteAccuracy()
ignite_precision = IgnitePrecision()
ignite_recall = IgniteRecall()
ignite_fbeta = IgniteFbeta(beta=0.5)
ignite_f1 = IgniteFbeta(beta=1.0)  # F1 is F-beta with beta = 1
ignite_kappa = IgniteCohenKappa()
# NOTE(review): constructed but never updated or reported below.
ignite_conf_matrix = IgniteConfusionMatrix(num_classes=2)

y_true_tensor_ignite = torch.tensor(y_true, dtype=torch.float32)
y_pred_tensor_ignite = torch.tensor(y_pred, dtype=torch.float32)
# squeeze() removes size-1 dimensions from the probability tensor.
y_prob_tensor_ignite = torch.tensor(y_pred_proba, dtype=torch.float32).squeeze()

# Binary cross-entropy wrapped as an Ignite metric. Use the function imported
# at the top of this cell rather than the `F` alias, which only exists if the
# TorchMetrics cell was executed earlier.
ignite_loss = Loss(binary_cross_entropy)

# ROC AUC from the hard predictions, matching the other library sections.
auc_metric = ROC_AUC()
auc_metric.update((y_pred_tensor_ignite, y_true_tensor_ignite))
auc_value = auc_metric.compute()

# Ignite metrics consume a (y_pred, y) tuple.
ignite_accuracy.update((y_pred_tensor_ignite, y_true_tensor_ignite))
ignite_precision.update((y_pred_tensor_ignite, y_true_tensor_ignite))
ignite_recall.update((y_pred_tensor_ignite, y_true_tensor_ignite))
ignite_fbeta.update((y_pred_tensor_ignite, y_true_tensor_ignite))
ignite_f1.update((y_pred_tensor_ignite, y_true_tensor_ignite))
ignite_kappa.update((y_pred_tensor_ignite, y_true_tensor_ignite))
# The loss is the only metric fed with probabilities.
ignite_loss.update((y_prob_tensor_ignite, y_true_tensor_ignite))

results['Ignite'] = {
    'accuracy': ignite_accuracy.compute(),
    'precision': ignite_precision.compute().item(),
    'recall': ignite_recall.compute().item(),
    'f1': ignite_f1.compute(),  # F1 score
    'f beta_score': ignite_fbeta.compute(),
    'kappa': ignite_kappa.compute(),
    'log_loss': ignite_loss.compute(),
    'AUC': auc_value
}

In [None]:
# Lift the column-count and column-width limits so the whole table is shown.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# One row per library, one column per metric; the library name becomes a
# regular 'Library' column and the column axis is left unnamed.
results_df = (
    pd.DataFrame(results).T
    .rename_axis('Library')
    .reset_index()
    .rename_axis(None, axis=1)
)
print(results_df)