In [1]:
%cd /kaggle/input/chemcancer-v2/src/
%mkdir /kaggle/working/Deep_Learning_metrics/
%mkdir /kaggle/working/During_train/
%mkdir /kaggle/working/CV_VIT_models
%mkdir /kaggle/working/CV_VIT_results
%mkdir /kaggle/working/CV_VIT_fold_data
%mkdir /kaggle/working/CV_VIT_cm

/kaggle/input/chemcancer-v2/src


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
from tensorflow.keras.optimizers import Adam
from data import *
from machine_learning_models import *
from deep_learning_models import *
from vision_transformer import *
from utils_dl_model import *
from utils_ml_model import print_ml_results
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint



In [5]:
# Load the dataset
def load_data(file_name):
    data = pd.read_csv(file_name)
    return data

# Extract the X and y data from the dataset.
def extract_data(data):
    # Extract the features from the data
    X = np.array(data.iloc[:, 4:], dtype = float)

    # Extract the target variable
    y = np.array(data['Cell type'])

    # Encode the labels
    encoder = LabelEncoder()
    y_encoded = encoder.fit_transform(y)

    # Return X and y
    return X, y_encoded, encoder

def display_label_mapping(encoder):
    """Display the mapping between original labels and encoded labels."""
    for index, label in enumerate(encoder.classes_):
        print(f"{index} -> {label}")

# If you run this function with your encoder in your environment, it will print the mapping.
# display_label_mapping(encoder)

In [6]:
def compute_basic_metrics_with_labels(confusion_matrix, mapping):
    """
    Compute TP, TN, FP, and FN for each class from a given confusion matrix.
    Return results with original labels.
    
    Parameters:
    - confusion_matrix: the input confusion matrix
    - mapping: mapping from encoded labels to original labels
    
    Returns:
    - A dictionary containing TP, TN, FP, and FN for each class with original labels.
    """
    
    num_classes = confusion_matrix.shape[0]
    metrics = {}
    
    for i in range(num_classes):
        label = mapping[i]
        tp = confusion_matrix[i, i]
        fp = sum(confusion_matrix[j, i] for j in range(num_classes) if j != i)
        fn = sum(confusion_matrix[i, j] for j in range(num_classes) if j != i)
        tn = sum(confusion_matrix[j, k] for j in range(num_classes) for k in range(num_classes) if j != i and k != i)
        
        metrics[label] = {
            'TP': tp,
            'FP': fp,
            'FN': fn,
            'TN': tn
        }
        
    return metrics

def compute_metrics_with_labels(confusion_matrix, mapping):
    """
    Compute Sensitivity, Specificity, and Precision for each class from a given confusion matrix.
    Return results with original labels.
    
    Parameters:
    - confusion_matrix: the input confusion matrix
    - mapping: mapping from encoded labels to original labels
    
    Returns:
    - A dictionary containing Sensitivity, Specificity, and Precision for each class with original labels.
    """
    
    num_classes = confusion_matrix.shape[0]
    metrics = {}
    
    for i in range(num_classes):
        label = mapping[i]
        tp = confusion_matrix[i, i]
        fp = sum(confusion_matrix[j, i] for j in range(num_classes) if j != i)
        fn = sum(confusion_matrix[i, j] for j in range(num_classes) if j != i)
        tn = sum(confusion_matrix[j, k] for j in range(num_classes) for k in range(num_classes) if j != i and k != i)
        
        sensitivity = tp / (tp + fn) if (tp + fn) != 0 else 0
        specificity = tn / (tn + fp) if (tn + fp) != 0 else 0
        precision = tp / (tp + fp) if (tp + fp) != 0 else 0
        
        metrics[label] = {
            'Sensitivity': sensitivity,
            'Specificity': specificity,
            'Precision': precision
        }
        
    return metrics

# Now, these functions will return dictionaries with the original labels as keys.

In [7]:
import pandas as pd

def metrics_to_dataframe(metrics_list, model_name):
    """
    Convert a list of metrics dictionaries to a pandas DataFrame.
    
    Parameters:
    - metrics_list: list of metrics dictionaries
    - model_name: name of the model (e.g., "CNN" or "VIT")
    
    Returns:
    - A pandas DataFrame representation of the metrics.
    """
    # Flatten the metrics for DataFrame conversion
    flattened_metrics = []
    for metrics in metrics_list:
        flat_dict = {}
        for key, value in metrics.items():
            for metric_name, metric_value in value.items():
                flat_key = f"{key} {metric_name}"
                flat_dict[flat_key] = metric_value
        flattened_metrics.append(flat_dict)
    
    # Convert to DataFrame
    df = pd.DataFrame(flattened_metrics)
    
    # Calculate the average across folds
    df_avg = df.mean().to_frame(name='Average').transpose()
    
    # Add model name and average to the DataFrame for presentation
    df['Model'] = model_name
    df = pd.concat([df, df_avg])
    df.iloc[-1, df.columns.get_loc('Model')] = model_name + ' Avg'
    
    # Reorder columns for better presentation
    columns_order = ['Model'] + list(df.columns[:-1])
    df = df[columns_order]
    
    return df

## 1D-ori-VIT-Non-Filtered

In [12]:
# Given the paths you provided, let's generate confusion matrices for the VIT models:
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-ori-VIT-non-filtered/CV_VIT_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-ori-VIT-non-filtered/CV_fold_data/"
cm_model_path = "/kaggle/working/CV_VIT_cm/"

model_name = "ori-vit-non-filtered"

# Lists to store metrics
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold
for fold in range(1, 6):
    # Load datasets for this fold
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path, custom_objects={
        'ClassToken': ClassToken, 'TransformerBlock': TransformerBlock})
    
    # Predict and evaluate
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# This will give you a list of confusion matrices for each fold of the VIT models
confusion_matrices_cnn

Fold 1:
Accuracy: 0.7832
Precision: 0.7848
Recall: 0.7748
F1 Score: 0.7772
----------------------------------------
Fold 2:
Accuracy: 0.7558
Precision: 0.7592
Recall: 0.7457
F1 Score: 0.7480
----------------------------------------
Fold 3:
Accuracy: 0.7726
Precision: 0.7709
Recall: 0.7662
F1 Score: 0.7678
----------------------------------------
Fold 4:
Accuracy: 0.7447
Precision: 0.7359
Recall: 0.7355
F1 Score: 0.7355
----------------------------------------
Fold 5:
Accuracy: 0.7743
Precision: 0.7688
Recall: 0.7665
F1 Score: 0.7642
----------------------------------------


[array([[160,   7,  16],
        [ 18,  98,  26],
        [ 22,  14, 114]]),
 array([[158,   7,  18],
        [ 24,  90,  28],
        [ 24,  15, 111]]),
 array([[156,   7,  20],
        [ 20, 105,  17],
        [ 22,  22, 106]]),
 array([[156,  14,  13],
        [ 16,  96,  29],
        [ 17,  32, 101]]),
 array([[162,   9,  12],
        [ 12, 112,  17],
        [ 21,  36,  93]])]

In [13]:
mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the combined_advanced_df DataFrame

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
160.00 & 40.00 & 23.00 & 252.00 \\
158.00 & 48.00 & 25.00 & 244.00 \\
156.00 & 42.00 & 27.00 & 250.00 \\
156.00 & 33.00 & 27.00 & 258.00 \\
162.00 & 33.00 & 21.00 & 258.00 \\
158.40 & 39.20 & 24.60 & 252.40 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensitivity & Cancer cell line Specificity & Cancer cell line Precision \\
\midrule
0.87 & 0.86 & 0.80 \\
0.86 & 0.84 & 0.77 \\
0.85 & 0.86 & 0.79 \\
0.85 & 0.89 & 0.83 \\
0.89 & 0.89 & 0.83 \\
0.87 & 0.87 & 0.80 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrrr}
\toprule
Monocyte TP & Monocyte FP & Monocyte FN & Monocyte TN \\
\midrule
98.00 & 21.00 & 44.00 & 312.00 \\
90.00 & 22.00 & 52.00 & 311.00 \\
105.00 & 29.00 & 37.00 & 304.00 \\
96.00 & 46.00 & 45.00 & 287.00 \\
112.00 & 45.00 & 29.00 & 288.00 \\
100.20 & 32.60 & 41.40 & 300.40 \\
\bottomrule
\end{tabular}

\begin{tabula

## 1D-ori-VIT-Filtered

In [18]:
# Given the paths you provided, let's generate confusion matrices for the VIT models:
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-ori-VIT-non-filtered/CV_VIT_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-ori-VIT-non-filtered/CV_fold_data/"
cm_model_path = "/kaggle/working/CV_VIT_cm/"

model_name = "ori-vit-filtered"

# Lists to store metrics
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold
for fold in range(1, 6):
    # Load datasets for this fold
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path, custom_objects={
        'ClassToken': ClassToken, 'TransformerBlock': TransformerBlock})
    
    # Predict and evaluate
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# This will give you a list of confusion matrices for each fold of the VIT models
confusion_matrices_cnn

mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the combined_advanced_df DataFrame

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

Fold 1:
Accuracy: 0.7832
Precision: 0.7848
Recall: 0.7748
F1 Score: 0.7772
----------------------------------------
Fold 2:
Accuracy: 0.7558
Precision: 0.7592
Recall: 0.7457
F1 Score: 0.7480
----------------------------------------
Fold 3:
Accuracy: 0.7726
Precision: 0.7709
Recall: 0.7662
F1 Score: 0.7678
----------------------------------------
Fold 4:
Accuracy: 0.7447
Precision: 0.7359
Recall: 0.7355
F1 Score: 0.7355
----------------------------------------
Fold 5:
Accuracy: 0.7743
Precision: 0.7688
Recall: 0.7665
F1 Score: 0.7642
----------------------------------------
\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
160.00 & 40.00 & 23.00 & 252.00 \\
158.00 & 48.00 & 25.00 & 244.00 \\
156.00 & 42.00 & 27.00 & 250.00 \\
156.00 & 33.00 & 27.00 & 258.00 \\
162.00 & 33.00 & 21.00 & 258.00 \\
158.40 & 39.20 & 24.60 & 252.40 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensiti

## 1D-Aug-VIT-Non-Filtered

In [19]:
# Given the paths you provided, let's generate confusion matrices for the VIT models:
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-Aug-VIT-Non-Filtered/CV_VIT_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-Aug-VIT-Non-Filtered/CV_fold_data/"
cm_model_path = "/kaggle/working/CV_VIT_cm/"

model_name = "aug-vit-non-filtered"

# Lists to store metrics
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold
for fold in range(1, 6):
    # Load datasets for this fold
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path, custom_objects={
        'ClassToken': ClassToken, 'TransformerBlock': TransformerBlock})
    
    # Predict and evaluate
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# This will give you a list of confusion matrices for each fold of the VIT models
confusion_matrices_cnn

mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the combined_advanced_df DataFrame

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

Fold 1:
Accuracy: 0.7658
Precision: 0.7591
Recall: 0.7471
F1 Score: 0.7499
----------------------------------------
Fold 2:
Accuracy: 0.8168
Precision: 0.8113
Recall: 0.8106
F1 Score: 0.8106
----------------------------------------
Fold 3:
Accuracy: 0.7838
Precision: 0.7788
Recall: 0.7731
F1 Score: 0.7749
----------------------------------------
Fold 4:
Accuracy: 0.8108
Precision: 0.8048
Recall: 0.7988
F1 Score: 0.8010
----------------------------------------
Fold 5:
Accuracy: 0.7748
Precision: 0.7654
Recall: 0.7656
F1 Score: 0.7654
----------------------------------------
\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
124.00 & 32.00 & 11.00 & 166.00 \\
117.00 & 16.00 & 18.00 & 182.00 \\
118.00 & 27.00 & 17.00 & 171.00 \\
123.00 & 22.00 & 12.00 & 176.00 \\
115.00 & 19.00 & 20.00 & 179.00 \\
119.40 & 23.20 & 15.60 & 174.80 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensiti

## 1D-Aug-VIT-Filtered

In [20]:
# Given the paths you provided, let's generate confusion matrices for the VIT models:
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-Aug-VIT-Filtered/CV_VIT_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-vit/1D-Aug-VIT-Filtered/CV_fold_data/"
cm_model_path = "/kaggle/working/CV_VIT_cm/"

model_name = "aug-vit-filtered"

# Lists to store metrics
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold
for fold in range(1, 6):
    # Load datasets for this fold
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path, custom_objects={
        'ClassToken': ClassToken, 'TransformerBlock': TransformerBlock})
    
    # Predict and evaluate
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# This will give you a list of confusion matrices for each fold of the VIT models
confusion_matrices_cnn

mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the combined_advanced_df DataFrame

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

Fold 1:
Accuracy: 0.7568
Precision: 0.7520
Recall: 0.7491
F1 Score: 0.7466
----------------------------------------
Fold 2:
Accuracy: 0.7267
Precision: 0.7239
Recall: 0.7266
F1 Score: 0.7222
----------------------------------------
Fold 3:
Accuracy: 0.6697
Precision: 0.6795
Recall: 0.6686
F1 Score: 0.6596
----------------------------------------
Fold 4:
Accuracy: 0.7267
Precision: 0.7308
Recall: 0.7206
F1 Score: 0.7152
----------------------------------------
Fold 5:
Accuracy: 0.7417
Precision: 0.7388
Recall: 0.7410
F1 Score: 0.7364
----------------------------------------
\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
112.00 & 21.00 & 23.00 & 177.00 \\
99.00 & 18.00 & 36.00 & 180.00 \\
94.00 & 22.00 & 41.00 & 176.00 \\
107.00 & 27.00 & 28.00 & 171.00 \\
102.00 & 18.00 & 33.00 & 180.00 \\
102.80 & 21.20 & 32.20 & 176.80 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensitivi