In [1]:
# Switch to the project source directory (presumably so the local modules
# imported in the next cell resolve -- confirm against the dataset layout).
%cd /kaggle/input/chemcancer-v2/src/
# Create the output directories. `-p` makes the cell safe to re-run: a bare
# `mkdir` reports an error when the directory already exists.
%mkdir -p /kaggle/working/Deep_Learning_metrics/
%mkdir -p /kaggle/working/During_train/
%mkdir -p /kaggle/working/CV_CNN_models
%mkdir -p /kaggle/working/CV_CNN_results
%mkdir -p /kaggle/working/CV_CNN_fold_data
%mkdir -p /kaggle/working/CV_CNN_cm

/kaggle/input/chemcancer-v2/src


In [2]:
import numpy as np
import matplotlib.pyplot as plt
import os
import time
from tensorflow.keras.optimizers import Adam
from data import *
from machine_learning_models import *
from deep_learning_models import *
from vision_transformer import *
from utils_dl_model import *
from utils_ml_model import print_ml_results
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint



In [3]:
# Load the dataset
def load_data(file_name):
    """Read the CSV at ``file_name`` and return its contents as a pandas DataFrame."""
    return pd.read_csv(file_name)

# Extract the X and y data from the dataset.
def extract_data(data):
    """
    Split a loaded dataset into features and integer-encoded labels.

    Parameters:
    - data: DataFrame with a 'Cell type' column; every column from
      position 4 onward is taken as a numeric feature.

    Returns:
    - (X, y_encoded, encoder): the float feature matrix, the encoded
      labels, and the fitted LabelEncoder that produced them.
    """
    # Feature block: all columns from index 4 to the end, coerced to float
    features = np.array(data.iloc[:, 4:], dtype=float)

    # Fit the encoder on the raw 'Cell type' values and encode them in one step
    encoder = LabelEncoder()
    encoded_labels = encoder.fit_transform(np.array(data['Cell type']))

    return features, encoded_labels, encoder

def display_label_mapping(encoder):
    """Print one "<encoded index> -> <original label>" line per entry of ``encoder.classes_``."""
    rendered = (f"{code} -> {name}" for code, name in enumerate(encoder.classes_))
    for line in rendered:
        print(line)

# If you run this function with your encoder in your environment, it will print the mapping.
# display_label_mapping(encoder)

In [4]:
def compute_basic_metrics_with_labels(confusion_matrix, mapping):
    """
    Compute TP, TN, FP, and FN for each class from a given confusion matrix.
    Return results with original labels.

    Row i is treated as the actual class and column i as the predicted
    class: off-diagonal entries in row i count as FN for class i, and
    off-diagonal entries in column i count as FP.

    Parameters:
    - confusion_matrix: square 2-D confusion matrix; any array-like
      (NumPy array or nested list) is accepted
    - mapping: mapping from encoded labels (0..n-1) to original labels

    Returns:
    - A dictionary containing TP, TN, FP, and FN for each class with original labels.

    Raises:
    - ValueError: if the matrix is not 2-D and square.
    """
    # Coerce once so nested lists work as well as ndarrays
    cm = np.asarray(confusion_matrix)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(
            f"confusion_matrix must be a square 2-D array, got shape {cm.shape}"
        )

    # Vectorised one-vs-rest counts for all classes at once
    true_pos = np.diag(cm)
    false_pos = cm.sum(axis=0) - true_pos   # column total minus the diagonal
    false_neg = cm.sum(axis=1) - true_pos   # row total minus the diagonal
    # Everything outside row i and column i
    true_neg = cm.sum() - true_pos - false_pos - false_neg

    metrics = {}
    for i in range(cm.shape[0]):
        metrics[mapping[i]] = {
            'TP': true_pos[i],
            'FP': false_pos[i],
            'FN': false_neg[i],
            'TN': true_neg[i]
        }

    return metrics

def compute_metrics_with_labels(confusion_matrix, mapping):
    """
    Compute Sensitivity, Specificity, and Precision for each class from a given confusion matrix.
    Return results with original labels.

    Row i is treated as the actual class and column i as the predicted
    class. Each ratio is reported as 0 when its denominator is 0.

    Parameters:
    - confusion_matrix: square 2-D confusion matrix; any array-like
      (NumPy array or nested list) is accepted
    - mapping: mapping from encoded labels (0..n-1) to original labels

    Returns:
    - A dictionary containing Sensitivity, Specificity, and Precision for each class with original labels.

    Raises:
    - ValueError: if the matrix is not 2-D and square.
    """
    # Coerce once so nested lists work as well as ndarrays
    cm = np.asarray(confusion_matrix)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1]:
        raise ValueError(
            f"confusion_matrix must be a square 2-D array, got shape {cm.shape}"
        )

    # Vectorised one-vs-rest counts for all classes at once
    true_pos = np.diag(cm)
    false_pos = cm.sum(axis=0) - true_pos   # column total minus the diagonal
    false_neg = cm.sum(axis=1) - true_pos   # row total minus the diagonal
    true_neg = cm.sum() - true_pos - false_pos - false_neg

    def _ratio(numerator, denominator):
        # Guarded division: an undefined ratio is reported as 0
        return numerator / denominator if denominator != 0 else 0

    metrics = {}
    for i in range(cm.shape[0]):
        tp, fp, fn, tn = true_pos[i], false_pos[i], false_neg[i], true_neg[i]
        metrics[mapping[i]] = {
            'Sensitivity': _ratio(tp, tp + fn),
            'Specificity': _ratio(tn, tn + fp),
            'Precision': _ratio(tp, tp + fp)
        }

    return metrics

# Now, these functions will return dictionaries with the original labels as keys.

In [10]:
def compute_average_metrics_with_labels(confusion_matrix, mapping=None):
    """
    Compute average Sensitivity, Specificity, and Precision for all classes from a given confusion matrix.
    Return average results.

    The averages are unweighted means over the classes. Row i is treated
    as the actual class and column i as the predicted class. A per-class
    ratio whose denominator is 0 contributes 0 to the average.

    Parameters:
    - confusion_matrix: square 2-D confusion matrix; any array-like
      (NumPy array or nested list) is accepted
    - mapping: mapping from encoded labels to original labels. It is not
      used by this function; the parameter is kept (now optional) so
      existing calls keep working.

    Returns:
    - A dictionary containing average Sensitivity, Specificity, and Precision.

    Raises:
    - ValueError: if the matrix is not 2-D and square, or has no classes.
    """
    # Coerce once so nested lists work as well as ndarrays
    cm = np.asarray(confusion_matrix)
    if cm.ndim != 2 or cm.shape[0] != cm.shape[1] or cm.shape[0] == 0:
        raise ValueError(
            f"confusion_matrix must be a non-empty square 2-D array, got shape {cm.shape}"
        )

    num_classes = cm.shape[0]

    # Vectorised one-vs-rest counts for all classes at once
    true_pos = np.diag(cm)
    false_pos = cm.sum(axis=0) - true_pos   # column total minus the diagonal
    false_neg = cm.sum(axis=1) - true_pos   # row total minus the diagonal
    true_neg = cm.sum() - true_pos - false_pos - false_neg

    def _ratio(numerator, denominator):
        # Guarded division: an undefined ratio is reported as 0
        return numerator / denominator if denominator != 0 else 0

    total_sensitivity, total_specificity, total_precision = 0, 0, 0
    for i in range(num_classes):
        tp, fp, fn, tn = true_pos[i], false_pos[i], false_neg[i], true_neg[i]
        total_sensitivity += _ratio(tp, tp + fn)
        total_specificity += _ratio(tn, tn + fp)
        total_precision += _ratio(tp, tp + fp)

    return {
        'Average Sensitivity': total_sensitivity / num_classes,
        'Average Specificity': total_specificity / num_classes,
        'Average Precision': total_precision / num_classes
    }

In [5]:
import pandas as pd

def metrics_to_dataframe(metrics_list, model_name):
    """
    Build a per-fold metrics table with a trailing average row.

    Parameters:
    - metrics_list: one dictionary per fold, shaped
      {label: {metric_name: value}}
    - model_name: name of the model (e.g., "CNN" or "VIT"); written to the
      'Model' column, with " Avg" appended on the average row

    Returns:
    - A pandas DataFrame with 'Model' as the first column, one
      "<label> <metric_name>" column per metric, one row per fold, and a
      final row (index 'Average') holding the column means.
    """
    # One flat record per fold: nested keys become "<label> <metric_name>"
    records = [
        {
            f"{label} {metric_name}": metric_value
            for label, per_label in fold_metrics.items()
            for metric_name, metric_value in per_label.items()
        }
        for fold_metrics in metrics_list
    ]
    per_fold = pd.DataFrame(records)

    # Remember the metric columns before 'Model' is added
    metric_columns = list(per_fold.columns)

    # Column means across folds, as a single row labelled 'Average'
    average_row = per_fold.mean().to_frame(name='Average').transpose()

    # Tag fold rows with the model name, append the average row, and tag it too
    per_fold['Model'] = model_name
    combined = pd.concat([per_fold, average_row])
    combined.iloc[-1, combined.columns.get_loc('Model')] = model_name + ' Avg'

    # 'Model' first, then the metric columns in their original order
    return combined[['Model'] + metric_columns]

In [6]:
# Load the data
# Path of the CSV dataset read by this notebook
data_file = "/kaggle/input/chemcancer-v2/Data/HC05_HC07.csv"
data = load_data(data_file)

# extract_data returns the float feature matrix (columns from position 4 on),
# the integer-encoded 'Cell type' labels, and the fitted LabelEncoder.
X_raw, y_encoded, encoder = extract_data(data)
# Print the "encoded index -> original label" pairs held by the encoder
display_label_mapping(encoder)

0 -> Cancer cell line
1 -> Monocyte
2 -> T-cells


## 1D-ori-CNN-Non-Filtered

In [14]:
# Given the paths you provided, let's generate confusion matrices for the VIT models:
# Evaluate the saved per-fold CNN models under the "1D-ori-CNN-Non-Filtered"
# directory on each fold's stored test split and build one confusion matrix per fold.
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-ori-CNN-Non-Filtered/CV_CNN_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-ori-CNN-Non-Filtered/CV_CNN_fold_data/"
cm_model_path = "/kaggle/working/CV_CNN_cm/"

model_name = "ori-cnn-non-filtered"

# Lists to store metrics (one entry is appended per fold)
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold (fold files are numbered 1 to 5)
for fold in range(1, 6):
    # Load datasets for this fold
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted fold files.
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path)
    
    # Predict and evaluate: argmax over the last axis picks the predicted class index
    # (assumes the model outputs one score per class -- TODO confirm)
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    # NOTE(review): cm_model_path ends with "/" and is followed by "_", so the
    # file name starts with an underscore ("_ori-cnn-non-filtered_cm_fold_N.pkl");
    # confirm this is intended.
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# Last expression of the cell: displays the list of per-fold confusion matrices for the CNN models
confusion_matrices_cnn

Fold 1:
Accuracy: 0.8842
Precision: 0.8795
Recall: 0.8788
F1 Score: 0.8789
----------------------------------------
Fold 2:
Accuracy: 0.8758
Precision: 0.8691
Recall: 0.8682
F1 Score: 0.8685
----------------------------------------
Fold 3:
Accuracy: 0.8884
Precision: 0.8834
Recall: 0.8820
F1 Score: 0.8822
----------------------------------------
Fold 4:
Accuracy: 0.8734
Precision: 0.8654
Recall: 0.8622
F1 Score: 0.8630
----------------------------------------
Fold 5:
Accuracy: 0.8734
Precision: 0.8667
Recall: 0.8644
F1 Score: 0.8648
----------------------------------------


[array([[174,   6,   3],
        [  6, 121,  15],
        [  6,  19, 125]]),
 array([[176,   4,   3],
        [  7, 114,  21],
        [  4,  20, 126]]),
 array([[177,   4,   2],
        [  7, 121,  14],
        [  4,  22, 124]]),
 array([[182,   0,   1],
        [ 10, 107,  24],
        [  3,  22, 125]]),
 array([[177,   3,   3],
        [  7, 108,  26],
        [  3,  18, 129]])]

In [15]:
# Encoded label -> original class name (same pairs as printed by display_label_mapping)
mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
# One row per fold plus a final average row. The 'CNN-500' label only fills the
# 'Model' column, which the per-label filters below do not select.
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label (keeps columns whose name contains the label)
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX (index dropped, floats formatted to two decimals)
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the cnn_advanced_df DataFrame (sensitivity / specificity / precision)

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

# Print the tables label by label: counts first, then the derived rates
print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
174.00 & 12.00 & 9.00 & 280.00 \\
176.00 & 11.00 & 7.00 & 281.00 \\
177.00 & 11.00 & 6.00 & 281.00 \\
182.00 & 13.00 & 1.00 & 278.00 \\
177.00 & 10.00 & 6.00 & 281.00 \\
177.20 & 11.40 & 5.80 & 280.20 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensitivity & Cancer cell line Specificity & Cancer cell line Precision \\
\midrule
0.95 & 0.96 & 0.94 \\
0.96 & 0.96 & 0.94 \\
0.97 & 0.96 & 0.94 \\
0.99 & 0.96 & 0.93 \\
0.97 & 0.97 & 0.95 \\
0.97 & 0.96 & 0.94 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrrr}
\toprule
Monocyte TP & Monocyte FP & Monocyte FN & Monocyte TN \\
\midrule
121.00 & 25.00 & 21.00 & 308.00 \\
114.00 & 24.00 & 28.00 & 309.00 \\
121.00 & 26.00 & 21.00 & 307.00 \\
107.00 & 22.00 & 34.00 & 311.00 \\
108.00 & 21.00 & 33.00 & 312.00 \\
114.20 & 23.60 & 27.40 & 309.40 \\
\bottomrule
\end{tabular}

\begin{tabular}{

## 1D-ori-CNN-Filtered

In [16]:
# Evaluate the saved per-fold CNN models under the "1D-ori-CNN-Filtered"
# directory on each fold's stored test split and build one confusion matrix per fold.
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-ori-CNN-Filtered/CV_CNN_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-ori-CNN-Filtered/CV_CNN_fold_data/"
cm_model_path = "/kaggle/working/CV_CNN_cm/"

model_name = "ori-cnn-filtered"

# Lists to store metrics (one entry is appended per fold)
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold (fold files are numbered 1 to 5)
for fold in range(1, 6):
    # Load datasets for this fold
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted fold files.
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path)
    
    # Predict and evaluate: argmax over the last axis picks the predicted class index
    # (assumes the model outputs one score per class -- TODO confirm)
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    # NOTE(review): cm_model_path ends with "/" and is followed by "_", so the
    # file name starts with an underscore ("_ori-cnn-filtered_cm_fold_N.pkl");
    # confirm this is intended.
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# List of per-fold confusion matrices for the CNN models.
# NOTE(review): this bare expression is not the last statement of the cell, so nothing is displayed here.
confusion_matrices_cnn

# Encoded label -> original class name (same pairs as printed by display_label_mapping)
mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
# One row per fold plus a final average row. The 'CNN-500' label only fills the
# 'Model' column, which the per-label filters below do not select.
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label (keeps columns whose name contains the label)
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX (index dropped, floats formatted to two decimals)
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the cnn_advanced_df DataFrame (sensitivity / specificity / precision)

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

# Print the tables label by label: counts first, then the derived rates
print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

Fold 1:
Accuracy: 0.8211
Precision: 0.8174
Recall: 0.8184
F1 Score: 0.8178
----------------------------------------
Fold 2:
Accuracy: 0.8063
Precision: 0.8042
Recall: 0.8042
F1 Score: 0.8033
----------------------------------------
Fold 3:
Accuracy: 0.7747
Precision: 0.7738
Recall: 0.7766
F1 Score: 0.7729
----------------------------------------
Fold 4:
Accuracy: 0.8038
Precision: 0.7978
Recall: 0.7978
F1 Score: 0.7975
----------------------------------------
Fold 5:
Accuracy: 0.8354
Precision: 0.8331
Recall: 0.8322
F1 Score: 0.8325
----------------------------------------
\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
156.00 & 22.00 & 27.00 & 270.00 \\
151.00 & 20.00 & 32.00 & 272.00 \\
140.00 & 25.00 & 43.00 & 267.00 \\
159.00 & 15.00 & 24.00 & 276.00 \\
160.00 & 22.00 & 23.00 & 269.00 \\
153.20 & 20.80 & 29.80 & 270.80 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensiti

## 1D-Aug-CNN-Non-Filtered

In [17]:
# Evaluate the saved per-fold CNN models under the "1D-Aug-CNN-Non-Filtered"
# directory on each fold's stored test split and build one confusion matrix per fold.
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-Aug-CNN-Non-Filtered/CV_CNN_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-Aug-CNN-Non-Filtered/CV_CNN_fold_data/"
cm_model_path = "/kaggle/working/CV_CNN_cm/"

model_name = "aug-cnn-non-filtered"

# Lists to store metrics (one entry is appended per fold)
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold (fold files are numbered 1 to 5)
for fold in range(1, 6):
    # Load datasets for this fold
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted fold files.
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path)
    
    # Predict and evaluate: argmax over the last axis picks the predicted class index
    # (assumes the model outputs one score per class -- TODO confirm)
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    # NOTE(review): cm_model_path ends with "/" and is followed by "_", so the
    # file name starts with an underscore ("_aug-cnn-non-filtered_cm_fold_N.pkl");
    # confirm this is intended.
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# List of per-fold confusion matrices for the CNN models.
# NOTE(review): this bare expression is not the last statement of the cell, so nothing is displayed here.
confusion_matrices_cnn

# Encoded label -> original class name (same pairs as printed by display_label_mapping)
mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
# One row per fold plus a final average row. The 'CNN-500' label only fills the
# 'Model' column, which the per-label filters below do not select.
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label (keeps columns whose name contains the label)
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX (index dropped, floats formatted to two decimals)
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the cnn_advanced_df DataFrame (sensitivity / specificity / precision)

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

# Print the tables label by label: counts first, then the derived rates
print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

Fold 1:
Accuracy: 0.8589
Precision: 0.8488
Recall: 0.8470
F1 Score: 0.8477
----------------------------------------
Fold 2:
Accuracy: 0.8739
Precision: 0.8638
Recall: 0.8627
F1 Score: 0.8631
----------------------------------------
Fold 3:
Accuracy: 0.8679
Precision: 0.8569
Recall: 0.8532
F1 Score: 0.8543
----------------------------------------
Fold 4:
Accuracy: 0.8769
Precision: 0.8720
Recall: 0.8672
F1 Score: 0.8669
----------------------------------------
Fold 5:
Accuracy: 0.8799
Precision: 0.8719
Recall: 0.8709
F1 Score: 0.8712
----------------------------------------
\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
129.00 & 10.00 & 6.00 & 188.00 \\
130.00 & 7.00 & 5.00 & 191.00 \\
133.00 & 9.00 & 2.00 & 189.00 \\
128.00 & 5.00 & 7.00 & 193.00 \\
129.00 & 8.00 & 6.00 & 190.00 \\
129.80 & 7.80 & 5.20 & 190.20 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensitivity & Canc

## 1D-Aug-CNN-Filtered

In [18]:
# Evaluate the saved per-fold CNN models under the "1D-Aug-CNN-Filtered"
# directory on each fold's stored test split and build one confusion matrix per fold.
cnn_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-Aug-CNN-Filtered/CV_CNN_models/"
data_model_path = "/kaggle/input/cmatrix-1d-ori-aug-cnn/1D-Aug-CNN-Filtered/CV_CNN_fold_data/"
cm_model_path = "/kaggle/working/CV_CNN_cm/"

model_name = "aug-cnn-filtered"

# Lists to store metrics (one entry is appended per fold)
accuracies = []
precisions = []
recalls = []
f1_scores = []
confusion_matrices_cnn = []

# Loop through each fold (fold files are numbered 1 to 5)
for fold in range(1, 6):
    # Load datasets for this fold
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted fold files.
    with open(f"{data_model_path}fold_{fold}_data.pkl", 'rb') as f:
        fold_data = pickle.load(f)
        
    X_test = fold_data['X_test']
    y_test = fold_data['y_test']
    
    # Load the model for this fold
    model_file = f"temp_best_fold_{fold}.h5"
    model_path = os.path.join(cnn_model_path, model_file)
    model = tf.keras.models.load_model(model_path)
    
    # Predict and evaluate: argmax over the last axis picks the predicted class index
    # (assumes the model outputs one score per class -- TODO confirm)
    y_pred = np.argmax(model.predict(X_test), axis=-1)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='macro')
    recall = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    
    # Save the confusion matrix
    # NOTE(review): cm_model_path ends with "/" and is followed by "_", so the
    # file name starts with an underscore ("_aug-cnn-filtered_cm_fold_N.pkl");
    # confirm this is intended.
    with open(f"{cm_model_path}_{model_name}_cm_fold_{fold}.pkl", 'wb') as cm_file:
        pickle.dump(cm, cm_file)
    
    # Store metrics
    accuracies.append(accuracy)
    precisions.append(precision)
    recalls.append(recall)
    f1_scores.append(f1)
    confusion_matrices_cnn.append(cm)
    
    # Print metrics for this fold
    print(f"Fold {fold}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("-" * 40)

# List of per-fold confusion matrices for the CNN models.
# NOTE(review): this bare expression is not the last statement of the cell, so nothing is displayed here.
confusion_matrices_cnn

# Encoded label -> original class name (same pairs as printed by display_label_mapping)
mapping = {
    0: 'Cancer cell line',
    1: 'Monocyte',
    2: 'T-cells'
}

# Lists to store metrics for each fold
cnn_basic_metrics_list = [compute_basic_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
cnn_advanced_metrics_list = [compute_metrics_with_labels(cm, mapping) for cm in confusion_matrices_cnn]
# One row per fold plus a final average row. The 'CNN-500' label only fills the
# 'Model' column, which the per-label filters below do not select.
cnn_basic_df = metrics_to_dataframe(cnn_basic_metrics_list, 'CNN-500')
cnn_advanced_df = metrics_to_dataframe(cnn_advanced_metrics_list, 'CNN-500')

# Filter the DataFrame for each label (keeps columns whose name contains the label)
cancer_basic_df = cnn_basic_df.filter(like='Cancer cell line', axis=1)
monocyte_basic_df = cnn_basic_df.filter(like='Monocyte', axis=1)
t_cells_basic_df = cnn_basic_df.filter(like='T-cells', axis=1)

# Convert these subsets to LaTeX (index dropped, floats formatted to two decimals)
cancer_basic_latex = cancer_basic_df.to_latex(index=False, float_format="%.2f")
monocyte_basic_latex = monocyte_basic_df.to_latex(index=False, float_format="%.2f")
t_cells_basic_latex = t_cells_basic_df.to_latex(index=False, float_format="%.2f")

# Repeat the same process for the cnn_advanced_df DataFrame (sensitivity / specificity / precision)

cancer_advanced_df = cnn_advanced_df.filter(like='Cancer cell line', axis=1)
monocyte_advanced_df = cnn_advanced_df.filter(like='Monocyte', axis=1)
t_cells_advanced_df = cnn_advanced_df.filter(like='T-cells', axis=1)

cancer_advanced_latex = cancer_advanced_df.to_latex(index=False, float_format="%.2f")
monocyte_advanced_latex = monocyte_advanced_df.to_latex(index=False, float_format="%.2f")
t_cells_advanced_latex = t_cells_advanced_df.to_latex(index=False, float_format="%.2f")

# Print the tables label by label: counts first, then the derived rates
print(cancer_basic_latex)
print(cancer_advanced_latex)

print(monocyte_basic_latex)
print(monocyte_advanced_latex)

print(t_cells_basic_latex)
print(t_cells_advanced_latex)

Fold 1:
Accuracy: 0.7538
Precision: 0.7428
Recall: 0.7408
F1 Score: 0.7416
----------------------------------------
Fold 2:
Accuracy: 0.7718
Precision: 0.7682
Recall: 0.7724
F1 Score: 0.7681
----------------------------------------
Fold 3:
Accuracy: 0.7718
Precision: 0.7627
Recall: 0.7600
F1 Score: 0.7601
----------------------------------------
Fold 4:
Accuracy: 0.7808
Precision: 0.7805
Recall: 0.7846
F1 Score: 0.7780
----------------------------------------
Fold 5:
Accuracy: 0.8078
Precision: 0.8007
Recall: 0.7981
F1 Score: 0.7992
----------------------------------------
\begin{tabular}{rrrr}
\toprule
Cancer cell line TP & Cancer cell line FP & Cancer cell line FN & Cancer cell line TN \\
\midrule
116.00 & 25.00 & 19.00 & 173.00 \\
104.00 & 14.00 & 31.00 & 184.00 \\
118.00 & 24.00 & 17.00 & 174.00 \\
102.00 & 9.00 & 33.00 & 189.00 \\
120.00 & 20.00 & 15.00 & 178.00 \\
112.00 & 18.40 & 23.00 & 179.60 \\
\bottomrule
\end{tabular}

\begin{tabular}{rrr}
\toprule
Cancer cell line Sensitiv