In [63]:
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import os
import cv2
import seaborn as sns
import json
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
    confusion_matrix
)

**Dataset Citation:** Kermany D, Goldbaum M, Cai W et al. Identifying Medical Diagnoses and Treatable Diseases by Image-Based Deep Learning. Cell. 2018; 172(5):1122-1131. doi:10.1016/j.cell.2018.02.010.

## Load models

Models from:

#### Model 1 
  - https://www.kaggle.com/code/mohamedgobara/96-1-in-retinal-oct-cnn-model



In [43]:
# Load model 1 (full Keras model with architecture + weights).
# It is loaded uncompiled; the notebook compiles it in a later cell.
model_path1 = "../models/Retinal_Model.h5"
model1 = load_model(model_path1, compile=False)

# Show the architecture. summary() prints the table itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the table.
print("Model 1 Summary:")
model1.summary()

Model 1 Summary:
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 xception (Functional)       (None, 2048)              20861480  
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 128)               262272    
                                                                 
 dropout_1 (Dropout)         (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 4)                 516       
                                                                 
Total params: 21,124,268
Trainable para

#### Model 2
  - https://www.kaggle.com/code/arbazkhan971/retina-damage-classification-95-accuracy/output

In [47]:
# Load model 2 (full Keras model with architecture + weights).
# It is loaded uncompiled; the notebook compiles it in a later cell.
model_path2 = "../models/finetuned_model.h5"
model2 = load_model(model_path2, compile=False)

# Show the architecture. summary() prints the table itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the table.
print("Model 2 Summary:")
model2.summary()

Model 1 Summary:
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 block1_conv1 (Conv2D)       (None, 150, 150, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 150, 150, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 75, 75, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 75, 75, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 75, 75, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 37, 37, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 37,

### Compile models
The models were loaded with `compile=False`, so compile them again with an optimizer, loss, and metric.

In [49]:
# Both models were loaded with compile=False, so attach an optimizer, a loss
# and a metric to each one. Every model receives its own Adam instance.
for loaded_model in (model1, model2):
    loaded_model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )


### Define labels

In [50]:
# Class names. predict_image_probs pairs labels[i] with element i of the
# model's prediction vector, so the order of this list matters.
labels = ['CNV', 'DME', 'DRUSEN', 'NORMAL']

## Usage

### Preprocess image
Resize the image so that it is compatible with the model's input requirements.

In [57]:
def preprocess_image(image_path, target_size):
    """Load an image from disk and shape it as a single-item batch.

    Parameters:
    - image_path: path of the image file to read
    - target_size: size tuple handed to cv2.resize as its output size

    Returns:
    - the resized image with an extra leading axis of length 1 (batch axis);
      pixel values are left as cv2 decoded them (no rescaling is applied here)

    Raises:
    - OSError: if cv2 could not read or decode the file
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque error inside cv2.resize.
    if img is None:
        raise OSError(f"Could not read image (missing or not decodable): {image_path}")
    img = cv2.resize(img, target_size)  # Resize to match model input
    img = np.expand_dims(img, axis=0)  # Add batch dimension
    return img

### Prediction functions

In [60]:
def predict_image_probs(image_path, model, labels, target_size):
    """Score one image and return a {label: score} dict covering every label.

    The image is preprocessed to target_size, run through model.predict as a
    one-item batch, and element i of the resulting vector is paired with
    labels[i].
    """
    batch = preprocess_image(image_path, target_size)
    scores = model.predict(batch, verbose=0)[0]  # first (only) row of the batch
    return {name: float(scores[idx]) for idx, name in enumerate(labels)}

def predict_image_label(image_path, model, labels, target_size=(299, 299)):
    """Classify one image.

    Returns a (best_label, scores) tuple, where scores is the full
    {label: score} dict from predict_image_probs and best_label is the key
    holding the highest score (the first such key when scores tie).
    """
    probs_by_label = predict_image_probs(image_path, model, labels, target_size)
    top_label, _ = max(probs_by_label.items(), key=lambda item: item[1])
    return top_label, probs_by_label


### Test images functions

Get images from:

The first dataset is: [Large Dataset of Labeled Optical Coherence Tomography (OCT) and Chest X-Ray Images](https://data.mendeley.com/datasets/rscbjbr9sj/3) Kermany D, Goldbaum M, Cai W et al. Identifying Medical Diagnoses and Treatable Diseases by Image-Based Deep Learning. Cell. 2018; 172(5):1122-1131. doi:10.1016/j.cell.2018.02.010.



The second one is: [Labeled Optical Coherence Tomography (OCT) and Chest X-Ray Images for Classification](https://data.mendeley.com/datasets/rscbjbr9sj/2) Kermany, Daniel; Zhang, Kang; Goldbaum, Michael (2018), “Labeled Optical Coherence Tomography (OCT) and Chest X-Ray Images for Classification”, Mendeley Data, V2, doi: 10.17632/rscbjbr9sj.2.




In [66]:


# Test-split roots of the two datasets (each sub-folder is treated as a label)
folder_path1 = "../data/ZhangLabData/CellData/OCT/test"
folder_path2 = "../data/OCT2017/OCT2017/test"

# Parallel accumulators: image_paths[i] is labelled true_labels[i]
image_paths, true_labels = [], []

# Helper that collects labelled image paths from one dataset folder
def add_images_from_folder(folder_path, paths_out=None, labels_out=None):
    """Append every JPEG found under folder_path/<label>/ to the output lists.

    Each immediate sub-directory of folder_path is treated as a class label;
    plain files directly inside folder_path are ignored.

    Parameters:
    - folder_path: dataset root containing one sub-directory per label
    - paths_out: list that receives the image paths; defaults to the
      module-level `image_paths` list
    - labels_out: list that receives the matching label for each path;
      defaults to the module-level `true_labels` list

    Returns:
    - None; results are appended to the two lists in place
    """
    if paths_out is None:
        paths_out = image_paths
    if labels_out is None:
        labels_out = true_labels

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order identical from run to run and across machines.
    for label in sorted(os.listdir(folder_path)):
        label_path = os.path.join(folder_path, label)
        if not os.path.isdir(label_path):
            continue
        for img_name in sorted(os.listdir(label_path)):
            # Case-insensitive match so ".JPEG" / ".jpg" files are not skipped
            if img_name.lower().endswith((".jpeg", ".jpg")):
                paths_out.append(os.path.join(label_path, img_name))
                labels_out.append(label)

# Collect the images from both test folders
for dataset_root in (folder_path1, folder_path2):
    add_images_from_folder(dataset_root)

# Sanity check: how many images were found and which labels they carry
print(f"Total images: {len(image_paths)}")
print(f"Sample labels: {set(true_labels)}")


Total images: 2000
Sample labels: {'CNV', 'NORMAL', 'DRUSEN', 'DME'}


## Test models

Classify images and save the results

#### Model 1 

In [67]:
# Pair every image path with its ground-truth label
samples1 = list(zip(image_paths, true_labels))

# Ground truth and model 1's top prediction for each image (default 299x299 input)
y_true1 = [truth for _, truth in samples1]
y_pred1 = [predict_image_label(path, model1, labels)[0] for path, _ in samples1]

# Translate class names into their positions in `labels`
y_true_numeric1 = list(map(labels.index, y_true1))
y_pred_numeric1 = list(map(labels.index, y_pred1))


#### Model 2

In [68]:
# Pair every image path with its ground-truth label
samples2 = list(zip(image_paths, true_labels))

# Ground truth and model 2's top prediction for each image (150x150 input)
y_true2 = [truth for _, truth in samples2]
y_pred2 = [
    predict_image_label(path, model2, labels, target_size=(150, 150))[0]
    for path, _ in samples2
]

# Translate class names into their positions in `labels`
y_true_numeric2 = list(map(labels.index, y_true2))
y_pred_numeric2 = list(map(labels.index, y_pred2))

### Show test metrics

In [69]:

def evaluate_classification_results(y_true, y_pred, labels, output_dir):
    """
    Evaluate classification results and save metrics, report, and confusion matrix.

    Three files are written into output_dir (created if missing):
    "metrics_post.json", "classification_report_post.json" and
    "confusion_matrix_post.png". The metrics and the text report are also printed.

    Parameters:
    - y_true: list or array of true class labels (numeric indices into
      `labels`, or the label strings themselves)
    - y_pred: list or array of predicted class labels (same encoding as y_true)
    - labels: list of label names in the correct order
    - output_dir: path to the folder where output files should be saved

    Returns:
    - None
    """
    os.makedirs(output_dir, exist_ok=True)

    # Pin the full class set explicitly. Without it, scikit-learn infers the
    # classes from the data: a class missing from y_true/y_pred then makes
    # classification_report reject `target_names` and leaves the confusion
    # matrix smaller than the tick labels drawn on it.
    uses_names = any(isinstance(value, str) for value in list(y_true) + list(y_pred))
    class_ids = list(labels) if uses_names else list(range(len(labels)))

    # Headline metrics (weighted averages across classes)
    metrics = {
        "Total Images": len(y_true),
        "Accuracy": accuracy_score(y_true, y_pred),
        "Precision": precision_score(y_true, y_pred, average="weighted"),
        "Recall": recall_score(y_true, y_pred, average="weighted"),
        "F1 Score": f1_score(y_true, y_pred, average="weighted"),
    }

    # Save metrics to JSON and echo them
    with open(os.path.join(output_dir, "metrics_post.json"), "w") as f:
        json.dump(metrics, f, indent=4)
    print(json.dumps(metrics, indent=4))

    # Per-class report: dict form goes to disk, text form goes to the screen
    report = classification_report(
        y_true, y_pred, labels=class_ids, target_names=labels, output_dict=True
    )
    with open(os.path.join(output_dir, "classification_report_post.json"), "w") as f:
        json.dump(report, f, indent=4)
    print(
        "Classification Report:\n",
        classification_report(y_true, y_pred, labels=class_ids, target_names=labels),
    )

    # Confusion matrix heatmap, drawn on an explicit figure so that exactly
    # this figure is saved and closed.
    conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        conf_matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    ax.set_title("Confusion Matrix")
    fig.savefig(os.path.join(output_dir, "confusion_matrix_post.png"))
    plt.close(fig)


In [70]:
# Score model 1's predictions; the metrics, report and confusion matrix are
# written under the model1_evaluation results folder.
evaluate_classification_results(y_true_numeric1, y_pred_numeric1, labels, "../results/preliminary/model1_evaluation")

{
    "Total Images": 2000,
    "Accuracy": 0.985,
    "Precision": 0.9853176214513577,
    "Recall": 0.985,
    "F1 Score": 0.9849931696062295
}
Classification Report:
               precision    recall  f1-score   support

         CNV       0.96      1.00      0.98       500
         DME       0.99      1.00      1.00       500
      DRUSEN       0.99      0.96      0.97       500
      NORMAL       1.00      0.99      0.99       500

    accuracy                           0.98      2000
   macro avg       0.99      0.98      0.98      2000
weighted avg       0.99      0.98      0.98      2000



In [71]:
# Score model 2's predictions; the metrics, report and confusion matrix are
# written under the model2_evaluation results folder.
evaluate_classification_results(y_true_numeric2, y_pred_numeric2, labels, "../results/preliminary/model2_evaluation")

{
    "Total Images": 2000,
    "Accuracy": 0.9005,
    "Precision": 0.9244454201244976,
    "Recall": 0.9005,
    "F1 Score": 0.9020052236450552
}
Classification Report:
               precision    recall  f1-score   support

         CNV       0.73      1.00      0.84       500
         DME       1.00      0.95      0.97       500
      DRUSEN       0.99      0.73      0.84       500
      NORMAL       0.98      0.93      0.95       500

    accuracy                           0.90      2000
   macro avg       0.92      0.90      0.90      2000
weighted avg       0.92      0.90      0.90      2000

