In [2]:
# =============================
# 📦 Imports for Data Handling & Model
# =============================

# 🔗 Google Drive access
from google.colab import drive

# 📁 File and image handling
import os
import random
import numpy as np
import cv2
from PIL import Image

# 📊 Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# 📈 Evaluation metrics
from sklearn.metrics import (
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
    accuracy_score
)

# 🤖 TensorFlow and Keras (Deep Learning)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D, Flatten, Dense, Dropout
)
from tensorflow.keras.optimizers import Adam


In [1]:
import os
from google.colab import drive

# =============================
# 🔗 Mount Google Drive
# =============================
# The dataset folders below live under the mounted drive, so mount it first.
drive.mount('/content/drive')

# =============================
# 📁 Define Dataset Paths
# =============================
# One folder per split: train, validation, and test.
train_folder_path, validation_folder_path, test_folder_path = (
    "/content/drive/MyDrive/Dataset/Train",
    "/content/drive/MyDrive/Dataset/Validation",
    "/content/drive/MyDrive/Dataset/Test",
)

# =============================
# 📥 Function to Count Images per Class
# =============================
def count_images_in_folder(folder_path):
    """
    Counts the image files per class label inside a dataset folder.

    Only the two class sub-folders "IPF Positive" and "IPF Negative" are
    inspected. Files are matched by extension (.png, .jpg, .jpeg) without
    regard to case, so a file such as "scan.JPG" is counted the same way the
    image loader in this notebook picks it up.

    Args:
        folder_path (str): Path to the dataset folder.

    Returns:
        dict: Number of images for each label ("IPF Positive", "IPF Negative").
              A label whose sub-folder is missing is reported as 0.
        int: Total number of images in the folder.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')
    counts = {}
    total = 0
    for label in ["IPF Positive", "IPF Negative"]:
        label_folder = os.path.join(folder_path, label)
        if os.path.isdir(label_folder):
            num_images = sum(
                1 for f in os.listdir(label_folder)
                if f.lower().endswith(image_extensions)
            )
        else:
            # Missing (or non-directory) class folder: count is zero
            num_images = 0
        counts[label] = num_images
        total += num_images
    return counts, total

# =============================
# 📊 Count Images in Each Set
# =============================
train_counts, train_total = count_images_in_folder(train_folder_path)
test_counts, test_total = count_images_in_folder(test_folder_path)
val_counts, val_total = count_images_in_folder(validation_folder_path)

# Calculate total number of images across all datasets
grand_total = train_total + test_total + val_total


def _percent(part, whole):
    """Returns part/whole as a percentage, or 0.0 when whole is zero."""
    # A missing or empty split yields a total of 0; report 0% instead of
    # raising ZeroDivisionError in the middle of the summary.
    return (part / whole) * 100 if whole else 0.0


# =============================
# 🖨️ Print Dataset Summary
# =============================
print(f"📁 Train Set: {train_total} images")
for label, count in train_counts.items():
    print(f"  - {label}: {count} images ({_percent(count, train_total):.2f}%)")

print(f"\n📁 Test Set: {test_total} images")
for label, count in test_counts.items():
    print(f"  - {label}: {count} images ({_percent(count, test_total):.2f}%)")

print(f"\n📁 Validation Set: {val_total} images")
for label, count in val_counts.items():
    print(f"  - {label}: {count} images ({_percent(count, val_total):.2f}%)")

print(f"\n📊 Grand Total: {grand_total} images")
print(f"  - Training   : {_percent(train_total, grand_total):.2f}%")
print(f"  - Testing    : {_percent(test_total, grand_total):.2f}%")
print(f"  - Validation : {_percent(val_total, grand_total):.2f}%")


Mounted at /content/drive
📁 Train Set: 3073 images
  - IPF Positive: 1631 images (53.08%)
  - IPF Negative: 1442 images (46.92%)

📁 Test Set: 220 images
  - IPF Positive: 103 images (46.82%)
  - IPF Negative: 117 images (53.18%)

📁 Validation Set: 1098 images
  - IPF Positive: 535 images (48.72%)
  - IPF Negative: 563 images (51.28%)

📊 Grand Total: 4391 images
  - Training   : 69.98%
  - Testing    : 5.01%
  - Validation : 25.01%


In [3]:
# =============================
# 🔗 Mount Google Drive and Define Dataset Paths
# =============================

# Re-mount Google Drive; force_remount=True is passed so the call also works
# when the drive is already mounted.
drive.mount("/content/drive", force_remount=True)

# Folder paths for the three dataset splits
train_folder_path, validation_folder_path, test_folder_path = (
    "/content/drive/MyDrive/Dataset/Train",
    "/content/drive/MyDrive/Dataset/Validation",
    "/content/drive/MyDrive/Dataset/Test",
)


Mounted at /content/drive


In [4]:
# =============================
# 📥 Image Loading Function
# =============================
def load_data(image_folder_path, image_size, data, label):
    """
    Loads grayscale images from a specified folder, resizes them,
    and appends them with their label to a given list.

    Files are selected by extension (.jpg, .jpeg, .png; case-insensitive).
    A file that OpenCV cannot decode is skipped with a warning; it is never
    passed on to the resize step.

    Args:
        image_folder_path (str): Path to the image folder.
        image_size (int): Target width and height (images will be resized to image_size x image_size).
        data (list): List to which [image, label] pairs will be appended (mutated in place).
        label (int): Integer label to assign to all images in this folder.

    Returns:
        None. If the folder does not exist, an error is printed and `data` is left untouched.
    """
    if not os.path.isdir(image_folder_path):
        print(f"❌ Error: Folder not found at {image_folder_path}")
        return

    for imagename in os.listdir(image_folder_path):
        if not imagename.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue
        try:
            # Construct full image path
            image_path = os.path.join(image_folder_path, imagename)

            # Load the image in grayscale
            image_array = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

            # Validate BEFORE resizing: an undecodable file must not reach
            # cv2.resize, which would fail with an opaque OpenCV error.
            if image_array is None or image_array.size == 0:
                print(f"⚠️ Skipping unreadable image '{imagename}'")
                continue

            # Resize to the desired dimensions
            image_array = cv2.resize(image_array, (image_size, image_size))
            data.append([image_array, label])

        except Exception as e:
            # Best effort: report the file and keep loading the rest
            print(f"⚠️ Error opening image '{imagename}': {e}")


In [5]:
# =============================
# 🏷️ Dataset Preparation
# =============================

# Image dimensions for resizing (grayscale: 256x256, can be changed according to the dataset)
IMAGE_SIZE = 256

# Class labels used for folder names and labeling; a label's position in this
# list is its integer class id ("IPF Positive" -> 0, "IPF Negative" -> 1).
labels = ["IPF Positive", "IPF Negative"]

def build_data(path, labels=labels):
    """
    Loads and labels image data from a given directory path.

    Every entry of `labels` is treated as the name of a class sub-folder of
    `path`; images found there are tagged with that entry's index in the list.

    Args:
        path (str): Base folder containing class subfolders.
        labels (list): List of label names corresponding to folder names.
                       Defaults to the notebook-wide `labels` list.

    Returns:
        list: Shuffled list of [image, label] pairs.
    """
    data = []

    # Walk the label list instead of hard-coding folder names, so the
    # `labels` argument is the single source of truth for names and ids.
    for class_index, class_name in enumerate(labels):
        load_data(os.path.join(path, class_name), IMAGE_SIZE, data, class_index)

    # Shuffle to randomize order (in place, using the global `random` state)
    random.shuffle(data)

    return data

# Build the three splits in the order train -> validation -> test
train_data, validation_data, test_data = (
    build_data(train_folder_path),
    build_data(validation_folder_path),
    build_data(test_folder_path),
)


In [6]:
# =============================
# 🔄 Convert Data to Numpy Arrays
# =============================

def convert_to_np_array(data):
    """
    Separates (image, label) pairs into an image array and a label array.

    Args:
        data (list): Sequence of two-element (image_array, label) entries.

    Returns:
        Tuple[np.ndarray, np.ndarray]: X (images) and Y (labels), both built
        with dtype=float.
    """
    images = [image for image, _ in data]
    targets = [label for _, label in data]
    return np.array(images, dtype=float), np.array(targets, dtype=float)

# Split each dataset into X (images) and Y (labels)
train_data_x, train_data_y = convert_to_np_array(train_data)
validation_data_x, validation_data_y = convert_to_np_array(validation_data)
test_data_x, test_data_y = convert_to_np_array(test_data)

# Append a trailing channel axis and scale pixel values by 1/255
# (8-bit grayscale values 0–255 end up in [0, 1]).
train_data_x = train_data_x[..., np.newaxis] / 255.0
validation_data_x = validation_data_x[..., np.newaxis] / 255.0
test_data_x = test_data_x[..., np.newaxis] / 255.0


In [7]:
# =============================
# 🧠 Model Hyperparameters
# =============================

BATCH_SIZE = 64         # Number of training samples per batch
# NOTE(review): the training log saved further down in this notebook shows
# "Epoch 1/100", so that output was presumably produced with a different EPOCHS value.
EPOCHS = 150            # Total number of passes through the entire training set
LEARNING_RATE = 5e-6    # Learning rate passed to the Adam optimizer when the model is compiled
DROPOUT_RATE = 0.45      # Rate given to the Dropout layer in the model's dense head


In [8]:
# =============================
# 🏗️ Build CNN Model
# =============================

# Simple custom CNN for binary image classification (grayscale 256x256 inputs).
# The input shape is declared with an explicit Input object as the first entry
# of the Sequential model; passing `input_shape=` to the first Conv2D is what
# made Keras emit the warning seen under this cell.
model = Sequential([
    tf.keras.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 1)),

    # Convolutional Block 1
    Conv2D(16, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Convolutional Block 2
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Convolutional Block 3
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Convolutional Block 4
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),

    # Fully Connected Head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(DROPOUT_RATE),

    # Output Layer: 2 neurons (index 0 = IPF Positive, index 1 = IPF Negative)
    Dense(2, activation='softmax')
])

# Compile the model with Adam optimizer and sparse categorical loss
model.compile(
    optimizer=Adam(learning_rate=LEARNING_RATE),
    loss='sparse_categorical_crossentropy',  # Use sparse targets (integers 0 and 1)
    metrics=['accuracy']
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [9]:
# =============================
# 🔄 Create TensorFlow Dataset Pipeline (with Optional Augmentation)
# =============================

# Toggle to enable or disable data augmentation for the training set
AUGMENTATION_ENABLED = True

def augment_image(x, y):
    """
    Applies random augmentations to an image tensor.
    Used to improve model generalization and reduce overfitting.

    Augmentations:
    - Random horizontal flip
    - Random vertical flip
    - Brightness jitter (max_delta=0.1)
    - Contrast jitter (factor in [0.9, 1.1])

    The result is clipped back to [0, 1]: brightness/contrast jitter can push
    pixels slightly outside that range, while the un-augmented validation and
    test images (scaled by 1/255 earlier in this notebook) never leave it.

    Args:
        x: Image tensor. Assumes values already scaled to [0, 1] — TODO confirm
           for any new caller.
        y: Label for the image.

    Returns:
        Tuple of (augmented image as float32, label as int32).
    """
    # Cast first so all augmentation ops run in float32
    x = tf.cast(x, tf.float32)
    x = tf.image.random_flip_left_right(x)
    x = tf.image.random_flip_up_down(x)
    x = tf.image.random_brightness(x, max_delta=0.1)
    x = tf.image.random_contrast(x, lower=0.9, upper=1.1)
    x = tf.clip_by_value(x, 0.0, 1.0)
    return x, tf.cast(y, tf.int32)

def basic_cast(x, y):
    """
    Casts the image to float32 and the label to int32; applies no augmentation.
    """
    image = tf.cast(x, tf.float32)
    target = tf.cast(y, tf.int32)
    return image, target

# Training pipeline: per-sample map (augmentation or plain cast, depending on
# AUGMENTATION_ENABLED) -> shuffle with a 1000-element buffer -> batch ->
# repeat indefinitely so it can be consumed across multiple epochs.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_data_x, train_data_y))
    .map(augment_image if AUGMENTATION_ENABLED else basic_cast)
    .shuffle(1000)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: cast only, no augmentation
val_dataset = tf.data.Dataset.from_tensor_slices((validation_data_x, validation_data_y))
val_dataset = val_dataset.map(basic_cast)
val_dataset = val_dataset.batch(BATCH_SIZE)

# Test pipeline: cast only, no augmentation
test_dataset = tf.data.Dataset.from_tensor_slices((test_data_x, test_data_y))
test_dataset = test_dataset.map(basic_cast)
test_dataset = test_dataset.batch(BATCH_SIZE)


In [None]:
# =============================
# 🚂 Train the Model
# =============================

# Number of full batches in the training array; the training dataset repeats
# forever, so Keras needs this to know where an epoch ends.
steps_per_epoch = train_data_x.shape[0] // BATCH_SIZE

# Train on the prepared pipelines, validating on val_dataset each epoch
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    steps_per_epoch=steps_per_epoch,
    epochs=EPOCHS,
)

# =============================
# 💾 Save the Trained Model
# =============================
MODEL_PATH = "/content/drive/MyDrive/saved_model.h5"
model.save(MODEL_PATH)
print(f"✅ Model saved to: {MODEL_PATH}")


Epoch 1/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m288s[0m 6s/step - accuracy: 0.5319 - loss: 0.6870 - val_accuracy: 0.4872 - val_loss: 0.6951
Epoch 2/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m285s[0m 6s/step - accuracy: 0.5172 - loss: 0.6836 - val_accuracy: 0.4909 - val_loss: 0.6891
Epoch 3/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 5s/step - accuracy: 0.5564 - loss: 0.6660 - val_accuracy: 0.5392 - val_loss: 0.6761
Epoch 4/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 5s/step - accuracy: 0.6206 - loss: 0.6573 - val_accuracy: 0.5956 - val_loss: 0.6668
Epoch 5/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m250s[0m 5s/step - accuracy: 0.7131 - loss: 0.6441 - val_accuracy: 0.6767 - val_loss: 0.6513
Epoch 6/100
[1m48/48[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 5s/step - accuracy: 0.7488 - loss: 0.6273 - val_accuracy: 0.7158 - val_loss: 0.6340
Epoch 7/100
[1m48/48[0m [

In [None]:
# =============================
# 🧾 Evaluate Model (Train/Validation/Test Results)
# =============================

# 1️⃣ Highest per-epoch training / validation accuracy recorded by fit()
best_train_acc = max(history.history['accuracy'])
best_val_acc = max(history.history['val_accuracy'])

# 2️⃣ Loss and accuracy of the current model on the test pipeline
test_loss, test_accuracy = model.evaluate(test_dataset, verbose=1)

# 3️⃣ Report everything in one go
print("\n".join([
    "\n🏁 Final Results:",
    f"📈 Best Train Accuracy      : {best_train_acc:.4f}",
    f"📈 Best Validation Accuracy : {best_val_acc:.4f}",
    f"📈 Final Test Accuracy      : {test_accuracy:.4f}",
    f"📉 Final Test Loss          : {test_loss:.4f}",
]))


In [None]:
# =============================
# 📈 Accuracy & Loss Curves (Improved)
# =============================

# 📊 Accuracy plot first, then 📉 loss plot — same layout for both figures
for _train_key, _val_key, _train_label, _val_label, _ylabel, _title in (
    ('accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy Over Epochs'),
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss Over Epochs'),
):
    plt.figure(figsize=(8, 5))
    plt.plot(history.history[_train_key], label=_train_label)
    plt.plot(history.history[_val_key], label=_val_label)
    plt.xlabel('Epochs')
    plt.ylabel(_ylabel)
    plt.title(_title)
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.show()

# =============================
# 📊 Performance Metrics (Confusion Matrix & Scores)
# =============================

# 🔍 Predict class probabilities for test data
result = model.predict(test_data_x)

# 🎯 Convert predicted probabilities to class labels
predicted_labels = np.argmax(result, axis=1)

# Class names in class-id order (0 = IPF Positive, 1 = IPF Negative)
class_labels = ['IPF Positive', 'IPF Negative']
class_ids = list(range(len(class_labels)))
positive_class = class_labels.index('IPF Positive')

# Ground-truth labels were stored as floats; compare as integer class ids
true_labels = np.asarray(test_data_y).astype(int)

# 📉 Confusion Matrix (labels fixed so it is always 2x2 and matches the tick labels)
cm = confusion_matrix(true_labels, predicted_labels, labels=class_ids)

# 🔵 Confusion Matrix Plot
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

# 🧮 Classification Metrics
# scikit-learn scores class 1 by default, which here is "IPF Negative".
# pos_label is set explicitly so precision/recall/F1 describe "IPF Positive".
precision = precision_score(true_labels, predicted_labels, pos_label=positive_class)
recall = recall_score(true_labels, predicted_labels, pos_label=positive_class)
f1 = f1_score(true_labels, predicted_labels, pos_label=positive_class)
accuracy = accuracy_score(true_labels, predicted_labels)

# 🖨️ Print Final Evaluation Metrics
print(f"Accuracy : {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall   : {recall:.4f}")
print(f"F1 Score : {f1:.4f}")



In [None]:
# =============================
# 🧰 Setup and Environment Configuration
# =============================

# Install the widget library used by the GUI below (shell command, quiet mode).
# NOTE(review): the install is unpinned; consider `%pip` with a fixed version — confirm for this environment.
!pip install ipywidgets --quiet

import os
# NOTE(review): presumably only takes effect if set before TensorFlow is first
# imported in this kernel — confirm when running after the training cells.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings
import logging
# Raise the Python-side 'tensorflow' logger threshold to FATAL as well
logging.getLogger('tensorflow').setLevel(logging.FATAL)

# General Imports
import time
import numpy as np
import cv2
import io
import base64
from PIL import Image
import matplotlib.pyplot as plt

# GUI and Display
from IPython.display import display, clear_output, HTML
import ipywidgets as widgets

# Deep Learning
from tensorflow.keras.models import load_model

# Google Drive Access
from google.colab import drive
# force_remount=True is passed so the call also works when the drive is already mounted
drive.mount("/content/drive", force_remount=True)

# =============================
# 🧠 Load Pretrained Model
# =============================

# Same file path the training cell saves the model to
MODEL_PATH = "/content/drive/MyDrive/saved_model.h5"
# Side length (pixels) images are resized to before prediction
IMAGE_SIZE = 256
# compile=False: load for prediction only; this cell never calls compile() on it
model = load_model(MODEL_PATH, compile=False)

# =============================
# 🖼️ GUI Widgets Setup
# =============================

# Text box holding the patient folder path (pre-filled with a default)
folder_input = widgets.Text(
    value="/content/drive/MyDrive/Patient_Folder",
    placeholder="Paste patient folder path here",
    description="📁 Folder:",
    layout=widgets.Layout(width='95%'),
)

# Button that starts the diagnosis
diagnose_button = widgets.Button(
    description="🩺 Diagnose",
    button_style="success",
    icon="search",
    layout=widgets.Layout(width='30%'),
)

# Status line shown while a diagnosis is running
loader = widgets.Label(value="")

# Progress indicator (its max is adjusted per run by the evaluation function)
progress_bar = widgets.IntProgress(
    value=0,
    min=0,
    max=100,
    description='Progress:',
    bar_style='info',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='80%'),
)

# Output area for the final summary, and an HTML box for the live image grid
output = widgets.Output()
image_display_box = widgets.HTML(value="")

# =============================
# 📦 Image Utility Functions
# =============================

def img_to_b64(img_np):
    """
    Encodes a NumPy image as a base64 PNG string for HTML embedding.

    Args:
        img_np: Image array accepted by PIL's Image.fromarray.

    Returns:
        str: Base64 text of the PNG-encoded image.
    """
    with io.BytesIO() as png_buffer:
        Image.fromarray(img_np).save(png_buffer, format="PNG")
        encoded = base64.b64encode(png_buffer.getvalue())
    return encoded.decode()

def render_image_html(img_b64, title, is_flagged=False):
    """
    Builds the HTML card for one image prediction.

    Args:
        img_b64 (str): Base64-encoded PNG placed in the <img> data URI.
        title (str): Caption inserted verbatim under the image (may contain HTML).
        is_flagged (bool): When truthy the card gets a thick red border,
            otherwise a thin grey one.

    Returns:
        str: HTML snippet for the card.
    """
    if is_flagged:
        border = "3px solid red"
    else:
        border = "1px solid #ccc"
    return f"""
        <div style="margin:5px;text-align:center;border:{border};padding:4px;width:150px;">
            <img src="data:image/png;base64,{img_b64}" style="width:100%;border-radius:6px;">
            <div style="font-size:11px; font-weight:bold; margin-top:3px;">{title}</div>
        </div>
    """

# =============================
# 🧪 Evaluate Patient Folder in Real-Time
# =============================

def evaluate_patient_live(folder_path):
    """
    Runs the loaded model on every image in a patient folder, updating the
    progress bar and the image grid after each prediction.

    Files are selected by extension (.png, .jpg, .jpeg; case-insensitive) and
    processed in sorted filename order. A file OpenCV cannot decode is skipped
    with a printed warning instead of aborting the whole run.

    Args:
        folder_path (str): Folder containing the patient's images.

    Returns:
        tuple: (filenames, predictions, predicted_labels, images) — four
        parallel lists with one entry per successfully processed image:
        file name, model output vector, argmax class index, and RGB image.
    """
    # Local import (standard library) used to escape file names placed into HTML
    from html import escape

    filenames, predictions, predicted_labels, images = [], [], [], []
    grid_html = "<div style='display:flex;flex-wrap:wrap;justify-content:center;'>"

    files = sorted(f for f in os.listdir(folder_path) if f.lower().endswith(('.png', '.jpg', '.jpeg')))
    total = len(files)
    # Reset first so the bar does not show the previous run's position
    progress_bar.value = 0
    progress_bar.max = total

    for idx, filename in enumerate(files):
        path = os.path.join(folder_path, filename)
        # Grayscale copy feeds the model; colour copy is only for display
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img_bgr = cv2.imread(path)

        if img is None or img_bgr is None:
            # imread signals an undecodable file by returning None
            print(f"⚠️ Skipping unreadable image '{filename}'")
        else:
            img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
            image_resized = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            # Same preprocessing as training: add batch/channel axes, scale by 1/255
            image_input = image_resized.reshape(1, IMAGE_SIZE, IMAGE_SIZE, 1) / 255.0

            pred = model.predict(image_input, verbose=0)[0]
            class_index = np.argmax(pred)
            predictions.append(pred)
            predicted_labels.append(class_index)
            filenames.append(filename)
            images.append(img_rgb)

            label = "IPF Positive" if class_index == 0 else "IPF Negative"
            pred_str = f"{pred[0]:.2f}/{pred[1]:.2f}"
            b64img = img_to_b64(cv2.resize(img_rgb, (150, 150)))
            # File names come from disk; escape them before embedding in markup
            grid_html += render_image_html(b64img, f"{escape(filename)}<br>{label}<br>({pred_str})")
            image_display_box.value = grid_html + "</div>"

            time.sleep(0.15)  # Simulated delay for real-time effect

        progress_bar.value = idx + 1
        progress_bar.description = f"Diagnosed {idx + 1}/{total}"

    return filenames, predictions, predicted_labels, images

# =============================
# 🖱️ On Diagnose Button Click
# =============================

def on_diagnose_clicked(b):
    """
    Click handler for the Diagnose button.

    Reads the folder path from the text box, runs the per-image evaluation,
    and renders (inside the output widget) a summary card with the
    majority-vote diagnosis followed by the images whose individual
    prediction disagrees with that diagnosis.

    A tie between the two classes resolves to "IPF Negative"
    (the vote is positive only when positives strictly outnumber negatives).

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    # Local import (standard library) used to escape file names placed into HTML
    from html import escape

    with output:
        clear_output()
        loader.value = "🔄 Diagnosing, please wait..."
        folder_path = folder_input.value.strip()

        # Must be a directory: a plain file path would fail later in os.listdir
        if not os.path.isdir(folder_path):
            loader.value = "❌ Folder does not exist!"
            return

        image_display_box.value = ""
        filenames, predictions, predicted_labels, images = evaluate_patient_live(folder_path)

        total = len(predicted_labels)
        if total == 0:
            # Nothing to vote on; also avoids dividing by zero in the summary
            loader.value = "❌ No readable images found in folder!"
            return

        positive = predicted_labels.count(0)
        negative = predicted_labels.count(1)
        final_vote = 0 if positive > negative else 1
        final_diagnosis = "IPF Positive" if final_vote == 0 else "IPF Negative"
        loader.value = ""

        # === Final Summary Display
        summary_html = widgets.HTML(
            value=f"""
            <div style="background: #f9f9f9; border: 2px solid #007bff;
                        padding: 20px 30px; border-radius: 12px;
                        width: 60%; margin: auto;
                        box-shadow: 0 4px 12px rgba(0,0,0,0.15);
                        font-family: Arial, sans-serif; color: #111;">
                <h2 style="text-align:center; color:#007bff; margin-top:0;">🩺 Final Patient Diagnosis</h2>
                <p style="font-size:16px;"><strong>Total Images:</strong> {total}</p>
                <p style="font-size:16px;"><strong>IPF Positive:</strong> {positive} ({(positive/total)*100:.2f}%)</p>
                <p style="font-size:16px;"><strong>IPF Negative:</strong> {negative} ({(negative/total)*100:.2f}%)</p>
                <p style="font-size:18px; margin-top:20px;">
                    <strong>🧠 Final Diagnosis:</strong>
                    <span style="color:{'red' if final_diagnosis == 'IPF Positive' else 'green'}; font-weight:bold;">
                        {final_diagnosis}
                    </span>
                </p>
            </div>
            """)
        display(summary_html)

        # === Flagged Image Display (disagrees with final diagnosis)
        flagged_html = "<div style='margin-top:30px;text-align:center;'><h3>🟥 Flagged Predictions (Opposite to Final Diagnosis)</h3></div>"
        flagged_html += "<div style='display:flex;flex-wrap:wrap;justify-content:center;'>"

        found_flagged = False
        for i, pred in enumerate(predictions):
            individual_vote = np.argmax(pred)
            if individual_vote != final_vote:
                found_flagged = True
                label = "IPF Positive" if individual_vote == 0 else "IPF Negative"
                pred_str = f"{pred[0]:.2f}/{pred[1]:.2f}"
                b64img = img_to_b64(cv2.resize(images[i], (150, 150)))
                # File names come from disk; escape them before embedding in markup
                flagged_html += render_image_html(b64img, f"{escape(filenames[i])}<br>{label}<br>({pred_str})", is_flagged=True)

        flagged_html += "</div>"
        if found_flagged:
            display(HTML(flagged_html))
        else:
            display(HTML("<p style='text-align:center;color:#555;'>✅ No flagged predictions found.</p>"))

# =============================
# 💡 Launch the GUI
# =============================

# Hook the click handler up, then render the widgets top to bottom
diagnose_button.on_click(on_diagnose_clicked)
display(
    folder_input,
    diagnose_button,
    loader,
    progress_bar,
    image_display_box,
    output,
)


In [None]:
# =============================
# 🔁 Transfer Learning: Fine-Tune a Saved Model with New Data (Layer Freezing Optional — Disabled by Default)
# =============================

# === Imports
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
import numpy as np
import cv2
import os
import random
from google.colab import drive

# 🔗 Mount Google Drive
drive.mount('/content/drive')

# =============================
# 📁 Define Paths and Constants
# =============================
# Preprocessing / training settings
IMAGE_SIZE = 256
BATCH_SIZE = 64
LEARNING_RATE = 5e-6
AUGMENTATION_ENABLED = True
LABELS = ["IPF Positive", "IPF Negative"]

# Model files: the saved model to start from, and where the fine-tuned one goes
MODEL_PATH = "/content/drive/MyDrive/saved_model.h5"
FINE_TUNED_MODEL_PATH = "/content/drive/MyDrive/saved_model_finetuned.h5"

# Data folders: new training data, and the validation split
NEW_DATA_PATH = "/content/drive/MyDrive/transfer learning"
VALIDATION_PATH = "/content/drive/MyDrive/transfer test/Validation"

# =============================
# 🧠 Load and Recompile the Model
# =============================
# Loaded with compile=False; it is compiled explicitly just below.
model = load_model(MODEL_PATH, compile=False)

# Optional (currently disabled): freeze every layer except the last two
# before fine-tuning by un-commenting the loop below.
# for layer in model.layers[:-2]:
#     layer.trainable = False

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=Adam(learning_rate=LEARNING_RATE),
    metrics=['accuracy'],
)

# =============================
# 🖼️ Data Loading Functions
# =============================
def load_images_from_folder(path, label):
    """
    Loads grayscale images from a subfolder named after the label.

    Files are selected by extension (.png, .jpg, .jpeg; case-insensitive),
    read in grayscale and resized to IMAGE_SIZE x IMAGE_SIZE. Problems are
    reported instead of being silently ignored: a missing class folder prints
    an error and yields an empty list, and an unreadable file is skipped with
    a warning.

    Args:
        path (str): Base folder containing one subfolder per class.
        label (str): Class name; must be an entry of LABELS and the name of
            the subfolder to read.

    Returns:
        list: (image, class_index) tuples, where class_index is the label's
        position in LABELS.
    """
    data = []
    folder = os.path.join(path, label)
    if not os.path.isdir(folder):
        print(f"❌ Error: Folder not found at {folder}")
        return data

    class_index = LABELS.index(label)
    for fname in os.listdir(folder):
        if not fname.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        try:
            img_path = os.path.join(folder, fname)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            # imread signals an undecodable file by returning None
            if img is None:
                print(f"⚠️ Skipping unreadable image '{fname}'")
                continue
            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
            data.append((img, class_index))
        except Exception as e:
            # Best effort: report the file and keep loading the rest
            print(f"⚠️ Error opening image '{fname}': {e}")
    return data

def prepare_data(folder_path):
    """
    Prepares a dataset as normalized arrays from a labeled folder path.

    Images of every class in LABELS are loaded, shuffled together, reshaped
    to (N, IMAGE_SIZE, IMAGE_SIZE, 1) and divided by 255.

    Args:
        folder_path (str): Base folder containing one subfolder per class.

    Returns:
        Tuple[np.ndarray, np.ndarray]: x (images) and y (integer class ids).

    Raises:
        ValueError: If no image could be loaded from any class subfolder.
    """
    data = []
    for label in LABELS:
        data.extend(load_images_from_folder(folder_path, label))

    # Fail with a clear message rather than the cryptic unpacking error that
    # zip(*data) produces on an empty list.
    if not data:
        raise ValueError(f"No images could be loaded from '{folder_path}' for labels {LABELS}")

    random.shuffle(data)
    x, y = zip(*data)
    x = np.array(x).reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 1) / 255.0  # Normalize to [0,1]
    y = np.array(y)
    return x, y

# =============================
# 🧪 Prepare Training and Validation Data
# =============================
# New training data first, then the validation split
(train_x, train_y), (val_x, val_y) = (
    prepare_data(NEW_DATA_PATH),
    prepare_data(VALIDATION_PATH),
)

# =============================
# 🔄 Data Augmentation Functions
# =============================
def augment_image(x, y):
    """
    Applies random flips plus brightness/contrast jitter to one image.

    The result is clipped back to [0, 1]: the jitter can push pixels slightly
    outside that range, while un-augmented validation images (divided by 255
    in prepare_data) never leave it.

    Args:
        x: Image tensor. Assumes values already scaled to [0, 1] — TODO confirm
           for any new caller.
        y: Label for the image.

    Returns:
        Tuple of (augmented image as float32, label as int32).
    """
    # Cast first so all augmentation ops run in float32
    x = tf.cast(x, tf.float32)
    x = tf.image.random_flip_left_right(x)
    x = tf.image.random_flip_up_down(x)
    x = tf.image.random_brightness(x, max_delta=0.1)
    x = tf.image.random_contrast(x, lower=0.9, upper=1.1)
    x = tf.clip_by_value(x, 0.0, 1.0)
    return x, tf.cast(y, tf.int32)

def basic_cast(x, y):
    """Casts the image to float32 and the label to int32; no augmentation."""
    image = tf.cast(x, tf.float32)
    target = tf.cast(y, tf.int32)
    return image, target

# =============================
# 📦 TensorFlow Dataset Pipeline
# =============================
# Training pipeline: per-sample map (augmentation or plain cast) -> shuffle
# with a 1000-element buffer -> batch -> repeat indefinitely.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((train_x, train_y))
    .map(augment_image if AUGMENTATION_ENABLED else basic_cast)
    .shuffle(1000)
    .batch(BATCH_SIZE)
    .repeat()
)

# Validation pipeline: cast only, no augmentation
val_dataset = tf.data.Dataset.from_tensor_slices((val_x, val_y))
val_dataset = val_dataset.map(basic_cast).batch(BATCH_SIZE)

# =============================
# 🚂 Fine-Tune the Model
# =============================
# The training dataset repeats forever, so fit() needs an explicit step count.
# Use at least one step: with fewer than BATCH_SIZE new images the integer
# division alone would give 0 steps per epoch.
steps = max(1, len(train_x) // BATCH_SIZE)

fine_tune_history = model.fit(
    train_dataset,
    epochs=60,
    steps_per_epoch=steps,
    validation_data=val_dataset
)

# =============================
# 💾 Save the Fine-Tuned Model
# =============================
model.save(FINE_TUNED_MODEL_PATH)
print(f"✅ Fine-tuned model saved at: {FINE_TUNED_MODEL_PATH}")


In [None]:
# =============================
# 📊 Transfer Learning Evaluation on Test Set
# =============================

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
    accuracy_score
)

# =============================
# 🧪 Load and Prepare Test Set
# =============================
TEST_PATH = "/content/drive/MyDrive/transfer test/Test"
test_x, test_y = prepare_data(TEST_PATH)  # Uses same preprocessing as training/validation

# prepare_data already returns NumPy arrays; asarray keeps these names without copying
test_x_array = np.asarray(test_x)
test_y_array = np.asarray(test_y)

# =============================
# 🔮 Generate Predictions
# =============================
predictions = model.predict(test_x_array)
predicted_labels = np.argmax(predictions, axis=1)

# =============================
# 📉 Confusion Matrix
# =============================
# Class names in class-id order (0 = IPF Positive, 1 = IPF Negative)
class_labels = ['IPF Positive', 'IPF Negative']
class_ids = list(range(len(class_labels)))
positive_class = class_labels.index('IPF Positive')

# labels is fixed so the matrix is always 2x2 and matches the tick labels
cm = confusion_matrix(test_y_array, predicted_labels, labels=class_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix (Test Set)')
plt.show()

# =============================
# 🧾 Final Classification Metrics
# =============================
# scikit-learn scores class 1 by default, which here is "IPF Negative".
# pos_label is set explicitly so precision/recall/F1 describe "IPF Positive".
accuracy = accuracy_score(test_y_array, predicted_labels)
precision = precision_score(test_y_array, predicted_labels, pos_label=positive_class)
recall = recall_score(test_y_array, predicted_labels, pos_label=positive_class)
f1 = f1_score(test_y_array, predicted_labels, pos_label=positive_class)

print("\n🏁 Fine-Tuned Results:")
print(f"📈 Accuracy : {accuracy:.4f}")
print(f"📈 Precision: {precision:.4f}")
print(f"📈 Recall   : {recall:.4f}")
print(f"📈 F1 Score : {f1:.4f}")
