In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import os
import random
from glob import glob
from sklearn.model_selection import train_test_split
from PIL import Image
from pathlib import Path
from tensorflow.keras import layers, models, callbacks
from tensorflow.keras.applications import DenseNet121
from IPython.display import HTML

In [4]:
# -----------------------------
# PARAMETERS
# -----------------------------
data_dir = "Osteoporosis_Detection_Preprocessing/dataset_new"   # Path to your dataset
IMG_SIZE = (224, 224)          # (height, width) every image is resized to
BATCH_SIZE = 8
AUTOTUNE = tf.data.AUTOTUNE
AUG_FACTOR = 3                 # number of times the augmented training set is repeated
EPOCHS = 30
SEED = 42                      # single seed shared by all RNGs below

# Seed Python, NumPy and TensorFlow so that shuffling, augmentation and weight
# initialisation are as repeatable as the pipeline allows across kernel restarts.
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [5]:
# -----------------------------
# LOAD FILES & LABELS
# -----------------------------
# Every sub-directory of data_dir is treated as one class; the sorted order
# fixes the class -> integer index mapping.
classes = sorted(d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d)))
class_to_index = {c: i for i, c in enumerate(classes)}

all_image_paths, all_labels = [], []
for cls in classes:
    # glob() returns entries in arbitrary order and "*" also matches nested
    # directories: sort for a deterministic file order and keep regular files only.
    paths = sorted(p for p in glob(os.path.join(data_dir, cls, "*")) if os.path.isfile(p))
    all_image_paths.extend(paths)
    all_labels.extend([class_to_index[cls]] * len(paths))

all_image_paths = np.array(all_image_paths)
all_labels = np.array(all_labels)

# Fail here with a clear message instead of deep inside the tf.data pipeline.
if all_image_paths.size == 0:
    raise FileNotFoundError(f"No image files found under {data_dir!r}")

print("Classes:", classes)
print("Original dataset size:", len(all_image_paths))

# -----------------------------
# PRINT CLASS DISTRIBUTION
# -----------------------------
for cls, idx in class_to_index.items():
    count = np.sum(all_labels == idx)
    print(f"{cls}: {count} images")


Classes: ['normal', 'osteoporosis']
Original dataset size: 1945
normal: 966 images
osteoporosis: 979 images


In [6]:
# -----------------------------
# NORMALIZATION FUNCTION
# -----------------------------
def normalize_image_rgb(path, label):
    """Read one image file and turn it into a standardized 3-channel tensor.

    Steps: read the file at ``path``, decode it as a single-channel image
    (only the first frame of an animation), resize to ``IMG_SIZE``, scale by
    1/255, replicate the channel three times, then subtract the per-channel
    mean and divide by the per-channel std defined below.

    Args:
        path: path of the image file to load.
        label: label belonging to that image; passed through unchanged.

    Returns:
        ``(image, label)`` where ``image`` is the standardized tensor.
    """
    raw_bytes = tf.io.read_file(path)
    gray = tf.image.decode_image(raw_bytes, channels=1, expand_animations=False)
    gray = tf.image.resize(gray, IMG_SIZE)
    scaled = tf.cast(gray, tf.float32) / 255.0

    # Replicate the single channel so the tensor has three identical channels.
    rgb = tf.tile(scaled, [1, 1, 3])

    channel_mean = tf.constant([0.485, 0.456, 0.406])
    channel_std = tf.constant([0.229, 0.224, 0.225])
    return (rgb - channel_mean) / channel_std, label

# -----------------------------
# STRONG AUGMENTATION FUNCTION
# -----------------------------
def augment_image(img, label):
    """Apply random augmentation to one image; the label is returned untouched.

    The image goes through, in order: a random left/right flip, a random
    brightness shift (``max_delta=0.2``) and a random contrast change with a
    factor drawn from ``[0.9, 1.1]``.

    Args:
        img: image tensor to augment.
        label: label associated with ``img``.

    Returns:
        ``(augmented_image, label)``.
    """
    augmented = tf.image.random_flip_left_right(img)
    augmented = tf.image.random_brightness(augmented, max_delta=0.2)
    augmented = tf.image.random_contrast(augmented, 0.9, 1.1)
    return augmented, label




In [7]:
# -----------------------------
# BUILD DATASET
# -----------------------------
# Pair every file path with its label, then lazily load + normalize each image.
dataset = (
    tf.data.Dataset
    .from_tensor_slices((all_image_paths, all_labels))
    .map(normalize_image_rgb, num_parallel_calls=AUTOTUNE)
)

In [8]:
# -----------------------------
# SPLIT TRAIN / VAL / TEST (50/25/25)
# -----------------------------
dataset_size = len(all_image_paths)
train_size = int(0.5 * dataset_size)
val_size = int(0.25 * dataset_size)
test_size = dataset_size - train_size - val_size  # remainder, so the three sizes always sum to dataset_size

# A fixed seed together with reshuffle_each_iteration=False yields the same
# order on every iteration AND on every run, so take()/skip() below always
# carve out the same, non-overlapping subsets.
# The result is bound to a new name (not `dataset`) so that re-running this
# cell does not stack a second shuffle on top of the first one.
shuffled_ds = dataset.shuffle(dataset_size, seed=42, reshuffle_each_iteration=False)
train_ds = shuffled_ds.take(train_size)
val_test_ds = shuffled_ds.skip(train_size)
val_ds = val_test_ds.take(val_size)
test_ds = val_test_ds.skip(val_size)

# cardinality() reports the element count without iterating the pipeline,
# which would otherwise decode every image once per split.
print("Train size:", train_ds.cardinality().numpy())
print("Val size  :", val_ds.cardinality().numpy())
print("Test size :", test_ds.cardinality().numpy())


Train size: 972
Val size  : 486
Test size : 487


In [9]:
# -----------------------------
# AUGMENT TRAINING DATA, THEN BATCH & PREFETCH
# -----------------------------
# Training pipeline: random augmentation -> repeat AUG_FACTOR times ->
# shuffle across the whole repeated set -> batch -> prefetch.
train_ds = (
    train_ds
    .map(augment_image, num_parallel_calls=AUTOTUNE)
    .repeat(AUG_FACTOR)
    .shuffle(buffer_size=train_size * AUG_FACTOR)
    .batch(BATCH_SIZE)
    .prefetch(AUTOTUNE)
)

# Validation and test data are only batched and prefetched (no augmentation).
val_ds, test_ds = (
    split.batch(BATCH_SIZE).prefetch(AUTOTUNE) for split in (val_ds, test_ds)
)


In [10]:
# -----------------------------
# MODEL WITH DenseNet121
# -----------------------------
# The input shape is derived from IMG_SIZE so the network always matches the
# size the data pipeline resizes images to (3 = number of channels fed in).
base_model = DenseNet121(
    include_top=False,
    weights='imagenet',
    input_shape=tuple(IMG_SIZE) + (3,)
)
base_model.trainable = True  # fine-tune entire base

# ADD CUSTOM CLASSIFIER
# Pooled backbone features -> batch norm -> dropout -> 256-unit ReLU layer
# -> dropout -> single sigmoid unit.
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(1, activation='sigmoid')(x)  # binary classification

model = models.Model(inputs=base_model.input, outputs=outputs)

# -----------------------------
# COMPILE MODEL
# -----------------------------
# Binary cross-entropy matches the single sigmoid output above.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

In [None]:
# -----------------------------
# CALLBACKS
# -----------------------------
# Stop once validation accuracy has not improved for 10 epochs and roll the
# weights back to the best epoch seen.
early_stop = callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
)
# Halve the learning rate after 3 epochs without validation-loss improvement,
# never going below 1e-6.
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=3,
    min_lr=1e-6,
)
training_callbacks = [early_stop, reduce_lr]

# -----------------------------
# TRAIN MODEL
# -----------------------------
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=training_callbacks,
)


In [None]:

# Save the trained model to "osteoporosis_model.h5"; the path is relative to
# the notebook's current working directory.
model.save("osteoporosis_model.h5")

In [None]:
def evaluate_model(model, dataset, split_name="Dataset", class_names=None):
    """Print accuracy and a classification report, and plot a confusion matrix.

    Args:
        model: callable Keras model that maps a batch of images to one
            probability per image (single sigmoid output).
        dataset: iterable yielding ``(images, labels)`` batches; both elements
            must expose ``.numpy()`` after the model call / on the labels.
        split_name: label used in the printed headings and the plot title.
        class_names: display names for label 0 and label 1, in that order.
            Defaults to ``["normal", "osteoporosis"]``.

    Returns:
        None. Results are printed and plotted. If ``dataset`` yields no
        batches, a short message is printed and nothing else happens.
    """
    classes = list(class_names) if class_names is not None else ["normal", "osteoporosis"]
    # Predictions are thresholded to {0, 1}; pinning the label set keeps the
    # report and the confusion matrix 2x2 even when a split lacks one class.
    label_ids = [0, 1]

    true_batches, pred_batches = [], []
    for imgs, lbls in dataset:
        # Calling the model directly avoids the per-call setup cost of
        # Model.predict when it is invoked once per batch in a Python loop.
        probs = model(imgs, training=False).numpy()
        true_batches.append(lbls.numpy())
        pred_batches.append((probs > 0.5).astype("int32").flatten())

    if not true_batches:
        print(f"\n{split_name}: dataset is empty, nothing to evaluate.\n")
        return

    y_true = np.concatenate(true_batches)
    y_pred = np.concatenate(pred_batches)

    acc = np.mean(y_true == y_pred)
    print(f"\n{split_name} Accuracy: {acc*100:.2f}%\n")

    print(f"{split_name} Classification Report:")
    print(classification_report(y_true, y_pred, labels=label_ids, target_names=classes))

    # Confusion matrix
    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    plt.figure(figsize=(5,4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
                xticklabels=classes, yticklabels=classes)
    plt.title(f"{split_name} Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.show()


In [None]:
# =========================
# Run evaluation
# =========================
# Same report for each split, in train -> validation -> test order.
for split_label, split_ds in (
    ("Train", train_ds),
    ("Validation", val_ds),
    ("Test", test_ds),
):
    evaluate_model(model, split_ds, split_name=split_label)

In [None]:
# -----------------------------
# PLOT TRAINING CURVES
# -----------------------------
# Per-epoch metrics recorded by model.fit; a missing key falls back to [].
recorded = history.history
acc, val_acc = recorded.get('accuracy', []), recorded.get('val_accuracy', [])
loss, val_loss = recorded.get('loss', []), recorded.get('val_loss', [])

# 1-based epoch numbers for the x axis.
epochs_ran = range(1, len(acc) + 1)

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: accuracy
ax_acc.plot(epochs_ran, acc, label='Training Accuracy')
ax_acc.plot(epochs_ran, val_acc, label='Validation Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_title('Training and Validation Accuracy')

# Right panel: loss
ax_loss.plot(epochs_ran, loss, label='Training Loss')
ax_loss.plot(epochs_ran, val_loss, label='Validation Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_title('Training and Validation Loss')

fig.tight_layout()
plt.show()

In [None]:
# -----------------------------
# PREDICTION & VISUALIZATION HELPERS
# -----------------------------
# Display names indexed by label id; reuses the `classes` list built while
# loading the data (e.g. ['normal', 'osteoporosis']).
class_names = classes

# Per-channel mean/std used earlier for normalization (DenseNet values),
# kept as float32 arrays so the normalization can be reversed for display.
MEAN, STD = (
    np.array(channel_stats, dtype=np.float32)
    for channel_stats in ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
)

def unnormalize_image(img_norm):
    """Undo the ``(img - mean) / std`` normalization so the image can be shown.

    Args:
        img_norm: normalized image, either a TensorFlow tensor or anything
            ``np.array`` accepts, whose last axis lines up with ``MEAN``/``STD``.

    Returns:
        NumPy array equal to ``img_norm * STD + MEAN`` clipped to ``[0, 1]``,
        suitable for ``plt.imshow``.
    """
    as_numpy = img_norm.numpy() if isinstance(img_norm, tf.Tensor) else np.array(img_norm)
    # Reverse the normalization, then clamp into the displayable range.
    return np.clip(as_numpy * STD + MEAN, 0.0, 1.0)

def predict_single(model, img):
    """Classify one already-normalized image with ``model``.

    Args:
        model: object whose ``predict`` returns an array indexable as
            ``[0][0]`` holding the probability of class index 1.
        img: tensor or array-like, either a single image (3 dimensions) or a
            batch (4 dimensions); for a batch only the first prediction is used.

    Returns:
        ``(predicted_class_name, confidence_percent, prob_value)`` where the
        name comes from ``class_names``, the confidence is the probability of
        the predicted class in percent rounded to 2 decimals, and
        ``prob_value`` is the raw model output.

    Raises:
        ValueError: if ``img`` has neither 3 nor 4 dimensions.
    """
    tensor = img if isinstance(img, tf.Tensor) else tf.convert_to_tensor(img, dtype=tf.float32)

    n_dims = len(tensor.shape)
    if n_dims not in (3, 4):
        raise ValueError("Unexpected image shape for prediction: " + str(tensor.shape))
    # A lone image gets a leading batch axis; a batch is passed through as-is.
    inp = tf.expand_dims(tensor, axis=0) if n_dims == 3 else tensor

    prob = float(model.predict(inp, verbose=0)[0][0])  # probability of class index 1

    if prob > 0.5:
        pred_idx, chosen_prob = 1, prob
    else:
        pred_idx, chosen_prob = 0, 1.0 - prob

    return class_names[pred_idx], round(100.0 * chosen_prob, 2), prob


# -----------------------------
# VISUALIZE PREDICTIONS ON TEST BATCH
# -----------------------------
display_count = 9
# Smallest square grid that can hold display_count tiles (3x3 for 9).
grid_side = int(np.ceil(np.sqrt(display_count)))

plt.figure(figsize=(12, 12))

# unbatch() yields single (image, label) pairs, so exactly the first
# display_count test images are shown regardless of BATCH_SIZE (or fewer if
# the test set is smaller than that).
for collected, (img_norm, label_tensor) in enumerate(test_ds.unbatch().take(display_count)):
    label = int(label_tensor.numpy())
    # unnormalize for display; the model itself gets the normalized tensor
    img_display = unnormalize_image(img_norm)
    predicted_class, confidence, prob = predict_single(model, img_norm)

    plt.subplot(grid_side, grid_side, collected + 1)
    plt.imshow(img_display)  # floats in [0,1]
    actual_class = class_names[label]
    plt.title(f"Actual: {actual_class}\nPredicted: {predicted_class} ({confidence}%)\nProb(osteoporosis): {prob:.3f}")
    plt.axis("off")

plt.tight_layout()
plt.show()
