In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Conv2D, MaxPooling2D,
    Activation, Flatten,
    Dense, Dropout
)

# ------------------------------------------------------
# PARAMETERS & DIRECTORY SETUP
# ------------------------------------------------------
# Root folder that holds one sub-folder per class. The default is the original
# author's local path; set the SONOGRAM_DATASET_DIR environment variable to
# point the notebook at another location without editing this cell.
dataset_dir = os.environ.get("SONOGRAM_DATASET_DIR", r"C:\Users\benny\Sonogram\dataset")

# Class folder names. The position in this list is the integer label used for
# training, so the order must not change once a model has been saved.
classes     = ['normal', 'benign', 'malignant']
class_to_label = {cls: i for i, cls in enumerate(classes)}

# Images are resized to this size before being fed to the network.
img_height, img_width = 150, 150
batch_size = 16
epochs     = 30
learning_rate = 1e-4

# ------------------------------------------------------
# COLLECT IMAGE PATHS & LABELS
# ------------------------------------------------------
# Parallel lists: all_paths[i] is an image file, all_labels[i] its integer class.
all_paths = []
all_labels = []
for cls in classes:
    folder = os.path.join(dataset_dir, cls)
    if not os.path.isdir(folder):
        # A missing class folder is reported but does not abort the run.
        print(f"Warning: class folder missing: {folder}")
        continue
    # os.listdir() returns entries in arbitrary, filesystem-dependent order;
    # sorting makes the collected list identical on every machine and run.
    for fname in sorted(os.listdir(folder)):
        # Skip mask files (any name containing "_mask", case-insensitive).
        if '_mask' in fname.lower():
            continue
        # Keep only the image extensions the loader is expected to handle.
        ext = os.path.splitext(fname)[1].lower()
        if ext not in {'.jpg', '.jpeg', '.png'}:
            continue
        all_paths.append(os.path.join(folder, fname))
        all_labels.append(class_to_label[cls])

print(f"Found {len(all_paths)} total images.")

# ------------------------------------------------------
# TRAIN/VAL/TEST SPLIT
# ------------------------------------------------------
# Fixed seed: for a given all_paths order the same images land in the same
# split on every run, so reported validation/test numbers are comparable.
split_seed = 42

n = len(all_paths)
if n == 0:
    # Fail early with an actionable message instead of a cryptic unpack error.
    raise ValueError("No images were collected; check dataset_dir and the class folders.")

# Draw one permutation and apply it to paths and labels so they stay aligned.
order = np.random.default_rng(split_seed).permutation(n)
paths_shuffled  = tuple(all_paths[i] for i in order)
labels_shuffled = tuple(all_labels[i] for i in order)
data = list(zip(paths_shuffled, labels_shuffled))

# 70% train / 15% validation / remainder (about 15%) test.
n_train = int(0.7 * n)
n_val   = int(0.15 * n)

train_paths = paths_shuffled[:n_train]
train_labels= labels_shuffled[:n_train]

val_paths   = paths_shuffled[n_train:n_train+n_val]
val_labels  = labels_shuffled[n_train:n_train+n_val]

# Everything left over after the train and validation slices.
test_paths  = paths_shuffled[n_train+n_val:]
test_labels = labels_shuffled[n_train+n_val:]

print(f"Train: {len(train_paths)}, Val: {len(val_paths)}, Test: {len(test_paths)}")

# ------------------------------------------------------
# DATASET CREATION
# ------------------------------------------------------
def load_and_preprocess(path, label):
    """Load one image file and turn it into a normalized model input.

    Args:
        path: Scalar string tensor with the path of a JPEG or PNG file.
        label: Integer class label; passed through unchanged.

    Returns:
        A tuple ``(img, label)`` where ``img`` is a float32 tensor of shape
        ``(img_height, img_width, 3)`` scaled to the range [0, 1].
    """
    img = tf.io.read_file(path)
    # The dataset contains PNG as well as JPEG files, so use the
    # format-agnostic decoder. expand_animations=False keeps the result 3-D
    # (H, W, C), which tf.image.resize needs to infer the output shape.
    img = tf.image.decode_image(img, channels=3, expand_animations=False)
    img = tf.image.resize(img, [img_height, img_width])
    img = img / 255.0
    return img, label

def make_dataset(paths, labels, shuffle=True):
    """Build a batched, prefetched tf.data pipeline from paths and labels.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of integer labels, aligned with ``paths``.
        shuffle: If True, the examples are reshuffled on every iteration.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of at most
        ``batch_size`` examples, as produced by ``load_and_preprocess``.
    """
    ds = tf.data.Dataset.from_tensor_slices((list(paths), list(labels)))
    if shuffle:
        # Shuffle the lightweight (path, label) pairs *before* decoding so the
        # shuffle buffer holds short strings rather than every decoded
        # float32 image. A buffer as large as the dataset gives a full
        # uniform shuffle.
        ds = ds.shuffle(buffer_size=len(paths))
    ds = ds.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

# One pipeline per split, built in train -> val -> test order.
# Only the training split is shuffled.
train_ds, val_ds, test_ds = (
    make_dataset(split_paths, split_labels, shuffle=do_shuffle)
    for split_paths, split_labels, do_shuffle in (
        (train_paths, train_labels, True),
        (val_paths, val_labels, False),
        (test_paths, test_labels, False),
    )
)

# ------------------------------------------------------
# MODEL DEFINITION (3‑channel input)
# ------------------------------------------------------
# Small CNN: three Conv -> ReLU -> MaxPool stages with 32, 64 and 128 filters,
# followed by a dense head with dropout and a softmax over the classes.
model = Sequential([
    Input(shape=(img_height, img_width, 3)),
    *[
        stage_layer
        for n_filters in (32, 64, 128)
        for stage_layer in (
            Conv2D(n_filters, (3, 3)),
            Activation('relu'),
            MaxPooling2D(),
        )
    ],
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(len(classes), activation='softmax'),
])

# Labels are plain integers, hence the sparse variant of cross-entropy.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

# ------------------------------------------------------
# TRAIN
# ------------------------------------------------------
# Fit on the training pipeline and report validation metrics after each epoch.
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)

# ------------------------------------------------------
# EVALUATE & SAVE
# ------------------------------------------------------
# evaluate() returns the loss followed by the compiled metrics (accuracy).
test_metrics = model.evaluate(test_ds)
test_loss, test_acc = test_metrics
print(f"Test Accuracy: {test_acc:.4f}")

# Persist the trained model to an HDF5 file in the working directory.
model.save("sonogram_tumor_detection_model.h5")
print("Model saved as sonogram_tumor_detection_model.h5")


Found 780 total images.
Train: 546, Val: 117, Test: 117


Epoch 1/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 157ms/step - accuracy: 0.5043 - loss: 1.0122 - val_accuracy: 0.5983 - val_loss: 0.9209
Epoch 2/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 174ms/step - accuracy: 0.5443 - loss: 0.9374 - val_accuracy: 0.6154 - val_loss: 0.8745
Epoch 3/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 200ms/step - accuracy: 0.6450 - loss: 0.8372 - val_accuracy: 0.5983 - val_loss: 0.8269
Epoch 4/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 210ms/step - accuracy: 0.7115 - loss: 0.7034 - val_accuracy: 0.6496 - val_loss: 0.8047
Epoch 5/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 218ms/step - accuracy: 0.7363 - loss: 0.6327 - val_accuracy: 0.6068 - val_loss: 0.8236
Epoch 6/30
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 217ms/step - accuracy: 0.7657 - loss: 0.5843 - val_accuracy: 0.6838 - val_loss: 0.7812
Epoch 7/30
[1m35/35[0m [3



Test Accuracy: 0.7436
Model saved as sonogram_tumor_detection_model.h5


In [5]:
# predict.py

import os

# ──────────────────────────────────────────────────────────────────────────────
# SILENCE TF LOGS
# ──────────────────────────────────────────────────────────────────────────────
# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) logging and has to be
# in the environment *before* TensorFlow is imported to reliably take effect,
# which is why this assignment sits between the imports.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import tensorflow as tf

# Python-side TensorFlow logger: show errors only.
tf.get_logger().setLevel('ERROR')

# ──────────────────────────────────────────────────────────────────────────────
# CONFIGURATION
# ──────────────────────────────────────────────────────────────────────────────
MODEL_PATH  = "sonogram_tumor_detection_model.h5"   # file written by the training cell
TEST_FOLDER = "datatotest"                          # folder with images to classify
IMG_SIZE    = (150, 150)                            # must match the training input size
CLASSES     = ["normal", "benign", "malignant"]     # index order must match training labels
VALID_EXTS  = {".jpg", ".jpeg", ".png"}             # extensions picked up from TEST_FOLDER
# ──────────────────────────────────────────────────────────────────────────────

def preprocess_rgb(path):
    """
    Load one image file as a normalized RGB tensor.

    The file is decoded to 3 channels (JPEG or PNG; animations are not
    expanded), resized to IMG_SIZE and scaled to [0, 1].

    Returns a (H, W, 3) float32 tensor.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(decoded, IMG_SIZE)
    return resized / 255.0

def _label_from_filename(fname):
    """Return the class named by the first word of ``fname``, or None.

    Files are expected to be named like ``"benign (12).png"``. The extension
    is stripped and the comparison is case-insensitive. None is returned when
    the first word is not one of CLASSES, i.e. the true class is unknown.
    """
    stem = os.path.splitext(os.path.basename(fname))[0]
    words = stem.split()
    if not words:
        return None
    candidate = words[0].lower()
    return candidate if candidate in CLASSES else None


def main(chunk_size=32):
    """Classify every image in TEST_FOLDER and print one line per file.

    When file names encode the true class (see ``_label_from_filename``), a
    confidence-weighted accuracy is printed as well: the summed confidence of
    correct predictions divided by the summed confidence of all predictions
    on files with a known class.

    Args:
        chunk_size: Number of images sent through the model per forward pass.
            Bounds peak memory; it does not change the predictions.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    # 1) Load the model (inference-only)
    if not os.path.exists(MODEL_PATH):
        print(f"ERROR: Model file not found at '{MODEL_PATH}'")
        return
    model = tf.keras.models.load_model(MODEL_PATH, compile=False)

    # 2) Collect all valid test images
    if not os.path.isdir(TEST_FOLDER):
        print(f"ERROR: Test folder not found at '{TEST_FOLDER}'")
        return
    files = sorted(f for f in os.listdir(TEST_FOLDER)
                   if os.path.splitext(f)[1].lower() in VALID_EXTS)
    if not files:
        print("No images to process.")
        return
    paths = [os.path.join(TEST_FOLDER, f) for f in files]

    # 3) Preprocess and predict chunk by chunk. Pushing every image through
    #    the network in one call makes the activations scale with the number
    #    of files and can exhaust memory on large folders.
    pred_chunks = []
    for start in range(0, len(paths), chunk_size):
        chunk_paths = paths[start:start + chunk_size]
        batch = tf.stack([preprocess_rgb(p) for p in chunk_paths], axis=0)
        pred_chunks.append(model(batch, training=False).numpy())
    preds = np.concatenate(pred_chunks, axis=0)  # shape: (N, len(CLASSES))

    # 4) Print results and accumulate the (optional) weighted accuracy
    sum_conf_labeled = 0.0   # confidence mass over files with a known class
    sum_conf_corr = 0.0      # confidence mass over correctly classified files
    n_unlabeled = 0

    for fname, pred in zip(files, preds):
        idx   = int(np.argmax(pred))
        label = CLASSES[idx]
        conf  = float(pred[idx])

        true_label = _label_from_filename(fname)
        if true_label is None:
            # Unknown ground truth: report the prediction but keep the file
            # out of the accuracy instead of silently counting it as wrong.
            n_unlabeled += 1
        else:
            sum_conf_labeled += conf
            if label == true_label:
                sum_conf_corr += conf

        print(f"{fname:30s} → {label:9s} (confidence: {conf:.4f})")

    if sum_conf_labeled > 0:
        weighted_acc = sum_conf_corr / sum_conf_labeled
        print(f"\nWeighted accuracy: {weighted_acc:.2%}")
    if n_unlabeled:
        print(f"({n_unlabeled} file(s) without a recognisable class in the name "
              f"were excluded from the accuracy.)")

if __name__ == "__main__":
    main()


benign (1).png                 → benign    (confidence: 1.0000)
benign (10).png                → benign    (confidence: 1.0000)
benign (100).png               → benign    (confidence: 0.9925)
benign (101).png               → malignant (confidence: 0.7984)
benign (102).png               → benign    (confidence: 0.9200)
benign (103).png               → malignant (confidence: 0.9642)
benign (104).png               → benign    (confidence: 0.9994)
benign (105).png               → benign    (confidence: 0.9996)
benign (106).png               → benign    (confidence: 0.9990)
benign (107).png               → benign    (confidence: 0.9982)
benign (108).png               → benign    (confidence: 0.9841)
benign (109).png               → benign    (confidence: 0.9999)
benign (11).png                → benign    (confidence: 1.0000)
benign (110).png               → benign    (confidence: 1.0000)
benign (111).png               → benign    (confidence: 0.9798)
benign (112).png               → benign 