In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download(
    "mrmaazoo/breast-ultrasound-classification",
)

print("Path to dataset files:", path)



Downloading from https://www.kaggle.com/api/v1/datasets/download/mrmaazoo/breast-ultrasound-classification?dataset_version_number=1...


100%|██████████| 188M/188M [00:01<00:00, 108MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/mrmaazoo/breast-ultrasound-classification/versions/1


In [2]:
import matplotlib.pyplot as plt

# ========== 7. 視覺化 ==========
def plot_history(history, title_suffix=""):
    plt.figure()
    plt.plot(history.history["loss"], label="train")
    plt.plot(history.history["val_loss"], label="val")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.title(f"Loss {title_suffix}")
    plt.legend()
    plt.show()

    plt.figure()
    plt.plot(history.history["accuracy"], label="train")
    plt.plot(history.history["val_accuracy"], label="val")
    plt.xlabel("Epoch")
    plt.ylabel("Accuracy")
    plt.title(f"Accuracy {title_suffix}")
    plt.legend()
    plt.show()

In [3]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications.efficientnet import (
    EfficientNetB0, preprocess_input,
)
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import numpy as np
from pathlib import Path

# ========== 0. 基本設定 ==========
DATA_DIR   = Path(path) / "BUSI_Corrected"   # <-- 把 path 換成你的資料根目錄
IMG_SIZE   = (256, 256)                      # ★ 改成 256 × 256
BATCH_SIZE = 32
AUTOTUNE   = tf.data.AUTOTUNE
EPOCHS_TOP = 200   # 先訓練頂層

# ========== 1. 蒐集檔名 (排除 _mask.png) & 分層切分 ==========
class_names = sorted([p.name for p in DATA_DIR.iterdir() if p.is_dir()])

filepaths, labels = [], []
for cls_idx, cls_name in enumerate(class_names):
    for img_path in (DATA_DIR / cls_name).glob("*.png"):
        if "_mask" in img_path.stem:          # ★ 排除 segmentation mask
            continue
        filepaths.append(str(img_path))
        labels.append(cls_idx)

filepaths, labels = np.array(filepaths), np.array(labels)

train_idx, val_idx = train_test_split(
    np.arange(len(labels)),
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

train_paths, val_paths = filepaths[train_idx], filepaths[val_idx]
train_labels, val_labels = labels[train_idx],  labels[val_idx]

# ========== 2. tf.data pipeline ==========
def load_and_prep(path, label):
    img = tf.io.read_file(path)
    img = tf.image.decode_png(img, channels=3)          # BUSI 為灰階，但轉 3ch 便於載權重
    img = tf.image.resize(img, IMG_SIZE)
    img = preprocess_input(img)                         # ★ 用官方前處理
    return img, label

augmenter = tf.keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.2),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.2),
    layers.RandomBrightness(0.1),
])

def make_dataset(paths, labels, training=False):
    ds = tf.data.Dataset.from_tensor_slices((paths, labels))
    if training:
        ds = ds.shuffle(1000)
    ds = ds.map(load_and_prep, num_parallel_calls=AUTOTUNE)
    if training:
        ds = ds.map(lambda x, y: (augmenter(x, training=True), y),
                    num_parallel_calls=AUTOTUNE)
    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

train_ds = make_dataset(train_paths, train_labels, training=True)
val_ds   = make_dataset(val_paths,   val_labels,   training=False)

# ========== 3. 類別權重 ==========
cw = class_weight.compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_labels),
    y=train_labels,
)
class_weight_dict = dict(enumerate(cw))

In [None]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.applications.efficientnet import (
    EfficientNetB0, preprocess_input,
)
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt


# ========== 4. 建立模型：EfficientNetB0 ==========
base = EfficientNetB0(
    include_top=False,
    input_shape=IMG_SIZE + (3,),
    weights="imagenet",
)
base.trainable = True               # 先凍結特徵抽取器

inputs  = tf.keras.Input(shape=IMG_SIZE + (3,))
x = base(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(len(class_names), activation="softmax")(x)

model = tf.keras.Model(inputs, outputs)
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-5),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# ========== 5. 訓練頂層 ==========
history_top = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS_TOP,
    class_weight=class_weight_dict,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=3, restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.3, patience=2
        ),
    ],
)

plot_history(history_top,  "(EfficientNetB0)")


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/200
[1m18/19[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 656ms/step - accuracy: 0.3292 - loss: 1.1653

In [None]:
# --- ResNet50 模型訓練 ---
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input as resnet_preprocess
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input

# 使用 ResNet50，不包含頂層分類器
base_model = ResNet50(weights="imagenet", include_top=False, input_tensor=Input(shape=IMG_SIZE + (3,)))
base_model.trainable = False  # 先凍結特徵提取層

# 加上分類頭
x = base_model.output
x = GlobalAveragePooling2D()(x)
output = Dense(len(class_names), activation="softmax")(x)
model_resnet = Model(inputs=base_model.input, outputs=output)

# 編譯模型
model_resnet.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# 資料前處理（根據 ResNet 要求）
train_ds_resnet = train_ds.map(lambda x, y: (resnet_preprocess(x), y))
val_ds_resnet = val_ds.map(lambda x, y: (resnet_preprocess(x), y))

# 訓練模型
history_resnet = model_resnet.fit(
    train_ds_resnet,
    validation_data=val_ds_resnet,
    epochs=EPOCHS_TOP,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=3, restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.3, patience=2
        ),])

plot_history(history_resnet, title_suffix="(ResNet50)")


In [None]:
# --- DenseNet121 模型訓練 ---
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.applications.densenet import preprocess_input as densenet_preprocess

# 使用 DenseNet121，不包含頂層分類器
base_model = DenseNet121(weights="imagenet", include_top=False, input_tensor=Input(shape=IMG_SIZE + (3,)))
base_model.trainable = False

# 加上分類頭
x = base_model.output
x = GlobalAveragePooling2D()(x)
output = Dense(len(class_names), activation="softmax")(x)
model_densenet = Model(inputs=base_model.input, outputs=output)

# 編譯模型
model_densenet.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# 資料前處理（根據 DenseNet 要求）
train_ds_dense = train_ds.map(lambda x, y: (densenet_preprocess(x), y))
val_ds_dense = val_ds.map(lambda x, y: (densenet_preprocess(x), y))

# 訓練模型
history_densenet = model_densenet.fit(
    train_ds_dense,
    validation_data=val_ds_dense,
    epochs=EPOCHS_TOP,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(
            monitor="val_loss", patience=3, restore_best_weights=True
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor="val_loss", factor=0.3, patience=2
        ),]
    )

plot_history(history_densenet, title_suffix="(DenseNet121)")