In [1]:
from pathlib import Path
import pandas as pd

# Kaggle'da eklediğin dataset yolu
INPUT = Path("/kaggle/input/skin-cancer-mnist-ham10000")

# 1) Metadata oku ve ilk satırları göster
meta_path = INPUT / "HAM10000_metadata.csv"
df = pd.read_csv(meta_path)
print("Metadata yolu:", meta_path)
print("Satır x Sütun:", df.shape)
display(df.head())

# 2) Görsel klasörleri var mı kontrol et (isim varyantlarını da dene)
candidates = [
    INPUT/"ham10000_images_part_1",
    INPUT/"ham10000_images_part_2",
    INPUT/"HAM10000_images_part_1",
    INPUT/"HAM10000_images_part_2",
    INPUT/"ham10000_images",
    INPUT/"HAM10000_images",
]
print("\nGörsel klasörleri:")
for p in candidates:
    print(p, "->", p.exists())

# 3) Örnek bir görsel yolu oluştur (sadece kontrol)
sample_file = df.loc[0, "image_id"] + ".jpg"
found = None
for p in candidates:
    if p.exists() and (p / sample_file).exists():
        found = p / sample_file
        break
print("\nÖrnek görsel (varsa):", found)


Metadata yolu: /kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv
Satır x Sütun: (10015, 7)


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear



Görsel klasörleri:
/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_1 -> True
/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_2 -> True
/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1 -> True
/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2 -> True
/kaggle/input/skin-cancer-mnist-ham10000/ham10000_images -> False
/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images -> False

Örnek görsel (varsa): /kaggle/input/skin-cancer-mnist-ham10000/ham10000_images_part_1/ISIC_0027419.jpg


In [2]:
import pandas as pd, shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

# Yollar
INPUT = Path("/kaggle/input/skin-cancer-mnist-ham10000")
PREP  = Path("/kaggle/working/data/prepared")
PREP.mkdir(parents=True, exist_ok=True)

df = pd.read_csv(INPUT/"HAM10000_metadata.csv")
df["filename"] = df["image_id"] + ".jpg"

# Split (%70 train, %15 val, %15 test)
train_df, temp = train_test_split(df, test_size=0.3, stratify=df["dx"], random_state=42)
val_df, test_df = train_test_split(temp, test_size=0.5, stratify=temp["dx"], random_state=42)

df["split"] = "train"
df.loc[val_df.index,"split"]  = "val"
df.loc[test_df.index,"split"] = "test"

# metadata.csv kaydet
df.to_csv(PREP/"metadata.csv", index=False)

print("Train:", len(train_df), "Val:", len(val_df), "Test:", len(test_df))

# Klasörleri oluştur
for split in ["train", "val", "test"]:
    for label in df["dx"].unique():
        (PREP/split/label).mkdir(parents=True, exist_ok=True)

# Resimleri kopyala
img_dirs = [
    INPUT/"ham10000_images_part_1",
    INPUT/"ham10000_images_part_2",
]

missing = 0
for _, row in df.iterrows():
    src = None
    for d in img_dirs:
        p = d/row["filename"]
        if p.exists():
            src = p
            break
    if src is None:
        missing += 1
        continue
    dst = PREP/row["split"]/row["dx"]/row["filename"]
    if not dst.exists():
        shutil.copy2(src, dst)

print("Kopyalama tamam ✅ Eksik:", missing)


Train: 7010 Val: 1502 Test: 1503
Kopyalama tamam ✅ Eksik: 0


In [3]:
import tensorflow as tf
import pandas as pd
from pathlib import Path

PREP = Path("/kaggle/working/data/prepared")
meta = pd.read_csv(PREP/"metadata.csv")

IMG_SIZE = (224,224)
AUTOTUNE = tf.data.AUTOTUNE

# Basit text özelliği (örnek: "sex:male localization:back age:45")
def build_text_feature(row):
    return f"sex:{row['sex']} localization:{row['localization']} age:{int(row['age']) if not pd.isna(row['age']) else -1}"

meta["text"] = meta.apply(build_text_feature, axis=1)
meta["filepath"] = meta.apply(lambda r: str(PREP/r["split"]/r["dx"]/r["filename"]), axis=1)

# Sınıf isimleri
class_names = sorted(meta["dx"].unique())
class_to_idx = {c:i for i,c in enumerate(class_names)}
num_classes = len(class_names)

meta["label"] = meta["dx"].map(class_to_idx)

print("Sınıflar:", class_names)

# TensorFlow dataset hazırlama
def decode_img(path):
    img = tf.io.read_file(path)
    img = tf.io.decode_jpeg(img, channels=3)
    img = tf.image.resize(img, IMG_SIZE)
    return tf.cast(img, tf.float32)/255.0

def make_ds(df, training=True, batch_size=32):
    paths = tf.convert_to_tensor(df["filepath"].values)
    texts = tf.convert_to_tensor(df["text"].values)
    labels = tf.one_hot(df["label"].values, num_classes)

    ds = tf.data.Dataset.from_tensor_slices((paths, texts, labels))

    def _map(path, text, label):
        img = decode_img(path)
        if training:
            img = tf.image.random_flip_left_right(img)
        return ({"image": img, "text": text}, label)

    if training:
        ds = ds.shuffle(2048)
    return ds.map(_map, num_parallel_calls=AUTOTUNE).batch(batch_size).prefetch(AUTOTUNE)

train_ds = make_ds(meta[meta["split"]=="train"])
val_ds   = make_ds(meta[meta["split"]=="val"], training=False)
test_ds  = make_ds(meta[meta["split"]=="test"], training=False)

print("Datasetler hazır ✅")


2025-09-12 22:33:52.082589: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1757716432.288052      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1757716432.348215      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Sınıflar: ['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']
Datasetler hazır ✅


I0000 00:00:1757716444.706309      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1757716444.707002      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5


In [4]:
import tensorflow as tf

def build_fusion_model(num_classes, vocab_size=4000, seq_len=24, embed_dim=64):
    # 🔹 Görsel taraf
    img_in = tf.keras.Input(shape=(224,224,3), name="image")
    x = tf.keras.applications.mobilenet_v2.preprocess_input(img_in*255.0)
    base = tf.keras.applications.MobileNetV2(
        include_top=False, weights="imagenet", input_shape=(224,224,3))
    base.trainable = False  # önce dondur
    x = base(x, training=False)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dropout(0.2)(x)

    # 🔹 Text taraf
    txt_in = tf.keras.Input(shape=(), dtype=tf.string, name="text")
    vectorizer = tf.keras.layers.TextVectorization(
        max_tokens=vocab_size, output_mode="int", output_sequence_length=seq_len
    )
    # Adapt etmeyi unutma (train text ile yapacağız)
    t = vectorizer(txt_in)
    t = tf.keras.layers.Embedding(vocab_size, embed_dim, mask_zero=True)(t)
    t = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(64))(t)
    t = tf.keras.layers.Dropout(0.2)(t)

    # 🔹 Füzyon
    z = tf.keras.layers.Concatenate()([x, t])
    z = tf.keras.layers.Dense(256, activation="relu")(z)
    z = tf.keras.layers.Dropout(0.3)(z)
    out = tf.keras.layers.Dense(num_classes, activation="softmax")(z)

    model = tf.keras.Model(inputs=[img_in, txt_in], outputs=out)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model, vectorizer

# Model oluştur
model, text_vec = build_fusion_model(num_classes=len(class_names))
print(model.summary())

# TextVectorization adapt → train text ile
train_texts = meta[meta["split"]=="train"]["text"].astype(str).tolist()
text_vec.adapt(tf.data.Dataset.from_tensor_slices(train_texts).batch(256))
print("Text vectorizer adapte edildi ✅")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


None
Text vectorizer adapte edildi ✅


In [5]:
# Class imbalance için ağırlık hesapla
from collections import Counter

cnt = Counter(meta[meta["split"]=="train"]["dx"])
total = sum(cnt.values())
class_weight = {class_to_idx[c]: total/(len(class_names)*cnt[c]) for c in class_names}
print("Class weights:", class_weight)

# Callback'ler
ckpt = tf.keras.callbacks.ModelCheckpoint(
    "/kaggle/working/best_model.keras", monitor="val_accuracy", save_best_only=True, verbose=1
)
es = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy", patience=3, restore_best_weights=True
)

# Eğitim
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,
    class_weight=class_weight,
    callbacks=[ckpt, es]
)

# Test sonucu
print("Test sonucu:", model.evaluate(test_ds, verbose=0))


Class weights: {0: 4.37305053025577, 1: 2.7817460317460316, 2: 1.3022478172023035, 3: 12.36331569664903, 4: 1.285530900421786, 5: 0.21338772031292808, 6: 10.115440115440116}
Epoch 1/10


I0000 00:00:1757716457.885797      59 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step - accuracy: 0.3874 - loss: 2.4485
Epoch 1: val_accuracy improved from -inf to 0.53728, saving model to /kaggle/working/best_model.keras
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 99ms/step - accuracy: 0.3879 - loss: 2.4440 - val_accuracy: 0.5373 - val_loss: 1.3360
Epoch 2/10
[1m219/220[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 69ms/step - accuracy: 0.4983 - loss: 1.6045
Epoch 2: val_accuracy improved from 0.53728 to 0.64048, saving model to /kaggle/working/best_model.keras
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 86ms/step - accuracy: 0.4993 - loss: 1.5996 - val_accuracy: 0.6405 - val_loss: 1.0197
Epoch 3/10
[1m219/220[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 68ms/step - accuracy: 0.5651 - loss: 1.4045
Epoch 3: val_accuracy did not improve from 0.64048
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 82ms/step - accur

In [6]:
# ✅ Class imbalance için ağırlıklar
from collections import Counter
cnt = Counter(meta[meta["split"]=="train"]["dx"])
total = sum(cnt.values())
class_weight = {class_to_idx[c]: total/(len(class_names)*cnt[c]) for c in class_names}
print("Class weights:", class_weight)

# ✅ Data augmentation (görseller için)
data_augmentation = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.1),
    tf.keras.layers.RandomZoom(0.1),
    tf.keras.layers.RandomContrast(0.1),
])

# Dataset pipeline’ını augmentation ile güncelle
def make_ds(df, training=True, batch_size=32):
    paths = tf.convert_to_tensor(df["filepath"].values)
    texts = tf.convert_to_tensor(df["text"].values)
    labels = tf.one_hot(df["label"].values, num_classes)

    ds = tf.data.Dataset.from_tensor_slices((paths, texts, labels))

    def _map(path, text, label):
        img = tf.io.read_file(path)
        img = tf.io.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, IMG_SIZE)
        img = tf.cast(img, tf.float32)/255.0
        if training:
            img = data_augmentation(img)
        return ({"image": img, "text": text}, label)

    if training:
        ds = ds.shuffle(2048)
    return ds.map(_map, num_parallel_calls=AUTOTUNE).batch(batch_size).prefetch(AUTOTUNE)

train_ds = make_ds(meta[meta["split"]=="train"])
val_ds   = make_ds(meta[meta["split"]=="val"], training=False)
test_ds  = make_ds(meta[meta["split"]=="test"], training=False)

# ✅ Callback'ler
ckpt = tf.keras.callbacks.ModelCheckpoint(
    "/kaggle/working/best_model.keras", monitor="val_auc", mode="max",
    save_best_only=True, verbose=1
)
es = tf.keras.callbacks.EarlyStopping(
    monitor="val_auc", mode="max",
    patience=5, restore_best_weights=True
)
lr_sched = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_auc", mode="max",
    factor=0.5, patience=2, verbose=1, min_lr=1e-7
)

# ✅ Eğitim
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,   # daha uzun tut, early stopping ile durur
    class_weight=class_weight,
    callbacks=[ckpt, es, lr_sched]
)

# ✅ Test sonucu
print("Test sonucu:", model.evaluate(test_ds, verbose=0))


Class weights: {0: 4.37305053025577, 1: 2.7817460317460316, 2: 1.3022478172023035, 3: 12.36331569664903, 4: 1.285530900421786, 5: 0.21338772031292808, 6: 10.115440115440116}
Epoch 1/30
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 258ms/step - accuracy: 0.5560 - loss: 1.3742 - val_accuracy: 0.6904 - val_loss: 0.8373 - learning_rate: 0.0010
Epoch 2/30


  self._save_model(epoch=epoch, batch=None, logs=logs)
  current = self.get_monitor_value(logs)
  callback.on_epoch_end(epoch, logs)


[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 257ms/step - accuracy: 0.5741 - loss: 1.2622 - val_accuracy: 0.6698 - val_loss: 0.9387 - learning_rate: 0.0010
Epoch 3/30
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 256ms/step - accuracy: 0.6025 - loss: 1.1825 - val_accuracy: 0.7144 - val_loss: 0.7765 - learning_rate: 0.0010
Epoch 4/30
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 257ms/step - accuracy: 0.6275 - loss: 1.0595 - val_accuracy: 0.6644 - val_loss: 0.9468 - learning_rate: 0.0010
Epoch 5/30
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 257ms/step - accuracy: 0.6288 - loss: 1.2119 - val_accuracy: 0.6558 - val_loss: 0.8986 - learning_rate: 0.0010
Epoch 6/30
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m57s[0m 257ms/step - accuracy: 0.6041 - loss: 1.1694 - val_accuracy: 0.6605 - val_loss: 0.9423 - learning_rate: 0.0010
Epoch 7/30
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [7]:
# ✅ Mevcut modeli kaydet
model.save("/kaggle/working/ham10000_model_v1.keras")

print("Mevcut model kaydedildi ✅ /kaggle/working/ham10000_model_v1.keras")

Mevcut model kaydedildi ✅ /kaggle/working/ham10000_model_v1.keras


In [8]:
# 🚀 Yeni deneme: val_auc metrikli
from tensorflow import keras

# Tekrar compile (AUC dahil)
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="categorical_crossentropy",
    metrics=["accuracy", keras.metrics.AUC(name="auc")]
)

# Class weights hesaplamıştık zaten -> class_weight değişkenini kullan
# Yeni callback'ler
ckpt = keras.callbacks.ModelCheckpoint(
    "/kaggle/working/best_model_auc.keras",
    monitor="val_auc", save_best_only=True, mode="max", verbose=1
)
es = keras.callbacks.EarlyStopping(
    monitor="val_auc", patience=5, restore_best_weights=True, mode="max"
)
rlr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_auc", factor=0.5, patience=2, verbose=1, mode="max"
)

# Eğitim
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=50,   # uzun eğitim için daha fazla epoch
    class_weight=class_weight,
    callbacks=[ckpt, es, rlr]
)

# Test sonucu
print("Test sonucu:", model.evaluate(test_ds, verbose=0))

Epoch 1/50
[1m219/220[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 236ms/step - accuracy: 0.7161 - auc: 0.9522 - loss: 0.8717
Epoch 1: val_auc improved from -inf to 0.95730, saving model to /kaggle/working/best_model_auc.keras
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 262ms/step - accuracy: 0.7166 - auc: 0.9523 - loss: 0.8693 - val_accuracy: 0.7510 - val_auc: 0.9573 - val_loss: 0.7408 - learning_rate: 0.0010
Epoch 2/50
[1m219/220[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 242ms/step - accuracy: 0.7269 - auc: 0.9559 - loss: 0.8122
Epoch 2: val_auc did not improve from 0.95730
[1m220/220[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m56s[0m 255ms/step - accuracy: 0.7273 - auc: 0.9560 - loss: 0.8099 - val_accuracy: 0.7397 - val_auc: 0.9544 - val_loss: 0.7604 - learning_rate: 0.0010
Epoch 3/50
[1m219/220[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 242ms/step - accuracy: 0.7299 - auc: 0.9558 - loss: 0.7979
Epoch 3: val_auc did not 