<a href="https://colab.research.google.com/github/SuhedaTaspinar/skin-cancer-classification/blob/main/skinCancerClassificationModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Locate the metadata CSV and the two image folders by scanning /kaggle/input.
metadata_file = data_dir1 = data_dir2 = None

for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        # The metadata file is matched by name, wherever it lives.
        if "HAM10000_metadata.csv" in name:
            metadata_file = os.path.join(folder, name)
            continue
        # Only .jpg files can identify an image folder.
        if not name.endswith('.jpg'):
            continue
        if "HAM10000_images_part_1" in folder:
            data_dir1 = folder
        elif "HAM10000_images_part_2" in folder:
            data_dir2 = folder

# All three locations are required by the rest of the script.
if None in (metadata_file, data_dir1, data_dir2):
    raise FileNotFoundError("Gerekli dosya yolları bulunamadı.")

# Load the metadata table; this script reads its 'image_id' and 'dx' columns.
metadata = pd.read_csv(metadata_file)

# Map each diagnosis string to an integer class index. np.unique returns the
# values sorted, and the mapping is built from the full table so that it does
# not depend on which images could actually be loaded.
label_dict = {label: idx for idx, label in enumerate(np.unique(metadata['dx'].values))}

# Side length (in pixels) that every image is resized to.
img_size = 128

# Build paths, pixel arrays and labels in a single pass so the three stay
# aligned: a row whose image is missing or unreadable is skipped as a whole.
# (Taking labels from every metadata row while skipping some images leaves
# the two arrays with different lengths, which breaks the split below.)
image_paths = []
images = []
class_ids = []
for img_id, dx in zip(metadata['image_id'], metadata['dx']):
    candidates = (
        os.path.join(data_dir1, f"{img_id}.jpg"),
        os.path.join(data_dir2, f"{img_id}.jpg"),
    )
    img_path = next((path for path in candidates if os.path.exists(path)), None)
    if img_path is None:
        print(f"Görüntü yolu bulunamadı: {img_id}")
        continue

    # cv2.imread signals failure by returning None instead of raising.
    img = cv2.imread(img_path)
    if img is None:
        print(f"Görüntü yüklenemedi: {img_path}")
        continue

    image_paths.append(img_path)
    images.append(cv2.resize(img, (img_size, img_size)))
    class_ids.append(label_dict[dx])

if not images:
    raise RuntimeError("Hiçbir görüntü yüklenemedi.")

images = np.array(images)
class_ids = np.array(class_ids)
# One-hot targets, as required by the categorical cross-entropy loss.
labels = to_categorical(class_ids, num_classes=len(label_dict))

# Split into train and test sets. Stratify by class so both sets keep the same
# class proportions; stratification needs at least two samples per present
# class, so fall back to a plain random split otherwise.
class_counts = np.bincount(class_ids)
stratify_by = class_ids if class_counts[class_counts > 0].min() >= 2 else None
# NOTE(review): if the metadata holds several images of the same lesion, a
# per-image split can put one lesion in both sets — confirm whether a
# group-wise split is needed.
X_train, X_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42, stratify=stratify_by
)

# CNN classifier: four stages of conv -> batch norm -> 2x2 max pool with
# 32, 64, 128 and 256 filters, followed by global average pooling and a dense
# head that ends in a softmax over the classes in label_dict.
model = Sequential()
for stage, n_filters in enumerate((32, 64, 128, 256)):
    # Only the very first layer declares the input shape.
    extra = {'input_shape': (img_size, img_size, 3)} if stage == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), activation='relu', **extra))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(GlobalAveragePooling2D())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_dict), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Augmentation applied on the fly to the training batches only.
datagen = ImageDataGenerator(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    brightness_range=[0.7, 1.3],
    fill_mode='nearest'
)

# Hold out part of the training data for validation. Early stopping (which
# restores the best weights) and the learning-rate schedule both monitor
# val_loss; if they monitored the test set, the accuracy reported at the end
# would be measured on data that was used to select the weights.
train_class_ids = y_train.argmax(axis=1)
train_counts = np.bincount(train_class_ids)
# Stratification needs at least two samples of every class that is present.
stratify_by = train_class_ids if train_counts[train_counts > 0].min() >= 2 else None
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.15, random_state=42, stratify=stratify_by
)

# Callbacks: stop when val_loss stalls for 8 epochs, halve the learning rate
# after 4 stalled epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4)

# Train the model. steps_per_epoch is left unset: the iterator returned by
# flow() has a length, so each epoch covers every batch, including the final
# partial one that a floor division by the batch size would drop.
batch_size = 32
history = model.fit(
    datagen.flow(X_fit, y_fit, batch_size=batch_size),
    validation_data=(X_val, y_val),
    epochs=50,
    callbacks=[early_stopping, reduce_lr]
)

# Final evaluation on the test set, which played no part in training.
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Doğruluğu: {test_accuracy * 100:.2f}%")

# Plot training vs. validation curves side by side: accuracy on the left,
# loss on the right. Each row: (train key, val key, train label, val label,
# y-axis label).
panels = (
    ('accuracy', 'val_accuracy', 'Eğitim Doğruluğu', 'Doğrulama Doğruluğu', 'Doğruluk'),
    ('loss', 'val_loss', 'Eğitim Kaybı', 'Doğrulama Kaybı', 'Kayıp'),
)

plt.figure(figsize=(12, 4))
for position, (train_key, val_key, train_label, val_label, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.show()

# Persist the trained model under /kaggle/working and report where it went.
model_save_path = '/kaggle/working/ham10000_skin_cancer_model.h5'
model.save(model_save_path)
print(f"Model {model_save_path} yoluna kaydedildi.")

from IPython.display import FileLink

# Keep the link as the final expression so that, when this runs as a notebook
# cell, it is shown as the cell's output.
download_link = FileLink(model_save_path)
download_link