<a href="https://colab.research.google.com/github/SelenArikan/Cancer/blob/main/CancerDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import glob
import json
import shutil
import zipfile
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks, applications
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import f1_score, accuracy_score, recall_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns


# Kaggle API credentials used by the `kaggle` CLI further down.
# The username placeholder is Turkish for "write your Kaggle username here";
# replace both values before running.
# NOTE(review): do not commit real credentials together with the notebook.
kaggle_username = "BURAYA_KAGGLE_KULLANICI_ADINI_YAZ"
kaggle_key = "xxxxxxxxxxx"

print("Starting")


api_token = {"username": kaggle_username, "key": kaggle_key}

# Write the token straight to ~/.kaggle/kaggle.json instead of creating it in
# the working directory and copying it: the old flow left a second copy of the
# secret behind in the working directory with default permissions.
kaggle_dir = os.path.join(os.path.expanduser("~"), ".kaggle")
os.makedirs(kaggle_dir, exist_ok=True)
kaggle_json_path = os.path.join(kaggle_dir, "kaggle.json")

# Create the file with owner-only permissions from the start so the key is
# never readable by other users, not even briefly.
token_fd = os.open(kaggle_json_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(token_fd, 'w') as file:
    json.dump(api_token, file)

# A pre-existing file keeps its old mode on open(); enforce 0600 explicitly.
os.chmod(kaggle_json_path, 0o600)


print("\n Dataset loading...")
if os.path.exists("breast-histopathology-images.zip"):
    os.remove("breast-histopathology-images.zip")
if os.path.exists("/content/dataset"):
    shutil.rmtree("/content/dataset")

!kaggle datasets download -d paultimothymooney/breast-histopathology-images --force

print(" Zip ")
with zipfile.ZipFile("breast-histopathology-images.zip", 'r') as zip_ref:
    zip_ref.extractall("/content/dataset")


base_dir = '/content/dataset'

# glob returns paths in arbitrary (filesystem-dependent) order; sorting makes
# the seeded splits below reproducible across machines and runs.
image_paths = sorted(glob.glob(os.path.join(base_dir, '**', '*.png'), recursive=True))
if not image_paths:
    # Fail early with a clear message instead of a KeyError on df['label'].
    raise FileNotFoundError(
        f"No .png images found under {base_dir!r}; "
        "check that the dataset was downloaded and extracted."
    )

# The label is encoded in the file name: '...class0.png' -> '0', anything else -> '1'.
# Labels stay strings because the generators are used with class_mode='binary'.
data = [{'filepath': p, 'label': '0' if p.endswith('class0.png') else '1'} for p in image_paths]
df = pd.DataFrame(data)

# The recursive glob picks up every copy of a file that exists under more than
# one folder; identical images in both train and test would leak labels.
# NOTE(review): this archive is reported to ship a nested duplicate folder and
# patch file names are presumed unique per image -- confirm. This is a no-op
# when there are no duplicates.
duplicate_mask = df['filepath'].map(os.path.basename).duplicated()
if duplicate_mask.any():
    print(f" Dropped {int(duplicate_mask.sum())} duplicate image copies.")
    df = df.loc[~duplicate_mask].reset_index(drop=True)

# 80/20 train+val vs. test, then 80/20 train vs. val, both stratified by label.
# NOTE(review): the split is per image, not per patient -- patches from one
# patient may appear in several subsets; consider a grouped split.
train_val, test_df = train_test_split(df, test_size=0.2, stratify=df['label'], random_state=42)
train_df, val_df = train_test_split(train_val, test_size=0.2, stratify=train_val['label'], random_state=42)

print(f" Dataset Ready! Learning set: {len(train_df)} picture.")


IMG_SIZE = 96
BATCH_SIZE = 32

# Random geometric augmentation, applied to the training images only.
augmentation_options = dict(
    rotation_range=180,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    fill_mode='reflect',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation and test images are fed through unchanged (no augmentation).
# NOTE(review): no rescale is configured anywhere; Keras EfficientNet models
# are documented to take raw 0-255 pixels -- confirm for the installed version.
val_test_datagen = ImageDataGenerator()

# Settings shared by all three dataframe iterators.
flow_options = dict(
    x_col='filepath',
    y_col='label',
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    class_mode='binary',
)

train_gen = train_datagen.flow_from_dataframe(train_df, **flow_options)
val_gen = val_test_datagen.flow_from_dataframe(val_df, **flow_options)
# The test iterator keeps dataframe order so predictions line up with
# `test_gen.classes` during evaluation.
test_gen = val_test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_options)


def create_hybrid_model():
    """Build the EfficientNetB0 + self-attention binary classifier.

    The ImageNet-initialised EfficientNetB0 backbone (fully trainable) feeds a
    single pre-norm multi-head self-attention block with a residual
    connection. The attended sequence is average-pooled, regularised with
    dropout and passed through a small dense head ending in one sigmoid unit.

    Returns:
        An uncompiled Keras ``Model`` taking ``(IMG_SIZE, IMG_SIZE, 3)`` images
        and producing one sigmoid output per image.
    """
    image_input = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    backbone = applications.EfficientNetB0(
        include_top=False,
        weights='imagenet',
        input_tensor=image_input,
    )
    backbone.trainable = True

    # Collapse the backbone output into a (positions, channels) sequence.
    feature_map = backbone.output
    tokens = layers.Reshape((-1, feature_map.shape[-1]))(feature_map)

    num_heads = 4
    embed_dim = tokens.shape[-1]

    # Pre-norm self-attention; the residual adds the un-normalised tokens back.
    normed_tokens = layers.LayerNormalization(epsilon=1e-6)(tokens)
    attention_layer = layers.MultiHeadAttention(
        num_heads=num_heads, key_dim=embed_dim, dropout=0.1
    )
    attended = attention_layer(normed_tokens, normed_tokens)
    residual = layers.Add()([attended, tokens])

    # Classification head.
    pooled = layers.GlobalAveragePooling1D()(residual)
    regularised = layers.Dropout(0.5)(pooled)
    hidden = layers.Dense(128, activation='relu')(regularised)
    outputs = layers.Dense(1, activation='sigmoid')(hidden)

    return models.Model(inputs=image_input, outputs=outputs)

model = create_hybrid_model()


# 'balanced' class weights computed from the training labels, so both classes
# contribute comparably to the loss.
y_ints = train_df['label'].astype(int).values
weights = class_weight.compute_class_weight(
    'balanced',
    classes=np.unique(y_ints),
    y=y_ints,
)
class_weights = {0: weights[0], 1: weights[1]}

# Small learning rate: the whole backbone is trainable.
adam = optimizers.Adam(learning_rate=1e-5)
tracked_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.AUC(name='auc'),
]
model.compile(optimizer=adam, loss='binary_crossentropy', metrics=tracked_metrics)

# Both callbacks follow validation AUC (higher is better): stop after 5 epochs
# without improvement and keep/restore the best weights.
early_stopping = callbacks.EarlyStopping(
    monitor='val_auc', patience=5, restore_best_weights=True, mode='max'
)
checkpoint = callbacks.ModelCheckpoint(
    'best_hybrid_model.keras', monitor='val_auc', save_best_only=True, mode='max'
)
callbacks_list = [early_stopping, checkpoint]

print("\n Training start")
history = model.fit(
    train_gen,
    epochs=15,
    validation_data=val_gen,
    class_weight=class_weights,
    callbacks=callbacks_list,
)


print("\n Test prediction")
test_gen.reset()
y_pred_probs = model.predict(test_gen, verbose=1).ravel()
y_true = test_gen.classes

print("\n Finding best F1 score ")

# The decision threshold is tuned on the VALIDATION split and only then applied
# to the test split. Selecting it on the test predictions (as before) makes the
# reported test metrics optimistically biased.
# A dedicated unshuffled iterator is needed so predictions line up with
# `.classes`; `val_gen` was created with the default shuffling.
val_eval_gen = val_test_datagen.flow_from_dataframe(
    val_df, x_col='filepath', y_col='label', target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE, class_mode='binary', shuffle=False
)
val_pred_probs = model.predict(val_eval_gen, verbose=1).ravel()
val_true = np.asarray(val_eval_gen.classes)

best_f1 = 0.0
best_f1_thresh = 0.5
target_recall_thresh = 0.1
found_90_recall = False

# Scan thresholds 0.01 .. 0.94. The last threshold that still satisfies the
# recall constraint is kept, i.e. the highest one (fewest false positives).
for t in np.arange(0.01, 0.95, 0.01):
    preds = (val_pred_probs >= t).astype(int)
    # zero_division=0 keeps the scores at 0 (as before) but silences the
    # warning raised when a threshold yields no positive predictions.
    val_f1 = f1_score(val_true, preds, zero_division=0)
    val_recall = recall_score(val_true, preds, zero_division=0)

    if val_f1 > best_f1:
        best_f1 = val_f1
        best_f1_thresh = t

    if val_recall >= 0.90:
        target_recall_thresh = t
        found_90_recall = True

# Fall back to the conventional 0.5 when no threshold reaches 90% recall.
final_thresh = float(target_recall_thresh) if found_90_recall else 0.5

print("-" * 40)
print(f"Best validation F1: {best_f1:.3f} (threshold {best_f1_thresh:.2f})")
print(f"Selected final threshold: {final_thresh:.3f}")
if found_90_recall:
    print("Succed! Recall > %90 .")
else:
    print(" Recal<90, We used standart threshold.")
print("-" * 40)

# Final, unbiased report: apply the validation-selected threshold to the test set.
final_preds = (y_pred_probs >= final_thresh).astype(int)
print(classification_report(y_true, final_preds, target_names=['Normal', 'Cancer']))


cm = confusion_matrix(y_true, final_preds)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt='d', cmap='Purples', xticklabels=['Normal', 'Cancer'], yticklabels=['Normal', 'Cancer'])
plt.title(f'Final Matrix (Treshold: {final_thresh:.2f})')
plt.show()