#### 📌 **HİPERPARAMETRELERİ BURADAN AYARLA**

In [1]:
learning_rate = 1e-3       
batch_size = 32           
pca_variance = 0.97       # PCA varyans koruma oranı
roi_crop_size = (256,256)
patch_size = (128,128)
mlp_hidden_units = [128, 256,512]
mlp_dropout_rate = 0.55
mlp_epochs = 55
mlp_earlystop_patience = 9
pixels_per_cell=(16,16)
cells_per_block=(2,2)
orientations=10

In [2]:
import os, re, joblib
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog, greycomatrix, greycoprops
from skimage.filters import threshold_otsu, median
from skimage.morphology import opening, disk
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import classification_report, f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from joblib import Parallel, delayed
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


#### 📁 **1. Görüntü Yolu Listeleme Fonksiyonu ve Ön işleme**


In [3]:
def process_single_image(args):
    img_path, label, classes = args
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    thr = threshold_otsu(img)
    mask = opening((img>thr).astype(np.uint8), disk(roi_crop_size[0]//10))
    img = cv2.bitwise_and(img, img, mask=mask)
    img = cv2.resize(img, roi_crop_size)
    img = img.astype(np.float32)

    center = np.array(roi_crop_size) // 2
    ps = np.array(patch_size) // 2
    patch = img[center[0]-ps[0]:center[0]+ps[0], center[1]-ps[1]+0:center[1]+ps[1]]

    patch = median(patch)
    patch = cv2.GaussianBlur(patch, (3, 3), 0)
    patch = patch.astype(np.float32) / 255.0

    return patch, classes.index(label)


In [4]:
def extract_and_preprocess_parallel(root_dir, classes, n_jobs=-1):
    pattern = re.compile(r'^[A-Z]_\d+_\d+\.(LEFT|RIGHT)_(CC|MLO)\.jpg$', re.IGNORECASE)
    siniflar = ['benign', 'cancer', 'normal']
    yollar, etiketler = [], []

    for cls in siniflar:
        cls_yolu = os.path.join(root_dir, cls)
        if not os.path.isdir(cls_yolu):
            continue
        for durum_id in os.listdir(cls_yolu):
            durum_yolu = os.path.join(cls_yolu, durum_id)
            if not os.path.isdir(durum_yolu):
                continue
            for dosya_adi in os.listdir(durum_yolu):
                if pattern.match(dosya_adi):
                    yollar.append(os.path.join(durum_yolu, dosya_adi))
                    etiketler.append(cls)

    # === Paralel işleme ===
    arg_list = [(img_path, lbl, classes) for img_path, lbl in zip(yollar, etiketler)]
    results = Parallel(n_jobs=n_jobs)(delayed(process_single_image)(args) for args in arg_list)

    X, y = zip(*results)
    np.save("mask_morfolojik_x",X)
    np.save("mask_morfolojik_y",y)
    return np.array(X), np.array(y)

#### 🧼 **2. ROI extraction**


In [5]:
def extract_features_from_image(im):
    h = hog(im, pixels_per_cell=pixels_per_cell,
                cells_per_block=cells_per_block,
                orientations=orientations)
    g = greycomatrix((im*255).astype(np.uint8), [1], [0], levels=256)
    glcm = [greycoprops(g, prop)[0, 0] for prop in ('contrast', 'homogeneity', 'energy')]
    area = np.sum(im > 0.5)
    perimeter = np.sum(cv2.Canny((im * 255).astype(np.uint8), 50, 150) > 0)
    ecc = 0
    if area > 10:
        pts = np.argwhere(im > 0.5)
        rect = cv2.minAreaRect(pts.astype(np.float32))
        ecc = rect[1][0] / (rect[1][1] + 1e-8)
    return np.hstack([h, glcm, area, perimeter, ecc])

def extract_features(X, out_prefix=None, n_jobs=-1):
    # 2. UPDATE THE FUNCTION NAME INSIDE THE PARALLEL CALL HERE
    feats = Parallel(n_jobs=n_jobs)(delayed(extract_features_from_image)(im) for im in X)
    feats = np.array(feats)
    pca = PCA(n_components=pca_variance)
    X_feat = pca.fit_transform(feats)
    return X_feat, pca

#### 🧠 **3. CNN Model Tanımı**

In [6]:
def create_mlp_model(input_dim, num_classes, hidden_units, dropout_rate):
    model = Sequential([
        Dense(hidden_units[0], activation='relu', input_shape=(input_dim,)),
        Dropout(dropout_rate),
        Dense(hidden_units[1], activation='relu'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax')
    ])
    return model

#### 🚂 **4. Model Eğitimi ve Kaydetme**


In [7]:
def MLP(X_tr, X_test, y_tr, y_test,X_feat,classes):
    
    y_tr_cat = pd.get_dummies(y_tr).values
    # model
    model = create_mlp_model(
        input_dim    = X_feat.shape[1],
        num_classes  = len(classes),
        hidden_units = mlp_hidden_units,
        dropout_rate = mlp_dropout_rate
    )
    model.compile(optimizer=Adam(learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    callbacks = [
        EarlyStopping(patience=mlp_earlystop_patience, restore_best_weights=True),
        ReduceLROnPlateau(patience=mlp_earlystop_patience//2, factor=0.5)
    ]
    model.fit(X_tr, y_tr_cat,
              validation_split=0.1,
              epochs=mlp_epochs,
              batch_size=batch_size,
              callbacks=callbacks,
              verbose=1)
    olasiliklar = model.predict(X_test)
    esikler = np.linspace(0.2, 0.8, 41)
    en_iyi_f1, en_iyi_T_m, en_iyi_T_b = 0, 0.47, 0.53
    for T_m_aday in esikler:
        for T_b_aday in esikler:
            tahminler = [2 if p[2] >= T_m_aday else 1 if p[1] >= T_b_aday else 0 for p in olasiliklar]
            f1 = f1_score(y_test, tahminler, average='macro')
            if f1 > en_iyi_f1:
                en_iyi_f1, en_iyi_T_m, en_iyi_T_b = f1, T_m_aday, T_b_aday
    nihai_tahminler = [2 if p[2] >= en_iyi_T_m else 1 if p[1] >= en_iyi_T_b else 0 for p in olasiliklar]
    print()
    print("---------------------MLP_Results---------------------")
    print(f"En iyi eşik sınırı: T_m={en_iyi_T_m:.2f}, T_b={en_iyi_T_b:.2f}")
    print(classification_report(y_test, nihai_tahminler, target_names=['normal','benign','cancer']))
    print(confusion_matrix(y_test, nihai_tahminler))
    print("Accuracy:", accuracy_score(y_test, nihai_tahminler))
    print("Precision:", precision_score(y_test, nihai_tahminler, average='macro'))
    print("Recall:", recall_score(y_test, nihai_tahminler, average='macro'))
    try:
        print("AUC:", roc_auc_score(pd.get_dummies(y_test), olasiliklar, average='macro', multi_class='ovr'))
    except:
        print("AUC hesaplanamadı (muhtemelen tek sınıf tahmini nedeniyle)")

    model_name="MLP_1_3"
    model.save(f"{model_name}.h5")
    print(f"Model kaydedildi: {model_name}")


 Metirkler**

#### 💾 **6. Model ve Eşik Kayıtları**

#### 📌 **HİPERPARAMETRELERİ BURADAN AYARLA**

In [8]:

n_estimators=1000
max_depth=40
random_state=45

#### 🧠 **3. CNN Model Tanımı**

In [9]:
def Random_Forest(X_tr, X_test, y_tr, y_test,X_feat,classes):
    
    # model tanımı
    model = RandomForestClassifier(n_estimators=n_estimators,
                                   max_depth=max_depth,
                                   random_state=random_state,min_samples_split=2,min_samples_leaf=3)
    model.fit(X_tr, y_tr)
    # kaydet
    
    
    # değerlendirme
    olasiliklar = model.predict_proba(X_test)
    esikler = np.linspace(0.2, 0.8, 41)
    en_iyi_f1, en_iyi_T_m, en_iyi_T_b = 0, 0.49, 0.51
    for T_m_aday in esikler:
        for T_b_aday in esikler:
            tahminler = [2 if p[2] >= T_m_aday else 1 if p[1] >= T_b_aday else 0 for p in olasiliklar]
            f1 = f1_score(y_test, tahminler, average='macro')
            if f1 > en_iyi_f1:
                en_iyi_f1, en_iyi_T_m, en_iyi_T_b = f1, T_m_aday, T_b_aday
    nihai_tahminler = [2 if p[2] >= en_iyi_T_m else 1 if p[1] >= en_iyi_T_b else 0 for p in olasiliklar]
    print()
    print("-----------------RF_Results-----------------")
    print(f"En iyi eşik sınırı: T_m={en_iyi_T_m:.2f}, T_b={en_iyi_T_b:.2f}")
    print(classification_report(y_test, nihai_tahminler, target_names=['normal','benign','cancer']))
    print(confusion_matrix(y_test, nihai_tahminler))
    print("Accuracy:", accuracy_score(y_test, nihai_tahminler))
    print("Precision:", precision_score(y_test, nihai_tahminler, average='macro'))
    print("Recall:", recall_score(y_test, nihai_tahminler, average='macro'))
    try:
        print("AUC:", roc_auc_score(pd.get_dummies(y_test), olasiliklar, average='macro', multi_class='ovr'))
    except:
        print("AUC hesaplanamadı (muhtemelen tek sınıf tahmini nedeniyle)")

    joblib.dump(model, "RF_1_3.pkl")
    print(f"Model kaydedildi:")

#### 📌 **HİPERPARAMETRELERİ BURADAN AYARLA**

In [10]:
learning_rate = 1e-5      
xgb_n_estimators = 300
xgb_max_depth = 8
xgb_learning_rate = 0.1
xgb_subsample = 0.75
xgb_colsample_bytree = 0.8
xgb_earlystop_rounds = 32

#### 🧠 **3. CNN Model Tanımı**

In [11]:
def XGBoost(X_tr, X_test, y_tr, y_test,X_feat,classes):
    dtrain = xgb.DMatrix(X_tr, label=y_tr)
    dtest  = xgb.DMatrix(X_test, label=y_test)
    params = {
        'objective':'multi:softprob',
        'num_class':3,
        'eta':xgb_learning_rate,
        'max_depth':xgb_max_depth,
        'subsample':xgb_subsample,
        'colsample_bytree':xgb_colsample_bytree,
        'eval_metric':'mlogloss'
    }
    model = xgb.train(params, dtrain, num_boost_round=xgb_n_estimators,
                    early_stopping_rounds=xgb_earlystop_rounds,
                    evals=[(dtest,'eval')], verbose_eval=False)
  

    dtest_predict = xgb.DMatrix(X_test)
    olasiliklar = model.predict(dtest_predict) # Corrected line: passing dtest_predict instead of X_test
    esikler = np.linspace(0.2, 0.8, 41)
    en_iyi_f1, en_iyi_T_m, en_iyi_T_b = 0, 0.48, 0.52
    for T_m_aday in esikler:
        for T_b_aday in esikler:
            tahminler = [2 if p[2] >= T_m_aday else 1 if p[1] >= T_b_aday else 0 for p in olasiliklar]
            f1 = f1_score(y_test, tahminler, average='macro')
            if f1 > en_iyi_f1:
                en_iyi_f1, en_iyi_T_m, en_iyi_T_b = f1, T_m_aday, T_b_aday
    nihai_tahminler = [2 if p[2] >= en_iyi_T_m else 1 if p[1] >= en_iyi_T_b else 0 for p in olasiliklar]

    print("------------XPBoost------------------")
    print(f"En iyi eşik sınırı: T_m={en_iyi_T_m:.2f}, T_b={en_iyi_T_b:.2f}")
    print(classification_report(y_test, nihai_tahminler, target_names=['normal','benign','cancer']))
    print(confusion_matrix(y_test, nihai_tahminler))
    print("Accuracy:", accuracy_score(y_test, nihai_tahminler))
    print("Precision:", precision_score(y_test, nihai_tahminler, average='macro'))
    print("Recall:", recall_score(y_test, nihai_tahminler, average='macro'))
    try:
        print("AUC:", roc_auc_score(pd.get_dummies(y_test), olasiliklar, average='macro', multi_class='ovr'))
    except:
        print("AUC hesaplanamadı (muhtemelen tek sınıf tahmini nedeniyle)")

    model.save_model(f"XGBoost_1_3.json")
    print("Model kaydedildi:")

#### 🧠 **3. CNN Model Tanımı**

In [12]:
if __name__ == '__main__':
    classes = ['normal','benign','cancer']
    kok_dizin = os.path.join(os.getcwd(), "Project1")
    X, y = extract_and_preprocess_parallel(kok_dizin, classes,n_jobs=-1)
    X_feat, pca = extract_features(X)
    X_tr, X_test, y_tr, y_test = train_test_split(X_feat, y, test_size=0.1,
                                               stratify=y, random_state=42)
    np.save("X_test_1_3.npy", X_test)
    np.save("y_test_1_3.npy", y_test)
    np.savez("X_feat_1_3.npz", X_feat)
    np.savez("X_pca_1_3.npz", pca)
    np.save("Xtr_preprocessed_1_3.npy", X_tr)
    np.save("ytr_preprocessed_1_3.npy", y_tr)
    
    MLP(X_tr, X_test, y_tr, y_test,X_feat,classes)
    
    

Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 11/55
Epoch 12/55
Epoch 13/55

---------------------MLP_Results---------------------
En iyi eşik sınırı: T_m=0.22, T_b=0.64
              precision    recall  f1-score   support

      normal       0.42      0.53      0.47       241
      benign       0.39      0.69      0.50       268
      cancer       0.38      0.01      0.02       272

    accuracy                           0.40       781
   macro avg       0.40      0.41      0.33       781
weighted avg       0.40      0.40      0.32       781

[[128 110   3]
 [ 82 184   2]
 [ 92 177   3]]
Accuracy: 0.4033290653008963
Precision: 0.39649974456677123
Recall: 0.4095723026313393
AUC: 0.5617580251800322
Model kaydedildi: MLP_1_3


In [13]:
Random_Forest(X_tr, X_test, y_tr, y_test,X_feat,classes)


-----------------RF_Results-----------------
En iyi eşik sınırı: T_m=0.46, T_b=0.43
              precision    recall  f1-score   support

      normal       0.39      0.51      0.44       241
      benign       0.39      0.34      0.36       268
      cancer       0.38      0.32      0.35       272

    accuracy                           0.39       781
   macro avg       0.39      0.39      0.38       781
weighted avg       0.39      0.39      0.38       781

[[124  51  66]
 [ 99  90  79]
 [ 95  89  88]]
Accuracy: 0.3866837387964149
Precision: 0.38630795272593416
Recall: 0.3912910429546191
AUC: 0.5425813590844639
Model kaydedildi:


In [14]:
XGBoost(X_tr, X_test, y_tr, y_test,X_feat,classes)

------------XPBoost------------------
En iyi eşik sınırı: T_m=0.37, T_b=0.37
              precision    recall  f1-score   support

      normal       0.40      0.41      0.40       241
      benign       0.37      0.28      0.32       268
      cancer       0.38      0.46      0.42       272

    accuracy                           0.38       781
   macro avg       0.38      0.38      0.38       781
weighted avg       0.38      0.38      0.38       781

[[ 99  53  89]
 [ 77  76 115]
 [ 73  74 125]]
Accuracy: 0.38412291933418696
Precision: 0.3839712692084763
Recall: 0.38464309827479637
AUC: 0.5699903599561056
Model kaydedildi:
