#### 📌 **HİPERPARAMETRELERİ BURADAN AYARLA**

In [29]:
learning_rate = 1e-4      
batch_size = 64           
pca_variance = 0.96       # PCA varyans koruma oranı
roi_crop_size = (256,256)
patch_size = (128,128)
mlp_hidden_units = [256,528,1024]
mlp_dropout_rate = 0.5
mlp_epochs = 60
mlp_earlystop_patience = 15
pixels_per_cell=(8,8)
cells_per_block=(2,2)
orientations=9

In [30]:
import os, re, joblib
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog, greycomatrix, greycoprops
from skimage.filters import threshold_otsu, median
from skimage.morphology import opening, disk
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import classification_report, f1_score, accuracy_score, precision_score, recall_score, roc_auc_score
from joblib import Parallel, delayed
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau


#### 📁 **1. Görüntü Yolu Listeleme Fonksiyonu ve Ön işleme**


In [31]:
def process_single_image(args):
    img_path, label, classes = args
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, roi_crop_size)
    img = img.astype(np.float32)

    center = np.array(roi_crop_size) // 2
    ps = np.array(patch_size) // 2
    patch = img[center[0]-ps[0]:center[0]+ps[0], center[1]-ps[1]+0:center[1]+ps[1]]

    patch = median(patch)
    patch = cv2.GaussianBlur(patch, (3, 3), 0)
    patch = patch.astype(np.float32) / 255.0

    return patch, classes.index(label)


In [32]:
def extract_and_preprocess_parallel(root_dir, classes, n_jobs=-1):
    class_dirs = {
        'normal': ['Birad1'],
        'benign': ['Birad3'],
        'cancer': ['Birad4', 'Birad5'],
    }
    paths, labels = [], []
    for cls, subdirs in class_dirs.items():
        for sd in subdirs:
            dir_path = os.path.join(root_dir, sd)
            if not os.path.isdir(dir_path):
                continue
            for fname in sorted(os.listdir(dir_path)):
                if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
                    paths.append(os.path.join(dir_path, fname))
                    labels.append(cls)
    

    # === Paralel işleme ===
    arg_list = [(img_path, lbl, classes) for img_path, lbl in zip(paths, labels)]
    results = Parallel(n_jobs=n_jobs)(delayed(process_single_image)(args) for args in arg_list)

    X, y = zip(*results)
    return np.array(X), np.array(y)

#### 🧼 **2. ROI extraction**


In [33]:
def extract_features_from_image(im):
    h = hog(im, pixels_per_cell=pixels_per_cell,
                cells_per_block=cells_per_block,
                orientations=orientations)
    g = greycomatrix((im*255).astype(np.uint8), [1], [0], levels=256)
    glcm = [greycoprops(g, prop)[0, 0] for prop in ('contrast', 'homogeneity', 'energy')]
    area = np.sum(im > 0.5)
    perimeter = np.sum(cv2.Canny((im * 255).astype(np.uint8), 50, 150) > 0)
    ecc = 0
    if area > 10:
        pts = np.argwhere(im > 0.5)
        rect = cv2.minAreaRect(pts.astype(np.float32))
        ecc = rect[1][0] / (rect[1][1] + 1e-8)
    return np.hstack([h, glcm, area, perimeter, ecc])

def extract_features(X, out_prefix=None, n_jobs=-1):
    # 2. UPDATE THE FUNCTION NAME INSIDE THE PARALLEL CALL HERE
    feats = Parallel(n_jobs=n_jobs)(delayed(extract_features_from_image)(im) for im in X)
    feats = np.array(feats)
    pca = PCA(n_components=pca_variance)
    X_feat = pca.fit_transform(feats)
    return X_feat, pca

#### 🧠 **3. CNN Model Tanımı**

In [34]:
def create_mlp_model(input_dim, num_classes, hidden_units, dropout_rate):
    model = Sequential([
        Dense(hidden_units[0], activation='relu', input_shape=(input_dim,)),
        Dense(hidden_units[0], activation='relu', input_shape=(input_dim,)),
        Dropout(dropout_rate),
        Dense(hidden_units[1], activation='relu'),
        Dropout(dropout_rate),
        Dense(num_classes, activation='softmax')
    ])
    return model

#### 🚂 **4. Model Eğitimi ve Kaydetme**


In [35]:
def MLP(X_tr, X_test, y_tr, y_test,X_feat,classes):
    
    y_tr_cat = pd.get_dummies(y_tr).values
    # model
    model = create_mlp_model(
        input_dim    = X_feat.shape[1],
        num_classes  = len(classes),
        hidden_units = mlp_hidden_units,
        dropout_rate = mlp_dropout_rate
    )
    model.compile(optimizer=Adam(learning_rate),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    callbacks = [
        EarlyStopping(patience=mlp_earlystop_patience, restore_best_weights=True),
        ReduceLROnPlateau(patience=mlp_earlystop_patience//2, factor=0.5)
    ]
    model.fit(X_tr, y_tr_cat,
              validation_split=0.1,
              epochs=mlp_epochs,
              batch_size=batch_size,
              callbacks=callbacks,
              verbose=1)
    olasiliklar = model.predict(X_test)
    esikler = np.linspace(0.18, 0.72, 40)
    en_iyi_f1, en_iyi_T_m, en_iyi_T_b = 0, 0.47, 0.53
    for T_m_aday in esikler:
        for T_b_aday in esikler:
            tahminler = [2 if p[2] >= T_m_aday else 1 if p[1] >= T_b_aday else 0 for p in olasiliklar]
            f1 = f1_score(y_test, tahminler, average='macro')
            if f1 > en_iyi_f1:
                en_iyi_f1, en_iyi_T_m, en_iyi_T_b = f1, T_m_aday, T_b_aday
    nihai_tahminler = [2 if p[2] >= en_iyi_T_m else 1 if p[1] >= en_iyi_T_b else 0 for p in olasiliklar]
    print()
    print("---------------------MLP_Results---------------------")
    print(f"En iyi eşik sınırı: T_m={en_iyi_T_m:.2f}, T_b={en_iyi_T_b:.2f}")
    print(classification_report(y_test, nihai_tahminler, target_names=['normal','benign','cancer']))
    print(confusion_matrix(y_test, nihai_tahminler))
    print("Accuracy:", accuracy_score(y_test, nihai_tahminler))
    print("Precision:", precision_score(y_test, nihai_tahminler, average='macro'))
    print("Recall:", recall_score(y_test, nihai_tahminler, average='macro'))
    try:
        print("AUC:", roc_auc_score(pd.get_dummies(y_test), olasiliklar, average='macro', multi_class='ovr'))
    except:
        print("AUC hesaplanamadı (muhtemelen tek sınıf tahmini nedeniyle)")

    model_name="MLP_2_2"
    model.save(f"{model_name}.h5")
    print(f"Model kaydedildi: {model_name}")


 Metirkler**

#### 💾 **6. Model ve Eşik Kayıtları**

#### 📌 **HİPERPARAMETRELERİ BURADAN AYARLA**

In [36]:

n_estimators=400
max_depth=40    
random_state=45

#### 🧠 **3. CNN Model Tanımı**

In [37]:
def Random_Forest(X_tr, X_test, y_tr, y_test,X_feat,classes):
    
    # model tanımı
    model = RandomForestClassifier(n_estimators=n_estimators,
                                   max_depth=max_depth,
                                   random_state=random_state)
    model.fit(X_tr, y_tr)
    # kaydet
    
    
    # değerlendirme
    olasiliklar = model.predict_proba(X_test)
    esikler = np.linspace(0.05, 0.95, 50)
    en_iyi_f1, en_iyi_T_m, en_iyi_T_b = 0, 0.49, 0.51
    for T_m_aday in esikler:
        for T_b_aday in esikler:
            tahminler = [2 if p[2] >= T_m_aday else 1 if p[1] >= T_b_aday else 0 for p in olasiliklar]
            f1 = f1_score(y_test, tahminler, average='macro')
            if f1 > en_iyi_f1:
                en_iyi_f1, en_iyi_T_m, en_iyi_T_b = f1, T_m_aday, T_b_aday
    nihai_tahminler = [2 if p[2] >= en_iyi_T_m else 1 if p[1] >= en_iyi_T_b else 0 for p in olasiliklar]
    print()
    print("-----------------RF_Results-----------------")
    print(f"En iyi eşik sınırı: T_m={en_iyi_T_m:.2f}, T_b={en_iyi_T_b:.2f}")
    print(classification_report(y_test, nihai_tahminler, target_names=['normal','benign','cancer']))
    print(confusion_matrix(y_test, nihai_tahminler))
    print("Accuracy:", accuracy_score(y_test, nihai_tahminler))
    print("Precision:", precision_score(y_test, nihai_tahminler, average='macro'))
    print("Recall:", recall_score(y_test, nihai_tahminler, average='macro'))
    try:
        print("AUC:", roc_auc_score(pd.get_dummies(y_test), olasiliklar, average='macro', multi_class='ovr'))
    except:
        print("AUC hesaplanamadı (muhtemelen tek sınıf tahmini nedeniyle)")

    joblib.dump(model, "RF_2_2.pkl")
    print(f"Model kaydedildi:")

#### 📌 **HİPERPARAMETRELERİ BURADAN AYARLA**

In [38]:
learning_rate = 1e-4      
xgb_n_estimators = 250
xgb_max_depth = 25
xgb_learning_rate = 0.05
xgb_subsample = 0.68
xgb_colsample_bytree = 0.76
xgb_earlystop_rounds = 25

#### 🧠 **3. CNN Model Tanımı**

In [39]:
def XGBoost(X_tr, X_test, y_tr, y_test,X_feat,classes):
    dtrain = xgb.DMatrix(X_tr, label=y_tr)
    dtest  = xgb.DMatrix(X_test, label=y_test)
    params = {
        'objective':'multi:softprob',
        'num_class':3,
        'eta':xgb_learning_rate,
        'max_depth':xgb_max_depth,
        'subsample':xgb_subsample,
        'colsample_bytree':xgb_colsample_bytree,
        'eval_metric':'mlogloss'
    }
    model = xgb.train(params, dtrain, num_boost_round=xgb_n_estimators,
                    early_stopping_rounds=xgb_earlystop_rounds,
                    evals=[(dtest,'eval')], verbose_eval=False)

    dtest_predict = xgb.DMatrix(X_test)
    olasiliklar = model.predict(dtest_predict) # Corrected line: passing dtest_predict instead of X_test
    esikler = np.linspace(0.05, 0.95, 50)
    en_iyi_f1, en_iyi_T_m, en_iyi_T_b = 0, 0.48, 0.52
    for T_m_aday in esikler:
        for T_b_aday in esikler:
            tahminler = [2 if p[2] >= T_m_aday else 1 if p[1] >= T_b_aday else 0 for p in olasiliklar]
            f1 = f1_score(y_test, tahminler, average='macro')
            if f1 > en_iyi_f1:
                en_iyi_f1, en_iyi_T_m, en_iyi_T_b = f1, T_m_aday, T_b_aday
    nihai_tahminler = [2 if p[2] >= en_iyi_T_m else 1 if p[1] >= en_iyi_T_b else 0 for p in olasiliklar]

    print("------------XPBoost------------------")
    print(f"En iyi eşik sınırı: T_m={en_iyi_T_m:.2f}, T_b={en_iyi_T_b:.2f}")
    print(classification_report(y_test, nihai_tahminler, target_names=['normal','benign','cancer']))
    print(confusion_matrix(y_test, nihai_tahminler))
    print("Accuracy:", accuracy_score(y_test, nihai_tahminler))
    print("Precision:", precision_score(y_test, nihai_tahminler, average='macro'))
    print("Recall:", recall_score(y_test, nihai_tahminler, average='macro'))
    try:
        print("AUC:", roc_auc_score(pd.get_dummies(y_test), olasiliklar, average='macro', multi_class='ovr'))
    except:
        print("AUC hesaplanamadı (muhtemelen tek sınıf tahmini nedeniyle)")

    model.save_model(f"XGBoost_2_2.json")
    print("Model kaydedildi:")

#### 🧠 **3. CNN Model Tanımı**

In [40]:
if __name__ == '__main__':
    classes = ['normal','benign','cancer']
    kok_dizin = os.path.join(os.getcwd(), "Project2")
    X, y = extract_and_preprocess_parallel(kok_dizin, classes,n_jobs=-1)
    X_feat, pca = extract_features(X)
    X_tr, X_test, y_tr, y_test = train_test_split(X_feat, y, test_size=0.1,
                                               stratify=y, random_state=42)
    np.save("X_test_2_2.npy", X_test)
    np.save("y_test_2_2.npy", y_test)
    np.savez("X_feat_2_2.npz", X_feat)
    np.savez("X_pca_2_2.npz", pca)
    np.save("Xtr_preprocessed_2_2.npy", X_tr)
    np.save("ytr_preprocessed_2_2.npy", y_tr)
    
    MLP(X_tr, X_test, y_tr, y_test,X_feat,classes)
    
    

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60

---------------------MLP_Results---------------------
En iyi eşik sınırı: T_m=0.24, T_b=0.25
              precision    recall  f1-score   support

      normal       0.84      0.70      0.76       187
      benign       0.27      0.56      0.37        39
      cancer       0.00      0.00      0.00        12

    accuracy                           0.64       238
   macro avg       0.37      0.

In [41]:
Random_Forest(X_tr, X_test, y_tr, y_test,X_feat,classes)


-----------------RF_Results-----------------
En iyi eşik sınırı: T_m=0.23, T_b=0.36
              precision    recall  f1-score   support

      normal       0.81      0.84      0.83       187
      benign       0.32      0.26      0.29        39
      cancer       0.15      0.17      0.16        12

    accuracy                           0.71       238
   macro avg       0.43      0.42      0.43       238
weighted avg       0.70      0.71      0.71       238

[[158  20   9]
 [ 27  10   2]
 [  9   1   2]]
Accuracy: 0.7142857142857143
Precision: 0.43028659623272186
Recall: 0.4226655697243933
AUC: 0.6235867676520411
Model kaydedildi:


In [42]:
XGBoost(X_tr, X_test, y_tr, y_test,X_feat,classes)

------------XPBoost------------------
En iyi eşik sınırı: T_m=0.31, T_b=0.27
              precision    recall  f1-score   support

      normal       0.81      0.82      0.81       187
      benign       0.26      0.31      0.28        39
      cancer       1.00      0.08      0.15        12

    accuracy                           0.70       238
   macro avg       0.69      0.40      0.41       238
weighted avg       0.72      0.70      0.69       238

[[153  34   0]
 [ 27  12   0]
 [ 10   1   1]]
Accuracy: 0.6974789915966386
Precision: 0.6868607689436357
Recall: 0.40306915306915303
AUC: 0.5859079905624176
Model kaydedildi:
