In [1]:
import numpy as np
import pandas as pd
import cv2
import os

In [3]:
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [4]:
print(os.listdir("/gdrive/My Drive/DerinOgrenmeProje/lung_image_sets"))

['lung_squamous cell carcinoma', 'lung_adenocarcinoma', 'lung_benign tissue']


In [5]:
datadir = '/gdrive/My Drive/DerinOgrenmeProje/lung_image_sets'
categories = ['lung_benign tissue', 'lung_squamous cell carcinoma', 'lung_adenocarcinoma']
all_data = []
img_size = 128

In [6]:
def create_all_data():
    for category in categories:
        path=str(os.path.join(datadir, category))
        class_num=categories.index(category)
        count = 0
        for img in os.listdir(path):
            img_array=cv2.imread(os.path.join(path,img))
            new_array=cv2.resize(img_array,(img_size,img_size), interpolation=cv2.INTER_AREA)
            all_data.append([new_array,class_num])
            count += 1
        print(class_num,path,count)

create_all_data()

0 /gdrive/My Drive/DerinOgrenmeProje/lung_image_sets/lung_benign tissue 5000
1 /gdrive/My Drive/DerinOgrenmeProje/lung_image_sets/lung_squamous cell carcinoma 5000
2 /gdrive/My Drive/DerinOgrenmeProje/lung_image_sets/lung_adenocarcinoma 5000


In [7]:
print("Total Image: ", len(all_data))

Total Image:  15000


In [8]:
X = []
y = []

for categories, label in all_data:
  X.append(categories)
  y.append(label)

In [9]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state =42)

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.10, random_state=42)

In [10]:
x_train = np.array(x_train)
x_test = np.array(x_test)

y_train = np.array(y_train)
y_test = np.array(y_test)

x_val = np.array(x_val)
y_val = np.array(y_val)

In [11]:
print(x_train.shape)
print(x_test.shape)
print(x_val.shape)

print(y_train.shape)
print(y_test.shape)
print(y_val.shape)

(10800, 128, 128, 3)
(3000, 128, 128, 3)
(1200, 128, 128, 3)
(10800,)
(3000,)
(1200,)


In [12]:
import tensorflow as tf

train_yCl = tf.keras.utils.to_categorical(y_train, num_classes=3)
test_yCl = tf.keras.utils.to_categorical(y_test, num_classes=3)
valid_yCl = tf.keras.utils.to_categorical(y_val, num_classes=3)

In [13]:
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

def create_model(learn_rate):
    model = Sequential()

    model.add(Conv2D(32, (4,4), padding='same', activation='relu',input_shape=(img_size, img_size, 3)))
    model.add(Conv2D(32, (4,4), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    model.add(Conv2D(64, (4,4), padding='same', activation='relu'))
    model.add(Conv2D(64, (4,4), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    model.add(Conv2D(128, (4,4), padding='same', activation='relu'))
    model.add(Conv2D(128, (4,4), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(3))
    model.add(Activation('softmax'))

# epoch başı 195-293 iterasyon
    learn_rate = ExponentialDecay(learn_rate, decay_steps=1300, decay_rate=0.96, staircase=True)
    optimizer=Adam(learning_rate=learn_rate,beta_1=0.9,beta_2=0.999)

    model.compile(optimizer=optimizer,loss="categorical_crossentropy",metrics=["accuracy"])
    return model

In [14]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.12.0


In [15]:
from scikeras.wrappers import KerasClassifier

param_grid = {
    'learn_rate': [0.005, 0.01, 0.05]
}

model = KerasClassifier(build_fn=create_model,batch_size=32, epochs=5, learn_rate=0.005)

In [16]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=3)
grid_result = grid.fit(x_train, train_yCl)

  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.563056 using {'learn_rate': 0.005}


In [18]:
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV

param_dist = {
    'learn_rate': uniform(0.005, 0.1)
}

random = RandomizedSearchCV(estimator=model, param_distributions=param_dist, scoring='accuracy', cv=3, n_iter=3)
random_result = random.fit(x_train, train_yCl)

  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  X, y = self._initialize(X, y)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [19]:
print("Best: %f using %s" % (random_result.best_score_, random_result.best_params_))

Best: 0.336111 using {'learn_rate': 0.0180131508459296}


In [20]:
from tensorflow import keras

callback_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='model.h5',
        monitor = 'val_accuracy', save_best_only=True, verbose=3
    ),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=6, verbose=3)
]

In [21]:
history = create_model(0.005).fit(x_train, train_yCl,
                    batch_size=32,
                    validation_data = (x_val, valid_yCl),
                    callbacks = callback_list,
                    epochs = 13
                    )

Epoch 1/13
Epoch 1: val_accuracy improved from -inf to 0.66750, saving model to model.h5


  saving_api.save_model(


Epoch 2/13
Epoch 2: val_accuracy did not improve from 0.66750
Epoch 3/13
Epoch 3: val_accuracy did not improve from 0.66750
Epoch 4/13
Epoch 4: val_accuracy improved from 0.66750 to 0.85250, saving model to model.h5
Epoch 5/13
Epoch 5: val_accuracy did not improve from 0.85250
Epoch 6/13
Epoch 6: val_accuracy improved from 0.85250 to 0.89500, saving model to model.h5
Epoch 7/13
Epoch 7: val_accuracy did not improve from 0.89500
Epoch 8/13
Epoch 8: val_accuracy did not improve from 0.89500
Epoch 9/13
Epoch 9: val_accuracy did not improve from 0.89500
Epoch 10/13
Epoch 10: val_accuracy improved from 0.89500 to 0.91750, saving model to model.h5
Epoch 11/13
Epoch 11: val_accuracy did not improve from 0.91750
Epoch 12/13
Epoch 12: val_accuracy did not improve from 0.91750
Epoch 13/13
Epoch 13: val_accuracy did not improve from 0.91750


In [22]:
model = keras.models.load_model('/content/model.h5')

In [23]:
score_valid = model.evaluate(x_val, valid_yCl)
print("Validation Accuracy: ", score_valid)

score_test = model.evaluate(x_test, test_yCl)
print("Test Accuracy: ", score_test)

score_train = model.evaluate(x_train, train_yCl)
print("Train Accuracy: ", score_train)

Validation Accuracy:  [0.2431485950946808, 0.9175000190734863]
Test Accuracy:  [0.24771206080913544, 0.9136666655540466]
Train Accuracy:  [0.2239312082529068, 0.9226852059364319]


In [32]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix,classification_report

def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    confusion_mat = confusion_matrix(y_true, y_pred)
    class_report = classification_report(y_true, y_pred)
    return accuracy, f1, precision, recall, confusion_mat, class_report

In [45]:
y_pred = model.predict(x_test)



In [49]:
accuracy, f1, precision, recall, confusion_mat, class_report = calculate_metrics(test_yCl.argmax(axis=1),y_pred.argmax(axis=1))

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Skoru: {f1:.4f}\n")

print("\nKarmaşıklık Matrisi:")
print(confusion_mat)

print("\nSınıflandırma Raporu:")
print(class_report)

Test Accuracy: 0.9137
Test Precision: 0.9140
Test Recall: 0.9137
Test F1 Skoru: 0.9122


Karmaşıklık Matrisi:
[[1028    0    9]
 [   4  904   62]
 [  98   86  809]]

Sınıflandırma Raporu:
              precision    recall  f1-score   support

           0       0.91      0.99      0.95      1037
           1       0.91      0.93      0.92       970
           2       0.92      0.81      0.86       993

    accuracy                           0.91      3000
   macro avg       0.91      0.91      0.91      3000
weighted avg       0.91      0.91      0.91      3000

