<a href="https://colab.research.google.com/github/AhmetBerkayULUTAS/Lung-Cancer-Classification-with-CNN/blob/main/Lung_Cancer_Classification_with_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
import numpy as np
import cv2
import os
from google.colab import drive
import pickle

In [27]:
print(os.listdir("/content/drive/MyDrive/lung_image_sets"))

['lung_squamous cell carcinoma', 'lung_adenocarcinoma', 'lung_benign tissue']


In [28]:
datadir = '/content/drive/MyDrive/lung_image_sets'
categories = ['lung_benign tissue', 'lung_squamous cell carcinoma', 'lung_adenocarcinoma']
all_data = []
img_size = 128

In [4]:
def create_all_data():
    for category in categories:
        path=os.path.join(datadir, category)
        class_num=categories.index(category)
        count = 0
        for img in os.listdir(path):
            img_array=cv2.imread(os.path.join(path,img))
            new_array=cv2.resize(img_array,(img_size,img_size), interpolation=cv2.INTER_AREA)
            all_data.append([new_array,class_num])
            count += 1
        print("Category label no:",class_num,path," count:", count)

    with open('Data128x128.pkl', 'wb') as file:
        pickle.dump(all_data, file)

create_all_data()

Category label no: 0 /content/drive/MyDrive/lung_image_sets/lung_benign tissue  count: 4495
Category label no: 1 /content/drive/MyDrive/lung_image_sets/lung_squamous cell carcinoma  count: 5000
Category label no: 2 /content/drive/MyDrive/lung_image_sets/lung_adenocarcinoma  count: 5000


In [5]:
with open('Data128x128.pkl', 'rb') as file:
    all_data = pickle.load(file)

print(len(all_data))

14495


In [6]:
X = []
y = []

for categories, label in all_data:
  X.append(categories)
  y.append(label)

In [7]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state =42)

x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.10, random_state=42)

x_train = np.array(x_train)
x_test = np.array(x_test)

y_train = np.array(y_train)
y_test = np.array(y_test)

x_val = np.array(x_val)
y_val = np.array(y_val)

In [8]:
print("X Train")
print(x_train.shape)
print("X Test")
print(x_test.shape)
print("X Vaidation")
print(x_val.shape,"\n")

print("Y Train")
print(y_train.shape)
print("Y Test")
print(y_test.shape)
print("Y Vaidation")
print(y_val.shape)

X Train
(10436, 128, 128, 3)
X Test
(2899, 128, 128, 3)
X Vaidation
(1160, 128, 128, 3) 

Y Train
(10436,)
Y Test
(2899,)
Y Vaidation
(1160,)


In [9]:
import tensorflow as tf

train_yCl = tf.keras.utils.to_categorical(y_train, num_classes=3)
test_yCl = tf.keras.utils.to_categorical(y_test, num_classes=3)
valid_yCl = tf.keras.utils.to_categorical(y_val, num_classes=3)

In [10]:
from keras.layers import Input, Dense, Activation, BatchNormalization, Flatten, Conv2D, MaxPooling2D, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

def create_model(learning_rate,kernel,dropout):
    model = Sequential()

    model.add(Conv2D(32, (kernel,kernel), padding='same', activation='relu',input_shape=(img_size, img_size, 3)))
    model.add(Conv2D(32, (kernel,kernel), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    model.add(Conv2D(64, (kernel,kernel), padding='same', activation='relu'))
    model.add(Conv2D(64, (kernel,kernel), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    model.add(Conv2D(128, (kernel,kernel), padding='same', activation='relu'))
    model.add(Conv2D(128, (kernel,kernel), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(BatchNormalization())
    model.add(Dropout(0.1))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(dropout))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(3))
    model.add(Activation('softmax'))

    learning_rate = ExponentialDecay(learning_rate, decay_steps=1300, decay_rate=0.96, staircase=True)
    optimizer=Adam(learning_rate=learning_rate,beta_1=0.9,beta_2=0.999)

    model.compile(optimizer=optimizer,loss="categorical_crossentropy",metrics=["accuracy"])
    return model

In [11]:
!pip install scikeras

Collecting scikeras
  Downloading scikeras-0.12.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.12.0


In [12]:
from scikeras.wrappers import KerasClassifier
from scipy.stats import uniform, randint

param_dist = {
    'learning_rate' : uniform(0.005 , 0.05),
    'kernel'        : randint(2 , 5),
    'dropout'       : uniform(0.2 , 0.5)
}

model = KerasClassifier(model=create_model,batch_size=32, epochs=5, learning_rate=0.005, kernel=2, dropout=0.2)

In [13]:
from sklearn.model_selection import RandomizedSearchCV

random = RandomizedSearchCV(estimator=model, param_distributions=param_dist, scoring='accuracy', cv=2, n_iter=5)
random_result = random.fit(x_train, train_yCl)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
print("Best: %f using %s" % (random_result.best_score_, random_result.best_params_))

Best: 0.621694 using {'dropout': 0.40180437801518243, 'kernel': 3, 'learning_rate': 0.005958432489782712}


In [30]:
import pandas as pd
print("\nAll Results :")
cv_results = random_result.cv_results_
df = pd.DataFrame(cv_results)

print(df)


All Results :
   mean_fit_time  std_fit_time  mean_score_time  std_score_time param_dropout  \
0      78.054261      6.863165         2.779725        0.380194      0.313068   
1      84.509672      0.010883         3.342702        0.045294      0.429364   
2      85.199866      0.227325         3.406076        0.130054      0.433329   
3      85.107185      0.535596         3.411769        0.123168      0.314507   
4      84.791450      0.294728         3.331050        0.097888      0.401804   

  param_kernel param_learning_rate  \
0            3             0.00915   
1            3            0.023517   
2            2            0.038999   
3            2            0.027118   
4            3            0.005958   

                                              params  split0_test_score  \
0  {'dropout': 0.3130677744874459, 'kernel': 3, '...           0.482560   
1  {'dropout': 0.4293638303628683, 'kernel': 3, '...           0.352434   
2  {'dropout': 0.43332946802247807, 'kernel'

In [18]:
final_model = create_model(learning_rate=random_result.best_params_['learning_rate'],
                           kernel=random_result.best_params_['kernel'],
                           dropout=random_result.best_params_['dropout']
                           )

In [19]:
from tensorflow import keras

callback_list = [
    keras.callbacks.ModelCheckpoint(
        filepath='model.h5',
        monitor = 'val_accuracy', save_best_only=True, verbose=3
    ),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=8, verbose=3)
]

In [20]:
history = final_model.fit(x_train, train_yCl,
                    batch_size=32,
                    validation_data = (x_val, valid_yCl),
                    callbacks = callback_list,
                    epochs = 20
                    )

Epoch 1/20
Epoch 1: val_accuracy improved from -inf to 0.55690, saving model to model.h5


  saving_api.save_model(


Epoch 2/20
Epoch 2: val_accuracy improved from 0.55690 to 0.81552, saving model to model.h5
Epoch 3/20
Epoch 3: val_accuracy did not improve from 0.81552
Epoch 4/20
Epoch 4: val_accuracy improved from 0.81552 to 0.88190, saving model to model.h5
Epoch 5/20
Epoch 5: val_accuracy did not improve from 0.88190
Epoch 6/20
Epoch 6: val_accuracy did not improve from 0.88190
Epoch 7/20
Epoch 7: val_accuracy did not improve from 0.88190
Epoch 8/20
Epoch 8: val_accuracy did not improve from 0.88190
Epoch 9/20
Epoch 9: val_accuracy did not improve from 0.88190
Epoch 10/20
Epoch 10: val_accuracy did not improve from 0.88190
Epoch 11/20
Epoch 11: val_accuracy improved from 0.88190 to 0.91379, saving model to model.h5
Epoch 12/20
Epoch 12: val_accuracy did not improve from 0.91379
Epoch 13/20
Epoch 13: val_accuracy did not improve from 0.91379
Epoch 14/20
Epoch 14: val_accuracy did not improve from 0.91379
Epoch 15/20
Epoch 15: val_accuracy did not improve from 0.91379
Epoch 16/20
Epoch 16: val_accu

In [21]:
model = keras.models.load_model('/content/model.h5')

In [22]:
score_valid = model.evaluate(x_val, valid_yCl)
print("Validation Accuracy: ", score_valid)

score_test = model.evaluate(x_test, test_yCl)
print("Validation Accuracy: ", score_test)

score_train = model.evaluate(x_train, train_yCl)
print("Validation Accuracy: ", score_train)

Validation Accuracy:  [0.28163811564445496, 0.9137930870056152]
Validation Accuracy:  [0.3167824149131775, 0.9096239805221558]
Validation Accuracy:  [0.2087075114250183, 0.9256420135498047]


In [23]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix,classification_report

def calculate_metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    confusion_mat = confusion_matrix(y_true, y_pred)
    class_report = classification_report(y_true, y_pred)
    return accuracy, f1, precision, recall, confusion_mat, class_report

In [24]:
y_pred = model.predict(x_test)



In [25]:
accuracy, f1, precision, recall, confusion_mat, class_report = calculate_metrics(test_yCl.argmax(axis=1),y_pred.argmax(axis=1))

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print(f"Test Recall: {recall:.4f}")
print(f"Test F1 Skoru: {f1:.4f}\n")

print("\nKarmaşıklık Matrisi:")
print(confusion_mat)

print("\nSınıflandırma Raporu:")
print(class_report)

Test Accuracy: 0.9096
Test Precision: 0.9166
Test Recall: 0.9096
Test F1 Skoru: 0.9107


Karmaşıklık Matrisi:
[[870   0  59]
 [  0 822 145]
 [  0  58 945]]

Sınıflandırma Raporu:
              precision    recall  f1-score   support

           0       1.00      0.94      0.97       929
           1       0.93      0.85      0.89       967
           2       0.82      0.94      0.88      1003

    accuracy                           0.91      2899
   macro avg       0.92      0.91      0.91      2899
weighted avg       0.92      0.91      0.91      2899

