<a href="https://www.kaggle.com/code/jobaerislam/lung-colon?scriptVersionId=238436817" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import numpy as np
import os
import cv2
from sklearn.model_selection import train_test_split
from keras.applications import MobileNetV2
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Dense
from keras.optimizers import Adam
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping

2025-05-07 20:08:12.928753: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746648492.952375     954 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746648492.959291     954 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
dataset_dir = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set'

In [3]:
img_height, img_width = 224, 224

In [4]:
def load_data(dataset_dir):
    images = []
    labels = []

    # List the main folders
    main_folders = ['colon_image_sets', 'lung_image_sets']

    for main_folder in main_folders:
        main_path = os.path.join(dataset_dir, main_folder)
        for folder in os.listdir(main_path):
            label = folder
            folder_path = os.path.join(main_path, folder)
            for filename in os.listdir(folder_path):
                img = cv2.imread(os.path.join(folder_path, filename))
                img = cv2.resize(img, (img_height, img_width))
                images.append(img)
                labels.append(label)

    images = np.array(images)
    labels = np.array(labels)
    return images, labels

images, labels = load_data(dataset_dir)

In [5]:
label_dict = {
    'colon_aca': 0, 
    'colon_n': 1, 
    'lung_aca': 2, 
    'lung_n': 3, 
    'lung_scc': 4
}
labels = np.array([label_dict[label] for label in labels])
labels = to_categorical(labels)

In [6]:
X_train, X_temp, y_train, y_temp = train_test_split(
    images, labels, test_size=0.3, random_state=42)

X_test, X_val, y_test, y_val = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42)

In [7]:
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))

I0000 00:00:1746648750.573953     954 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


In [8]:
model = Sequential([
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dense(5, activation='softmax')
])

In [9]:
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])


In [10]:
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


In [11]:
model.fit(X_train, y_train, batch_size=32, epochs=50,
          validation_data=(X_val, y_val),
          callbacks=[early_stop])

Epoch 1/50


I0000 00:00:1746648794.085702    1006 service.cc:148] XLA service 0x7f25f4002290 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1746648794.085767    1006 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1746648797.302765    1006 cuda_dnn.cc:529] Loaded cuDNN version 90300
E0000 00:00:1746648802.209730    1006 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1746648802.406930    1006 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m  1/547[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m8:16:32[0m 55s/step - accuracy: 0.2188 - loss: 1.8927

I0000 00:00:1746648812.923256    1006 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m546/547[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 71ms/step - accuracy: 0.9119 - loss: 0.2561

E0000 00:00:1746648859.523163    1005 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.
E0000 00:00:1746648859.722729    1005 gpu_timer.cc:82] Delay kernel timed out: measured time has sub-optimal accuracy. There may be a missing warmup execution, please investigate in Nsight Systems.


[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 123ms/step - accuracy: 0.9120 - loss: 0.2558 - val_accuracy: 0.3576 - val_loss: 20.6181
Epoch 2/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 76ms/step - accuracy: 0.9666 - loss: 0.1008 - val_accuracy: 0.2008 - val_loss: 12.8904
Epoch 3/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 75ms/step - accuracy: 0.9818 - loss: 0.0539 - val_accuracy: 0.3712 - val_loss: 19.8547
Epoch 4/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 75ms/step - accuracy: 0.9805 - loss: 0.0596 - val_accuracy: 0.2059 - val_loss: 23.3246
Epoch 5/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 75ms/step - accuracy: 0.9862 - loss: 0.0368 - val_accuracy: 0.8467 - val_loss: 0.7030
Epoch 6/50
[1m547/547[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 75ms/step - accuracy: 0.9905 - loss: 0.0314 - val_accuracy: 0.9741 - val_loss: 0.1033
Epoch 7/50
[1m547/54

<keras.src.callbacks.history.History at 0x7f285db30f50>

In [12]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Accuracy: {test_accuracy:.4f}")

[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.9728 - loss: 0.1072
Test Accuracy: 0.9755
