In [None]:
! pip install -q kaggle
from google.colab import files

files.upload()

In [None]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the dataset archive with the Kaggle CLI; the extraction cell below
# opens it by relative name, i.e. from the current working directory.
!kaggle datasets download -d andrewmvd/lung-and-colon-cancer-histopathological-images

In [None]:
import zipfile

# Unpack the downloaded archive under /content. The context manager closes
# the archive even if extraction raises part-way through.
with zipfile.ZipFile('lung-and-colon-cancer-histopathological-images.zip', 'r') as zip_ref:
    zip_ref.extractall('/content')

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn import metrics
import cv2
import gc
import os
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Step 2: Data Visualization
path = '/content/lung_colon_image_set/lung_image_sets'
# os.listdir returns entries in arbitrary order. Sorting pins the
# class -> label-index mapping (the position in this list is used as the
# label when the images are loaded) so it is the same on every run.
classes = sorted(os.listdir(path))
classes

In [None]:
# Preview every class with three randomly chosen images
for cat in classes:
    image_dir = f'{path}/{cat}'
    images = os.listdir(image_dir)

    # One row of three panels per class
    fig, axes = plt.subplots(1, 3, figsize=(10, 5))
    fig.suptitle(f'Images for {cat} category...', fontsize=20)

    # Each panel draws its own random index, so the same image can appear twice
    for panel in axes:
        k = np.random.randint(0, len(images))
        img = np.array(Image.open(f'{image_dir}/{images[k]}'))
        panel.imshow(img)
        panel.axis('off')
    plt.show()


In [None]:
# img = Image.open('/content/drive/MyDrive/New Folder/lung_image_sets/lung_aca/lungaca1.jpeg')
# img = np.array(img)
# # img

In [None]:
# Step 3: Data Preparation for Training
# Hyperparameters shared by the data-loading, model and training cells.
# Side length in pixels: every image is resized to IMG_SIZE x IMG_SIZE and the
# model's input shape is (IMG_SIZE, IMG_SIZE, 3).
IMG_SIZE = 128
# Fraction of the samples held out as the validation set (train_test_split test_size).
SPLIT = 0.2
# Maximum number of training epochs passed to model.fit.
EPOCHS = 15
# Mini-batch size passed to model.fit.
# NOTE(review): 2 is unusually small and makes each epoch slow - confirm this is intended.
BATCH_SIZE = 2

In [None]:
# Lists to store images and labels; X[j] and Y[j] always describe the same sample
X = []
Y = []

# Loop through each class; a class's position in `classes` is its integer label
for i, cat in enumerate(classes):
    # glob order is filesystem-dependent. Sorting fixes the sample order so the
    # seeded train/validation split selects the same images on every run.
    images = sorted(glob(f'{path}/{cat}/*.jpeg'))
    for image in images:
        # cv2.imread returns None instead of raising when a file cannot be
        # read or decoded; skip such files rather than crash inside cv2.resize.
        img = cv2.imread(image)
        if img is None:
            print(f'Skipping unreadable image: {image}')
            continue
        # Resize to the fixed square input size and record the matching label
        X.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
        Y.append(i)

In [None]:
# print(type(X))
# print(Y)

In [None]:
# Convert lists to NumPy arrays
X = np.asarray(X)
# One-hot encode the integer labels. The dtype is requested explicitly because
# the default dummy dtype differs between pandas releases (boolean in newer
# ones); float32 gives Keras the same label array on any version.
one_hot_encoded_Y = pd.get_dummies(Y, dtype=np.float32).values
# Spot-check a single encoded label
one_hot_encoded_Y[300]

In [None]:
# Hold out a SPLIT fraction of the samples for validation; the fixed seed keeps
# the shuffle identical between runs for the same input order
(X_train, X_val,
 Y_train, Y_val) = train_test_split(X, one_hot_encoded_Y, test_size=SPLIT, random_state=2022)

In [None]:
# Step 4: Model Development
# Size the output layer from the label array so it always matches the number
# of one-hot columns produced during data preparation.
num_classes = one_hot_encoded_Y.shape[1]

# CNN built with the Keras Sequential API: three conv/pool stages followed by
# a small dense classifier head.
model = keras.models.Sequential([
    # The images are stored as raw 0-255 pixel values. Scaling to [0, 1] inside
    # the model means fit() and predict() share the same preprocessing.
    layers.Rescaling(1.0 / 255, input_shape=(IMG_SIZE, IMG_SIZE, 3)),

    layers.Conv2D(filters=32, kernel_size=(5, 5), activation='relu', padding='same'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D(2, 2),

    layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D(2, 2),

    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    # One softmax probability per class
    layers.Dense(num_classes, activation='softmax')
])


In [None]:
# Print the model summary (layer-by-layer overview) as a quick check of the
# architecture defined above
model.summary()

In [None]:
# Draw the layer graph, annotated with tensor shapes, dtypes and activations
plot_options = {
    'show_shapes': True,
    'show_dtype': True,
    'show_layer_activations': True,
}
keras.utils.plot_model(model, **plot_options)

In [None]:
# Configure training: Adam optimizer, cross-entropy over the one-hot labels,
# and accuracy reported as the metric
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Step 5: Callbacks
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once validation accuracy rises above 90%.

    The check runs at the end of every epoch. If no validation accuracy was
    logged for the epoch, the callback does nothing.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Request a stop when the epoch's 'val_accuracy' is greater than 0.90.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Metric dict for the epoch; may be None or lack 'val_accuracy'.
        """
        # A None default avoids sharing one mutable dict between calls, and the
        # explicit None check avoids comparing None with a float when the
        # metric is missing.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy > 0.90:
            print('\nValidation accuracy has reached 90%, stopping further training.')
            self.model.stop_training = True

In [None]:
# Early stopping: watch validation accuracy with a patience of 3 epochs and
# restore the best weights when training is stopped
es = EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)
# Learning-rate schedule: watch validation loss with a patience of 2 epochs and
# multiply the learning rate by 0.5 on a plateau (verbose=1 logs each change)
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)

In [None]:
# Step 6: Model Training
# Train on the training split and validate on the held-out split each epoch.
# The EarlyStopping (`es`) and ReduceLROnPlateau (`lr`) callbacks defined
# earlier are registered together with the 90% validation-accuracy stop, so
# all three can act during training.
history = model.fit(
    X_train, Y_train,
    validation_data=(X_val, Y_val),
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    callbacks=[es, lr, myCallback()],
)

Epoch 1/15


In [None]:
# Step 7: Visualize Training Metrics
# Two figures built from the per-epoch history: loss curves first, then
# accuracy curves (training vs. validation in each)
history_df = pd.DataFrame(history.history)
for columns in (['loss', 'val_loss'], ['accuracy', 'val_accuracy']):
    history_df.loc[:, columns].plot()
plt.show()

In [None]:
# Step 8: Model Evaluation
# Predict class probabilities for the validation set and reduce each row to
# the index of its most likely class
Y_pred = np.argmax(model.predict(X_val), axis=1)
# Y_val starts out one-hot encoded. Convert it to class indices only while it
# is still 2-D, so re-running this cell does not fail on the already-converted
# 1-D array.
if Y_val.ndim > 1:
    Y_val = np.argmax(Y_val, axis=1)


In [None]:
# Summarize validation performance: confusion matrix first, then the
# per-class report labelled with the class names
conf_matrix = metrics.confusion_matrix(Y_val, Y_pred)
class_report = metrics.classification_report(Y_val, Y_pred, target_names=classes)
print(conf_matrix)
print(class_report)