In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from glob import glob

import cv2
import os
import gc

import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics

import tensorflow as tf
from tensorflow import keras
from keras import layers

import warnings
# Suppress every warning for the rest of the session so cell output stays short.
# NOTE(review): this is a blanket filter, so deprecation warnings raised by the
# libraries imported above are hidden as well.
warnings.filterwarnings('ignore')

In [None]:
from zipfile import ZipFile

# Path to the zip archive that holds the dataset
data_path = 'lung-and-colon-cancer-histopathological-images.zip'

# Unpack the whole archive into the current working directory.
# The handle is deliberately NOT named `zip`: in a notebook the `as` target is a
# global, so it would replace the builtin zip() for every later cell.
# NOTE(review): the loading cells below read from a /kaggle/input/... path, not
# from the directory extracted here - confirm which location is intended.
with ZipFile(data_path, 'r') as archive:
    archive.extractall()
    print('The data set has been extracted.')


Data Visualization

In [None]:
# Directory that contains one sub-directory per lung image class
path = '/kaggle/input/lung-and-colon-cancer-histopathological-images/lung_colon_image_set/lung_image_sets'

# Class names are the sub-directory names.
# os.listdir() returns entries in arbitrary order, and the position of a name in
# `classes` is later used as its numeric label, so the list is sorted to keep the
# label <-> class mapping identical across runs and machines.
# Non-directory entries (stray files) are dropped so they cannot become a class.
classes = [
    entry
    for entry in sorted(os.listdir(path))
    if os.path.isdir(os.path.join(path, entry))
]

# Show the detected classes in label order
print(classes)


In [None]:
# Show three randomly chosen sample images for every class.
# `path` and `classes` are the values defined in the cell above.
for cat in classes:
    image_dir = f'{path}/{cat}'
    images = os.listdir(image_dir)

    # np.random.randint(0, 0) raises ValueError, so report empty folders explicitly
    if not images:
        print(f'No images found in {image_dir}, skipping.')
        continue

    # One row of three panels per class
    fig, ax = plt.subplots(1, 3, figsize=(15, 5))
    fig.suptitle(f'Images for {cat} category . . . .', fontsize=20)

    for i in range(3):
        # Indices are drawn independently, so the same image may appear twice
        k = np.random.randint(0, len(images))

        # The context manager closes the file handle once the pixels are copied
        # into the array, instead of leaving it open until garbage collection.
        with Image.open(f'{image_dir}/{images[k]}') as pil_image:
            img = np.array(pil_image)

        ax[i].imshow(img)
        ax[i].axis('off')

    plt.show()  # Render this class's figure before moving to the next one


Data Preparation for Training

In [None]:
# ---- Training configuration -------------------------------------------------
BATCH_SIZE = 64   # samples per gradient step passed to model.fit
EPOCHS = 10       # upper bound on training epochs passed to model.fit
SPLIT = 0.2       # fraction of the samples held out as the validation set
IMG_SIZE = 256    # images are resized to IMG_SIZE x IMG_SIZE pixels

In [None]:
# Accumulators for the resized images (X) and their integer class labels (Y)
X = []
Y = []

# The position of a class name in `classes` is used as its numeric label
for i, cat in enumerate(classes):
    # glob() returns paths in arbitrary order; sorting keeps the sample order
    # (and therefore the seeded train/validation split) reproducible.
    images = sorted(glob(f'{path}/{cat}/*.jpeg'))

    for image in images:
        # NOTE: OpenCV loads colour images in BGR channel order
        img = cv2.imread(image)

        # cv2.imread() returns None instead of raising when a file cannot be
        # decoded; cv2.resize() would then fail with an unrelated-looking error.
        if img is None:
            print(f'Skipping unreadable image: {image}')
            continue

        # Bring every image to the common IMG_SIZE x IMG_SIZE input resolution
        resized_img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        X.append(resized_img)
        Y.append(i)

# Stack the images into a single array for training
X = np.asarray(X)

# One-hot encode the labels with a fixed width of len(classes).
# pd.get_dummies() would silently drop the column of any class without samples
# and yields boolean columns on pandas >= 2.0; indexing an identity matrix
# always produces a float32 array with one column per class.
one_hot_encoded_Y = np.eye(len(classes), dtype=np.float32)[np.asarray(Y, dtype=int)]

In [None]:
# Hold out a SPLIT fraction of the samples for validation; the fixed
# random_state makes the partition repeatable between runs.
X_train, X_val, Y_train, Y_val = train_test_split(
    X,
    one_hot_encoded_Y,
    test_size=SPLIT,
    random_state=2022,
)

# Report the array shape of each subset
for label, subset in (("Training set shape:", X_train),
                      ("Validation set shape:", X_val)):
    print(label, subset.shape)


Model Development

In [None]:
# Width of the softmax output, taken from the label array so the model always
# matches the one-hot targets it is trained on.
num_classes = one_hot_encoded_Y.shape[1]

model = keras.models.Sequential([
    # Explicit input specification: IMG_SIZE x IMG_SIZE images with 3 channels
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    # The images are stored as raw 0-255 pixel values; scale them to [0, 1]
    # inside the model so training and inference share the same preprocessing
    # without having to keep a float copy of the whole dataset in memory.
    layers.Rescaling(1.0 / 255),

    # Convolutional block 1: 32 filters, 5x5 kernel, then 2x2 down-sampling
    layers.Conv2D(
        filters=32,
        kernel_size=(5, 5),
        activation='relu',
        padding='same'
    ),
    layers.MaxPooling2D(2, 2),

    # Convolutional block 2: 64 filters, 3x3 kernel, then 2x2 down-sampling
    layers.Conv2D(
        filters=64,
        kernel_size=(3, 3),
        activation='relu',
        padding='same'
    ),
    layers.MaxPooling2D(2, 2),

    # Convolutional block 3: 128 filters, 3x3 kernel, then 2x2 down-sampling
    layers.Conv2D(
        filters=128,
        kernel_size=(3, 3),
        activation='relu',
        padding='same'
    ),
    layers.MaxPooling2D(2, 2),

    # Flatten the feature maps into a vector for the dense classifier head
    layers.Flatten(),

    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),

    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),  # Randomly drops 30% of activations during training
    layers.BatchNormalization(),

    # One softmax probability per class
    layers.Dense(num_classes, activation='softmax')
])


In [None]:
# Print a text summary of the model built in the previous cell
model.summary()

In [None]:
# Draw the layer graph of `model`, annotating each node with its tensor shapes,
# data types and activation functions.
plot_options = dict(
    show_shapes=True,
    show_dtype=True,
    show_layer_activations=True,
)
keras.utils.plot_model(model, **plot_options)

Karan, I don't know why this is happening... I downloaded everything and the problem is still there. (This note presumably refers to the `plot_model` cell above.)

In [None]:
# Training setup: Adam optimizer, categorical cross-entropy loss (the targets
# are one-hot vectors) and accuracy reported for every epoch.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

Callback

In [None]:
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Custom callback to stop training when validation accuracy reaches 90%
class myCallback(tf.keras.callbacks.Callback):
    """Stop training once the epoch's validation accuracy exceeds 0.90."""

    def on_epoch_end(self, epoch, logs=None):
        """Check the metrics of the finished epoch and request a stop if needed.

        Args:
            epoch: Index of the epoch that just ended (unused).
            logs: Mapping of metric names to values for this epoch. May be None
                or lack 'val_accuracy' (e.g. when no validation data is given),
                in which case nothing happens.
        """
        # `logs=None` avoids sharing one mutable default dict between calls, and
        # the None check avoids a TypeError from comparing None with a float.
        val_accuracy = (logs or {}).get('val_accuracy')
        if val_accuracy is not None and val_accuracy > 0.90:
            print('\nValidation accuracy has reached 90%, stopping further training.')
            self.model.stop_training = True

# Stop when validation accuracy has not improved for 3 epochs in a row, and
# roll the model back to the best weights seen so far.
es = EarlyStopping(monitor='val_accuracy', patience=3, restore_best_weights=True)

# Halve the learning rate whenever validation loss has not improved for
# 2 epochs in a row; verbose=1 logs each reduction.
lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, verbose=1)

In [None]:
# Callbacks in the order they are handed to Keras: early stopping,
# learning-rate reduction, then the 90% validation-accuracy stop.
training_callbacks = [es, lr, myCallback()]

# Train on the training split and evaluate on the validation split each epoch
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=training_callbacks,
    verbose=1,
)

In [None]:
# Per-epoch metrics recorded by model.fit, one row per epoch
history_df = pd.DataFrame(history.history)


def _plot_history_curves(frame, columns, title, ylabel, legend_labels):
    """Plot the given history columns against the epoch index on one figure.

    Args:
        frame: DataFrame of per-epoch metrics.
        columns: Names of the columns to draw.
        title: Figure title.
        ylabel: Label for the y axis.
        legend_labels: Legend entries, in the same order as `columns`.
    """
    # DataFrame.plot() opens its own figure unless it is given an axis, so a
    # preceding plt.figure(figsize=...) would only leave an empty figure behind
    # and the requested size would be ignored. Create the axis once and draw on it.
    fig, ax = plt.subplots(figsize=(10, 6))
    frame.loc[:, columns].plot(ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend(legend_labels)
    ax.grid(True)
    plt.show()


# Plot the training and validation loss
_plot_history_curves(
    history_df,
    ['loss', 'val_loss'],
    'Training and Validation Loss',
    'Loss',
    ['Training Loss', 'Validation Loss'],
)

# Plot the training and validation accuracy
_plot_history_curves(
    history_df,
    ['accuracy', 'val_accuracy'],
    'Training and Validation Accuracy',
    'Accuracy',
    ['Training Accuracy', 'Validation Accuracy'],
)

Model Evaluation

In [None]:
# Predict class probabilities for the validation images and keep, for each
# image, the index of the most probable class.
Y_pred = np.argmax(model.predict(X_val), axis=1)

# Collapse the one-hot validation labels to class indices.
# The cell overwrites Y_val, so on a second run Y_val is already 1-D and
# argmax(axis=1) would fail; the dimensionality check makes the cell re-runnable.
if np.ndim(Y_val) > 1:
    Y_val = np.argmax(Y_val, axis=1)

In [None]:
# Count how often each actual class (rows) was predicted as each class (columns)
confusion_matrix = metrics.confusion_matrix(Y_val, Y_pred)

# Wrap the counts in a DataFrame and name both axes in a single step
df_confusion_matrix = pd.DataFrame(confusion_matrix).rename_axis(
    index='Actual',
    columns='Predicted',
)

print(df_confusion_matrix)

In [None]:
# Build the per-class report, using the class names as row labels, then show it
report = metrics.classification_report(
    y_true=Y_val,
    y_pred=Y_pred,
    target_names=classes,
)
print(report)