In [5]:
# === Step 1: Import Libraries ===
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix

In [6]:
# === Step 2: Data Paths ===
base_dir = 'D:/22.11.5308/SMT 6/Data Mining/Dataset/CT-Scan Lung Cancer'

if not os.path.exists(base_dir):
    raise FileNotFoundError(f"Base directory not found: {base_dir}")

In [7]:
# === Step 3: Image Preprocessing ===
img_size = (224, 224)
batch_size = 32

train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

valid_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

Found 2888 images belonging to 3 classes.
Found 721 images belonging to 3 classes.


In [8]:
# === Step 4: Load ResNet50 Base ===
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
for layer in base_model.layers:
    layer.trainable = False

In [9]:
# === Step 5: Build Custom Classifier ===
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
predictions = Dense(train_generator.num_classes, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=predictions)

In [10]:
# === Step 6: Compile Model ===
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [11]:
# === Step 7: Train Model ===
earlystop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=15,
    callbacks=[earlystop, lr_reduce]
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [12]:
# === Step 8: Evaluate Model ===
val_loss, val_acc = model.evaluate(valid_generator)
print("\nValidation Accuracy:", val_acc)


Validation Accuracy: 0.775312066078186


In [13]:
# === Step 9: Classification Report ===
y_true = valid_generator.classes
y_pred = model.predict(valid_generator)
y_pred_classes = np.argmax(y_pred, axis=1)

print("\nClassification Report:")
print(classification_report(y_true, y_pred_classes, target_names=valid_generator.class_indices.keys()))

print("\nConfusion Matrix:")
print(confusion_matrix(y_true, y_pred_classes))


Classification Report:
                 precision    recall  f1-score   support

   Benign cases       0.26      0.10      0.15       240
Malignant cases       0.31      0.30      0.31       240
   Normal cases       0.32      0.52      0.40       241

       accuracy                           0.31       721
      macro avg       0.30      0.31      0.29       721
   weighted avg       0.30      0.31      0.29       721


Confusion Matrix:
[[ 25  77 138]
 [ 40  73 127]
 [ 31  84 126]]
