In [None]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

# ✅ Define dataset path
dataset_path = r"Datasets"  # Change to absolute path if needed
# The position of each name in this list is used as its integer class label.
categories = ['Benign case', 'Malignant case', 'Normal case']
img_size = 256  # Resize all images to 256x256

data = []
labels = []

# ✅ Verify if dataset path exists
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"Error: The dataset folder '{dataset_path}' does not exist.")

# ✅ Load and preprocess images
for class_num, category in enumerate(categories):
    path = os.path.join(dataset_path, category)

    # isdir (rather than exists) so a stray file named like a category is
    # skipped instead of crashing os.listdir below.
    if not os.path.isdir(path):
        print(f"Warning: Folder '{category}' not found! Skipping...")
        continue  # Skip missing folders

    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting keeps the sample order stable, so the seeded train/test split
    # further down selects the same images on every machine.
    for file in sorted(os.listdir(path)):
        filepath = os.path.join(path, file)
        if not os.path.isfile(filepath):
            continue  # Skip if not a valid file

        img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)  # Load as single-channel grayscale
        if img is None:
            # cv2.imread signals an unreadable / non-image file by returning None.
            print(f"Warning: Could not read {filepath}. Skipping...")
            continue

        img = cv2.resize(img, (img_size, img_size))  # Resize
        data.append(img)
        labels.append(class_num)

# Fail early with a clear message instead of an obscure error from the split step.
if not data:
    raise ValueError(f"Error: No readable images were found under '{dataset_path}'.")

# ✅ Convert to NumPy arrays
# float32 halves the memory of the default float64 result; shape becomes
# (n_images, img_size, img_size, 1) with pixel values scaled to [0, 1].
data = np.asarray(data, dtype=np.float32).reshape(-1, img_size, img_size, 1) / 255.0  # Normalize
y = np.array(labels)

# ✅ Split dataset
# Stratified on y so the 20% test split keeps the class proportions of the full set.
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.2, stratify=y, random_state=42)

# ✅ Apply SMOTE for class balancing
# SMOTE operates on a 2-D (samples, features) matrix, so every image is
# flattened to a vector first and reshaped back afterwards. Only the training
# split is resampled; the test split is left untouched.
X_train = X_train.reshape(X_train.shape[0], -1)
smote = SMOTE(random_state=42)  # Seeded like the split above so reruns produce the same synthetic samples
X_train, y_train = smote.fit_resample(X_train, y_train)
X_train = X_train.reshape(X_train.shape[0], img_size, img_size, 1)

# ✅ Data augmentation
# Random rotations (up to 15 degrees), horizontal flips and zoom (up to 20%).
# datagen.fit(X_train) is intentionally not called: it is only needed when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled, and otherwise just makes a full copy of the training array.
datagen = ImageDataGenerator(rotation_range=15, horizontal_flip=True, zoom_range=0.2)

# ✅ Model Definition
# Three Conv2D + MaxPooling2D stages (64 -> 128 -> 256 filters) followed by a
# dense classifier head with dropout and a 3-way softmax output.
model = Sequential([
    # Declare the input with an explicit Input object; passing input_shape= to
    # the first layer of a Sequential model makes recent Keras emit a UserWarning.
    tf.keras.Input(shape=(img_size, img_size, 1)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')
])

# Labels are plain integer class ids (not one-hot), hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# ✅ Train model
# Augmented mini-batches of 16 are drawn from the training set; the held-out
# split is passed as validation data for the 25 epochs.
train_batches = datagen.flow(X_train, y_train, batch_size=16)
history = model.fit(
    train_batches,
    validation_data=(X_test, y_test),
    epochs=25,
)

# ✅ Evaluate model
# Fix the label set to every configured class so the report rows and the
# confusion-matrix axes always line up with `categories`, even when a class
# never appears in y_test or y_pred.
class_ids = list(range(len(categories)))

# The predicted class is the index of the largest softmax output per sample.
y_pred = np.argmax(model.predict(X_test), axis=1)
print(classification_report(y_test, y_pred, labels=class_ids, target_names=categories))

# ✅ Confusion Matrix
# Rows are the actual classes, columns the predicted ones.
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=categories, yticklabels=categories)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# ✅ Plot Accuracy & Loss
# One panel per metric, side by side; each panel overlays the training curve
# with its validation counterpart (stored by Keras under the 'val_' prefix).
plt.figure(figsize=(12, 4))
panels = (('accuracy', 'Model Accuracy'), ('loss', 'Model Loss'))
for position, (metric, panel_title) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric], label='Train')
    plt.plot(history.history['val_' + metric], label='Validation')
    plt.title(panel_title)
    plt.legend()
plt.show()

# ✅ Save model
# Persist the trained network to a single HDF5 file and report where it went.
model_filename = "lung_cancer_classifier.h5"
model.save(model_filename)

print(f"✅ Model training complete! Saved as '{model_filename}'")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


  self._warn_if_super_not_called()


Epoch 1/25
[1m32/74[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m1:27[0m 2s/step - accuracy: 0.3871 - loss: 1.3737