<a href="https://colab.research.google.com/github/SivanisIyer/Lung-Cancer-Prediction--Histopathological-images/blob/main/Lung_cancer_org.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


PREPROCESSING

In [3]:
import os
import cv2
import numpy as np
from collections import Counter
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


In [4]:
BASE = "/content/drive/MyDrive/WORK/LUNG CANCER-DS"
IMG_SIZE = 128
PATCH_SIZE = 128


PREP

In [5]:
def preprocess_image(img_path):
    img = cv2.imread(img_path)
    if img is None:
        return None

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (512, 512))
    img = img / 255.0
    return img


PATCH EXT


In [6]:
def extract_patches(img, patch_size=128):
    patches = []
    h, w, _ = img.shape

    for y in range(0, h - patch_size + 1, patch_size):
        for x in range(0, w - patch_size + 1, patch_size):
            patch = img[y:y+patch_size, x:x+patch_size]
            patches.append(patch)

    return patches


LOAD DS

In [4]:
def patch_generator(base_path, batch_size=16):
    class_names = sorted(os.listdir(base_path))
    class_to_label = {cls: i for i, cls in enumerate(class_names)}

    X_batch, y_batch = [], []

    while True:
        for cls in class_names:
            cls_path = os.path.join(base_path, cls)

            for img_name in os.listdir(cls_path):
                if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue

                img_path = os.path.join(cls_path, img_name)
                img = preprocess_image(img_path)
                if img is None:
                    continue

                patches = extract_patches(img)

                for patch in patches:
                    X_batch.append(patch)
                    y_batch.append(class_to_label[cls])

                    if len(X_batch) == batch_size:
                        yield np.array(X_batch), np.array(y_batch)
                        X_batch, y_batch = [], []


In [3]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense


In [6]:
from tensorflow.keras.layers import Input

model = Sequential([
    Input(shape=(128,128,3)),

    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(2,2),

    Flatten(),
    Dense(128, activation='relu'),
    Dense(2, activation='softmax')
])


In [7]:
model.summary()


TRAINING

In [10]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)


In [13]:
import cv2
import numpy as np
import os

# --- Image preprocessing ---
def preprocess_image(img_path):
    img = cv2.imread(img_path)
    if img is None:
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (512, 512))
    img = img / 255.0
    return img

# --- Patch extraction ---
def extract_patches(img, patch_size=128):
    patches = []
    h, w, _ = img.shape
    for y in range(0, h - patch_size + 1, patch_size):
        for x in range(0, w - patch_size + 1, patch_size):
            patches.append(img[y:y+patch_size, x:x+patch_size])
    return patches

# --- Patch generator (RAM-safe) ---
def patch_generator(base_path, batch_size=16):
    class_names = sorted(os.listdir(base_path))
    class_to_label = {cls: i for i, cls in enumerate(class_names)}

    X_batch, y_batch = [], []

    while True:
        for cls in class_names:
            cls_path = os.path.join(base_path, cls)

            for img_name in os.listdir(cls_path):
                if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                    continue

                img_path = os.path.join(cls_path, img_name)
                img = preprocess_image(img_path)
                if img is None:
                    continue

                patches = extract_patches(img)

                for patch in patches:
                    X_batch.append(patch)
                    y_batch.append(class_to_label[cls])

                    if len(X_batch) == batch_size:
                        yield np.array(X_batch), np.array(y_batch)
                        X_batch, y_batch = [], []


In [14]:
history = model.fit(
    patch_generator(BASE, batch_size=16),
    steps_per_epoch=200,
    epochs=15
)


Epoch 1/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m122s[0m 1s/step - accuracy: 0.9678 - loss: 0.0359
Epoch 2/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 548ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 3/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 529ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 4/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 607ms/step - accuracy: 1.0000 - loss: 0.0000e+00
Epoch 5/5
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 600ms/step - accuracy: 1.0000 - loss: 0.0000e+00


In [15]:
model.save("/content/drive/MyDrive/lung_patch_majority_model.h5")
print("Model saved")




Model saved


In [16]:
from collections import Counter

def predict_image_majority(img_path, model):
    img = preprocess_image(img_path)
    patches = extract_patches(img)

    preds = []
    for patch in patches:
        patch = np.expand_dims(patch, axis=0)
        pred = np.argmax(model.predict(patch, verbose=0))
        preds.append(pred)

    return Counter(preds).most_common(1)[0][0]


In [18]:
import os

BASE = "/content/drive/MyDrive/WORK/LUNG CANCER-DS"

class_names = sorted(os.listdir(BASE))
class_to_label = {cls: i for i, cls in enumerate(class_names)}

print("Classes:", class_names)


Classes: ['LUAD', 'LUSC']


In [22]:
from google.colab import files

uploaded = files.upload()
uploaded_img_path = list(uploaded.keys())[0]
print("Uploaded image:", uploaded_img_path)
pred = predict_image_majority(uploaded_img_path, model)
print("Predicted Cancer Subtype:", class_names[pred])


Saving external_test.jpg to external_test.jpg
Uploaded image: external_test.jpg
Predicted Cancer Subtype: LUAD


In [25]:
def predict_image_majority_fast(img_path, model):
    img = preprocess_image(img_path)
    patches = extract_patches(img)
    patches = np.array(patches)

    preds = np.argmax(model.predict(patches, verbose=0), axis=1)
    return Counter(preds).most_common(1)[0][0]


In [26]:
MAX_IMAGES_PER_CLASS = 100   # keeps it fast

correct = 0
total = 0

for cls in class_names:
    cls_path = os.path.join(BASE, cls)
    true_label = class_to_label[cls]

    count = 0
    for img_name in os.listdir(cls_path):
        if not img_name.lower().endswith(('.jpg','.jpeg','.png')):
            continue

        img_path = os.path.join(cls_path, img_name)
        pred_label = predict_image_majority_fast(img_path, model)

        if pred_label is None:
            continue

        if pred_label == true_label:
            correct += 1
        total += 1

        count += 1
        if count >= MAX_IMAGES_PER_CLASS:
            break

image_accuracy = correct / total
print("✅ Image-level Accuracy (fast):", round(image_accuracy * 100, 2), "%")


✅ Image-level Accuracy (fast): 50.0 %


In [29]:
from collections import Counter
import os
import numpy as np

def quick_image_accuracy(model, base_path, class_names, class_to_label, max_images=20):
    correct = 0
    total = 0

    for cls in class_names:
        cls_path = os.path.join(base_path, cls)
        true_label = class_to_label[cls]

        count = 0
        for img_name in os.listdir(cls_path):
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            img_path = os.path.join(cls_path, img_name)

            img = preprocess_image(img_path)
            if img is None:
                continue

            patches = extract_patches(img)
            patches = np.array(patches)

            preds = np.argmax(model.predict(patches, verbose=0), axis=1)
            final_pred = Counter(preds).most_common(1)[0][0]

            if final_pred == true_label:
                correct += 1
            total += 1

            count += 1
            if count >= max_images:
                break

    return round((correct / total) * 100, 2)


In [30]:
acc = quick_image_accuracy(
    model,
    BASE,
    class_names,
    class_to_label,
    max_images=20
)

print("✅ Image-level Accuracy:", acc, "%")


✅ Image-level Accuracy: 50.0 %
