<a href="https://colab.research.google.com/github/Arif2455/lung-tumour-detection/blob/main/LungTumourDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
# === Cell 2: create folder structure ===
import os, shutil, math
os.makedirs(DATA_DIR, exist_ok=True)
class_a_dir = os.path.join(DATA_DIR, CLASS_A_NAME)
class_b_dir = os.path.join(DATA_DIR, CLASS_B_NAME)
os.makedirs(class_a_dir, exist_ok=True)
os.makedirs(class_b_dir, exist_ok=True)
print("Created folders:")
print(" -", class_a_dir)
print(" -", class_b_dir)


Created folders:
 - /content/lung_project/normal
 - /content/lung_project/tumour


In [7]:
# === Cell 1: configure these variables ===
UPLOAD_DIR = "/content"            # where your loose images currently are (change if different)
DATA_DIR   = "/content/lung_project"  # target dataset folder we'll create
CLASS_A_NAME = "normal"            # change if you want different names
CLASS_B_NAME = "tumour"
CLASS_A_COUNT = 154                # number of images you want in CLASS_A
CLASS_B_COUNT = 93                 # number of images you want in CLASS_B
SPLIT_MODE = "by_count"            # options: "by_count", "by_pattern", "by_csv"

# === Helper & quick preview ===
import pathlib, os
IMG_EXTS = {'.jpg','.jpeg','.png','.bmp','.tif','.tiff'}
p = pathlib.Path(UPLOAD_DIR)
all_imgs = [str(x) for x in sorted(p.glob("*")) if x.suffix.lower() in IMG_EXTS]

print(f"Found {len(all_imgs)} image files in {UPLOAD_DIR}")
print("First 40 filenames:")
for i,fn in enumerate(all_imgs[:40], 1):
    print(f"{i:3d}. {os.path.basename(fn)}")


Found 246 image files in /content
First 40 filenames:
  1. JPCLN001.png
  2. JPCLN002.png
  3. JPCLN003.png
  4. JPCLN004.png
  5. JPCLN005.png
  6. JPCLN006.png
  7. JPCLN007.png
  8. JPCLN008.png
  9. JPCLN009.png
 10. JPCLN010.png
 11. JPCLN011.png
 12. JPCLN012.png
 13. JPCLN013.png
 14. JPCLN014.png
 15. JPCLN015.png
 16. JPCLN016.png
 17. JPCLN017.png
 18. JPCLN018.png
 19. JPCLN019.png
 20. JPCLN020.png
 21. JPCLN021.png
 22. JPCLN022.png
 23. JPCLN023.png
 24. JPCLN024.png
 25. JPCLN025.png
 26. JPCLN026.png
 27. JPCLN027.png
 28. JPCLN028.png
 29. JPCLN029.png
 30. JPCLN030.png
 31. JPCLN031.png
 32. JPCLN032.png
 33. JPCLN033.png
 34. JPCLN034.png
 35. JPCLN035.png
 36. JPCLN036.png
 37. JPCLN037.png
 38. JPCLN038.png
 39. JPCLN039.png
 40. JPCLN040.png


In [9]:
# === Cell 3A: Auto split by count ===
if SPLIT_MODE != "by_count":
    print("Skipping by_count (SPLIT_MODE != 'by_count').")
else:
    imgs = all_imgs.copy()
    total_needed = CLASS_A_COUNT + CLASS_B_COUNT
    if len(imgs) < total_needed:
        raise SystemExit(f"Not enough images found ({len(imgs)}) for requested counts ({total_needed}).")
    # Move first N to class A, next M to class B
    a_list = imgs[:CLASS_A_COUNT]
    b_list = imgs[CLASS_A_COUNT:CLASS_A_COUNT+CLASS_B_COUNT]

    import shutil, os
    for src in a_list:
        dst = os.path.join(class_a_dir, os.path.basename(src))
        shutil.move(src, dst)
    for src in b_list:
        dst = os.path.join(class_b_dir, os.path.basename(src))
        shutil.move(src, dst)

    print(f"Moved {len(a_list)} → {class_a_dir}")
    print(f"Moved {len(b_list)} → {class_b_dir}")


SystemExit: Not enough images found (246) for requested counts (247).

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [11]:
import shutil, os

CLASS_A_NAME = "normal"
CLASS_B_NAME = "tumour"
CLASS_A_COUNT = 153   # adjust so A+B = total
CLASS_B_COUNT = 93

# Source = all loose images in /content
IMG_EXTS = {'.jpg','.jpeg','.png','.bmp','.tif','.tiff'}
import pathlib
UPLOAD_DIR = "/content"
all_imgs = [str(x) for x in sorted(pathlib.Path(UPLOAD_DIR).glob("*")) if x.suffix.lower() in IMG_EXTS]

print(f"Total images found: {len(all_imgs)}")

# Make folders
DATA_DIR = "/content/lung_project"
os.makedirs(os.path.join(DATA_DIR, CLASS_A_NAME), exist_ok=True)
os.makedirs(os.path.join(DATA_DIR, CLASS_B_NAME), exist_ok=True)

# Split
a_list = all_imgs[:CLASS_A_COUNT]
b_list = all_imgs[CLASS_A_COUNT:CLASS_A_COUNT+CLASS_B_COUNT]

for src in a_list:
    dst = os.path.join(DATA_DIR, CLASS_A_NAME, os.path.basename(src))
    shutil.move(src, dst)

for src in b_list:
    dst = os.path.join(DATA_DIR, CLASS_B_NAME, os.path.basename(src))
    shutil.move(src, dst)

print(f"✅ Moved {len(a_list)} images → {CLASS_A_NAME}")
print(f"✅ Moved {len(b_list)} images → {CLASS_B_NAME}")


Total images found: 246
✅ Moved 153 images → normal
✅ Moved 93 images → tumour


In [12]:
import pathlib

for folder in pathlib.Path(DATA_DIR).iterdir():
    if folder.is_dir():
        count = len(list(folder.glob("*")))
        print(f"{folder.name}: {count} images")


tumour: 93 images
normal: 153 images


In [13]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

IMG_SIZE = (224, 224)
BATCH_SIZE = 16

train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    validation_split=0.2, # 80% train, 20% val
    subset="training",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

# Cache + prefetch for performance
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Build simple CNN
model = keras.Sequential([
    layers.Rescaling(1./255, input_shape=(224,224,3)),
    layers.Conv2D(32, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(64, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(128, 3, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10
)


Found 246 files belonging to 2 classes.
Using 197 files for training.
Found 246 files belonging to 2 classes.
Using 49 files for validation.


  super().__init__(**kwargs)


Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 2s/step - accuracy: 0.4732 - loss: 1.1181 - val_accuracy: 0.5714 - val_loss: 0.6856
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - accuracy: 0.6065 - loss: 0.6685 - val_accuracy: 0.5714 - val_loss: 0.6851
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - accuracy: 0.6220 - loss: 0.6758 - val_accuracy: 0.5714 - val_loss: 0.7009
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 2s/step - accuracy: 0.5892 - loss: 0.6845 - val_accuracy: 0.5714 - val_loss: 0.7623
Epoch 5/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - accuracy: 0.6337 - loss: 0.6682 - val_accuracy: 0.5714 - val_loss: 0.6925
Epoch 6/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 2s/step - accuracy: 0.6464 - loss: 0.6552 - val_accuracy: 0.5714 - val_loss: 0.6835
Epoch 7/10
[1m13/13[0m [32m━━━━━━━━━━

In [14]:
MODEL_PATH = "/content/lung_tumour_model.h5"
model.save(MODEL_PATH)
print("✅ Model saved at:", MODEL_PATH)




✅ Model saved at: /content/lung_tumour_model.h5


In [16]:
import os

print("Tumour samples:")
print(os.listdir("/content/lung_project/tumour")[:5])

print("\nNormal samples:")
print(os.listdir("/content/lung_project/normal")[:5])



Tumour samples:
['JPCNN066.png', 'JPCNN054.png', 'JPCNN020.png', 'JPCNN037.png', 'JPCNN082.png']

Normal samples:
['JPCLN096.png', 'JPCLN079.png', 'JPCLN021.png', 'JPCLN146.png', 'JPCLN019.png']


In [18]:
import os

print("Tumour samples:", os.listdir("/content/lung_project/tumour")[:10])
print("\nNormal samples:", os.listdir("/content/lung_project/normal")[:10])



Tumour samples: ['JPCNN066.png', 'JPCNN054.png', 'JPCNN020.png', 'JPCNN037.png', 'JPCNN082.png', 'JPCNN089.png', 'JPCNN091.png', 'JPCNN033.png', 'JPCNN043.png', 'JPCNN006.png']

Normal samples: ['JPCLN096.png', 'JPCLN079.png', 'JPCLN021.png', 'JPCLN146.png', 'JPCLN019.png', 'JPCLN069.png', 'JPCLN020.png', 'JPCLN077.png', 'JPCLN042.png', 'JPCLN091.png']


In [20]:
import os

print("Tumour samples:")
print(os.listdir("/content/lung_project/tumour")[:10])

print("\nNormal samples:")
print(os.listdir("/content/lung_project/normal")[:10])



Tumour samples:
['JPCNN066.png', 'JPCNN054.png', 'JPCNN020.png', 'JPCNN037.png', 'JPCNN082.png', 'JPCNN089.png', 'JPCNN091.png', 'JPCNN033.png', 'JPCNN043.png', 'JPCNN006.png']

Normal samples:
['JPCLN096.png', 'JPCLN079.png', 'JPCLN021.png', 'JPCLN146.png', 'JPCLN019.png', 'JPCLN069.png', 'JPCLN020.png', 'JPCLN077.png', 'JPCLN042.png', 'JPCLN091.png']


In [21]:
# Test tumour
test_img = "/content/lung_project/tumour/JPCNN066.png"
print("Tumour test:", predict_image(test_img))

# Test normal
test_img = "/content/lung_project/normal/JPCLN096.png"
print("Normal test:", predict_image(test_img))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 182ms/step
Tumour test: ('Normal', np.float32(0.20936929))
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
Normal test: ('Normal', np.float32(0.21083398))


In [22]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

IMG_SIZE = (224, 224)
BATCH_SIZE = 16

train_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    validation_split=0.2,
    subset="training",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)
val_ds = tf.keras.utils.image_dataset_from_directory(
    DATA_DIR,
    validation_split=0.2,
    subset="validation",
    seed=42,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE
)

AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(500).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Data augmentation
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])

# Pretrained base model
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224,224,3),
    include_top=False,
    weights='imagenet'
)
base_model.trainable = False  # freeze feature extractor

# Build model
model = keras.Sequential([
    data_augmentation,
    layers.Rescaling(1./255),
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10
)

MODEL_PATH = "/content/lung_tumour_mobilenet.h5"
model.save(MODEL_PATH)
print("✅ Transfer learning model saved at:", MODEL_PATH)


Found 246 files belonging to 2 classes.
Using 197 files for training.
Found 246 files belonging to 2 classes.
Using 49 files for validation.
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 1s/step - accuracy: 0.6395 - loss: 0.7207 - val_accuracy: 0.5714 - val_loss: 0.6454
Epoch 2/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 768ms/step - accuracy: 0.6504 - loss: 0.6677 - val_accuracy: 0.5918 - val_loss: 0.7027
Epoch 3/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 771ms/step - accuracy: 0.6329 - loss: 0.7084 - val_accuracy: 0.6122 - val_loss: 0.6783
Epoch 4/10
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 782ms/step - accuracy: 0.6579 - loss: 0.6217 - val_a



✅ Transfer learning model saved at: /content/lung_tumour_mobilenet.h5


In [23]:
print(predict_image("/content/lung_project/tumour/JPCNN066.png"))
print(predict_image("/content/lung_project/normal/JPCLN096.png"))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
('Tumour', np.float32(0.65461606))
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
('Tumour', np.float32(0.65422356))


In [24]:
import streamlit as st
import tensorflow as tf
from tensorflow.keras.utils import load_img, img_to_array
import numpy as np

# Load model
MODEL_PATH = "lung_tumour_mobilenet.h5"
model = tf.keras.models.load_model(MODEL_PATH)
IMG_SIZE = (224,224)

st.title("🫁 Lung Tumour Detection App")
st.write("Upload a lung scan image to classify as **Tumour** or **Normal**.")

uploaded_file = st.file_uploader("Choose an image...", type=["jpg","png","jpeg"])

def predict_image(img):
    img = load_img(img, target_size=IMG_SIZE)
    img_array = img_to_array(img) / 255.0
    img_array = np.expand_dims(img_array, axis=0)
    pred = model.predict(img_array)[0][0]
    label = "Tumour" if pred > 0.5 else "Normal"
    return label, pred

if uploaded_file is not None:
    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
    label, score = predict_image(uploaded_file)
    st.write(f"### Prediction: {label}")
    st.write(f"Confidence Score: {float(score):.2f}")


ModuleNotFoundError: No module named 'streamlit'