In [1]:
!pip install opendatasets

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [2]:
import opendatasets as od
#5acab72028e81c5fbbddde9eb6202108
od.download("https://www.kaggle.com/datasets/xiaopengzhang12/lung-cancer-mri-images")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: olanle
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/xiaopengzhang12/lung-cancer-mri-images
Downloading lung-cancer-mri-images.zip to ./lung-cancer-mri-images


100%|██████████| 522M/522M [00:06<00:00, 83.9MB/s]





In [3]:
# Data paths (adjust if necessary)
data_dir = "//content/lung-cancer-mri-images/lung_cancer_MRI_dataset/train"
data_dir2 = "//content/lung-cancer-mri-images/lung_cancer_MRI_dataset/validate"
categories = ["cancer", "no_cancer"] #lung_aca", "lung_n", "lung_scc"]

# Image preprocessing parameters
img_size = (128, 128)  # Resize images to a consistent size
batch_size = 32

# Image data generation
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, img_size)
    image = image / 255.0  # Normalize pixel values
    return image

In [4]:
image_data = []
labels = []
for category_index, category in enumerate(categories):
    path = os.path.join(data_dir, category)
    for img in os.listdir(path):
        try:
            img_array = preprocess_image(os.path.join(path, img))
            image_data.append(img_array)
            labels.append(category_index)
        except Exception as e:
            print(f"Error processing image: {img}, Error: {e}")

# Convert to numpy arrays
image_data = np.array(image_data)
labels = np.array(labels)

In [5]:
image_data = []
labels = []
for category_index, category in enumerate(categories):
    # Process images from both directories in one loop
    for data_dir in [data_dir, data_dir2]:  # Iterate through both directories
        path = os.path.join(data_dir, category)
        for img in os.listdir(path):
            try:
                img_array = preprocess_image(os.path.join(path, img))
                image_data.append(img_array)
                labels.append(category_index)
            except Exception as e:
                print(f"Error processing image: {img}, Error: {e}")

# Convert to numpy arrays
image_data = np.array(image_data)
labels = np.array(labels)

In [6]:
# One-hot encode labels
labels = tf.keras.utils.to_categorical(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

In [7]:
# Build the CNN model
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5), # Added dropout for regularization
    layers.Dense(2, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=batch_size, validation_split=0.1) # Reduced epochs and added validation_split

Epoch 1/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 98ms/step - accuracy: 0.6405 - loss: 0.6404 - val_accuracy: 0.7336 - val_loss: 0.4419
Epoch 2/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.7700 - loss: 0.4197 - val_accuracy: 0.7817 - val_loss: 0.3900
Epoch 3/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.8192 - loss: 0.3496 - val_accuracy: 0.7686 - val_loss: 0.4183
Epoch 4/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.8035 - loss: 0.3613 - val_accuracy: 0.7598 - val_loss: 0.3494
Epoch 5/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8365 - loss: 0.2959 - val_accuracy: 0.7948 - val_loss: 0.3390
Epoch 6/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.8345 - loss: 0.3024 - val_accuracy: 0.8079 - val_loss: 0.3292
Epoch 7/10
[1m65/65[0m [32m━━━

<keras.src.callbacks.history.History at 0x7b728fdec090>

In [9]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 59ms/step - accuracy: 0.8610 - loss: 0.3264
Test Loss: 0.3066
Test Accuracy: 0.8584


In [10]:
# Make predictions
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step


In [11]:
# Classification report and confusion matrix
print(classification_report(y_true_classes, y_pred_classes))
print(confusion_matrix(y_true_classes, y_pred_classes))

              precision    recall  f1-score   support

           0       0.93      0.85      0.89       372
           1       0.76      0.88      0.81       200

    accuracy                           0.86       572
   macro avg       0.84      0.86      0.85       572
weighted avg       0.87      0.86      0.86       572

[[315  57]
 [ 24 176]]


In [12]:
model.save("lung_cancer_model.h5")



In [13]:
#Upload image
from google.colab import files
uploaded = files.upload()

#Preprocess the image
import cv2
import numpy as np

img_size = (128, 128)
file_name = list(uploaded.keys())[0]

def preprocess_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, img_size)
    image = image / 255.0  # Normalize
    image = np.expand_dims(image, axis=0)  # Add batch dimension
    return image

image = preprocess_image(file_name)

#Load model if not already loaded
from tensorflow.keras.models import load_model
model = load_model("lung_cancer_model.h5")

#Predict and show confidence
class_names = ["cancer", "no_cancer"]
prediction = model.predict(image)[0]  # Get first (and only) result

#Print each class with its confidence
for i, class_name in enumerate(class_names):
    confidence = prediction[i] * 100
    print(f"{class_name.upper()}: {confidence:.2f}%")

#Final result
predicted_class = class_names[np.argmax(prediction)]
print(f"\n🧠 Final Prediction: {predicted_class.upper()} (with {np.max(prediction)*100:.2f}% confidence)")



Saving Screenshot 2025-06-27 155530.png to Screenshot 2025-06-27 155530.png
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614ms/step
CANCER: 96.33%
NO_CANCER: 8.01%

🧠 Final Prediction: CANCER (with 96.33% confidence)


SAVING MODEL TO GITHUB
