In [1]:
!pip install opendatasets

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl.metadata (9.2 kB)
Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [2]:
import opendatasets as od
#5acab72028e81c5fbbddde9eb6202108
od.download("https://www.kaggle.com/datasets/xiaopengzhang12/lung-cancer-mri-images")

Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: Olanle
Your Kaggle Key: ··········
Dataset URL: https://www.kaggle.com/datasets/xiaopengzhang12/lung-cancer-mri-images


In [3]:
# Data paths (adjust if necessary)
data_dir = "//content/lung-cancer-mri-images/lung_cancer_MRI_dataset/train"
data_dir2 = "//content/lung-cancer-mri-images/lung_cancer_MRI_dataset/validate"
categories = ["cancer", "no_cancer"] #lung_aca", "lung_n", "lung_scc"]

# Image preprocessing parameters
img_size = (128, 128)  # Resize images to a consistent size
batch_size = 32

# Image data generation
def preprocess_image(image_path):
    image = cv2.imread(image_path)
    image = cv2.resize(image, img_size)
    image = image / 255.0  # Normalize pixel values
    return image

In [4]:
image_data = []
labels = []
for category_index, category in enumerate(categories):
    path = os.path.join(data_dir, category)
    for img in os.listdir(path):
        try:
            img_array = preprocess_image(os.path.join(path, img))
            image_data.append(img_array)
            labels.append(category_index)
        except Exception as e:
            print(f"Error processing image: {img}, Error: {e}")

# Convert to numpy arrays
image_data = np.array(image_data)
labels = np.array(labels)

In [5]:
image_data = []
labels = []
for category_index, category in enumerate(categories):
    # Process images from both directories in one loop
    for data_dir in [data_dir, data_dir2]:  # Iterate through both directories
        path = os.path.join(data_dir, category)
        for img in os.listdir(path):
            try:
                img_array = preprocess_image(os.path.join(path, img))
                image_data.append(img_array)
                labels.append(category_index)
            except Exception as e:
                print(f"Error processing image: {img}, Error: {e}")

# Convert to numpy arrays
image_data = np.array(image_data)
labels = np.array(labels)

In [6]:
# One-hot encode labels
labels = tf.keras.utils.to_categorical(labels)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)

In [7]:
# Build the CNN model
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5), # Added dropout for regularization
    layers.Dense(2, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=batch_size, validation_split=0.1) # Reduced epochs and added validation_split

Epoch 1/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 69ms/step - accuracy: 0.6674 - loss: 0.6183 - val_accuracy: 0.7598 - val_loss: 0.4269
Epoch 2/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.8004 - loss: 0.3805 - val_accuracy: 0.7904 - val_loss: 0.3781
Epoch 3/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.8196 - loss: 0.3367 - val_accuracy: 0.8166 - val_loss: 0.3631
Epoch 4/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8398 - loss: 0.3009 - val_accuracy: 0.7948 - val_loss: 0.3600
Epoch 5/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.8413 - loss: 0.3010 - val_accuracy: 0.8122 - val_loss: 0.3670
Epoch 6/10
[1m65/65[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.8569 - loss: 0.2615 - val_accuracy: 0.7773 - val_loss: 0.4105
Epoch 7/10
[1m65/65[0m [32m━━━

<keras.src.callbacks.history.History at 0x79895cf5a150>

In [9]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - accuracy: 0.8521 - loss: 0.4505
Test Loss: 0.3862
Test Accuracy: 0.8497


In [10]:
# Make predictions
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step


In [11]:
# Classification report and confusion matrix
print(classification_report(y_true_classes, y_pred_classes))
print(confusion_matrix(y_true_classes, y_pred_classes))

              precision    recall  f1-score   support

           0       0.94      0.82      0.88       372
           1       0.73      0.90      0.81       200

    accuracy                           0.85       572
   macro avg       0.84      0.86      0.84       572
weighted avg       0.87      0.85      0.85       572

[[306  66]
 [ 20 180]]
