In [1]:
# --------------------------
# Step 1: Imports & Settings
# --------------------------
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Root folder that load_data() scans; it expects
# <DATA_DIR>/<patient>/<nodule>/images/<slice files>.
DATA_DIR = '/kaggle/input/lidcidri/LIDC-IDRI-slices'
# Every slice is resized to IMG_SIZE x IMG_SIZE; also the CNN input size.
IMG_SIZE = 128
# Width of the one-hot labels and of the softmax output layer.
NUM_CLASSES = 2
# Number of passes over the training data in model.fit().
EPOCHS = 20
# Batch size used by the augmentation generator feeding model.fit().
BATCH_SIZE = 32
# Destination of the trained model written by model.save().
MODEL_PATH = '/kaggle/working/lidc_demo_model.h5'

# --------------------------
# Step 2: Load Slices + Labels
# --------------------------
def load_data(data_dir):
    """Load nodule slices under ``data_dir`` and return a train/test split.

    Expected layout::

        <data_dir>/<patient>/<nodule>/images/<slice>.{png,jpg,jpeg}

    Each slice is read as a single-channel grayscale image, resized to
    ``IMG_SIZE x IMG_SIZE`` and scaled to ``[0, 1]``.

    NOTE: the labels are *simulated*. A nodule folder named ``...-<k>`` gets
    label ``k % 2``; this is a placeholder so the pipeline runs end to end,
    not a clinical ground truth.

    NOTE(review): the split is done per slice, so slices of the same nodule
    can land in both the train and the test set -- confirm whether a
    patient-level split is wanted before trusting the validation metrics.

    Args:
        data_dir: Path of the dataset root directory.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=42``.
        ``X_*`` are float32 arrays of shape ``(n, IMG_SIZE, IMG_SIZE, 1)``;
        ``y_*`` are one-hot arrays with ``NUM_CLASSES`` columns.

    Raises:
        ValueError: If no readable image is found under ``data_dir``.
    """
    images = []
    labels = []

    # os.listdir() returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split) reproducible.
    for patient in sorted(os.listdir(data_dir)):
        patient_path = os.path.join(data_dir, patient)
        if not os.path.isdir(patient_path):
            continue

        for nodule in sorted(os.listdir(patient_path)):
            image_folder = os.path.join(patient_path, nodule, 'images')
            if not os.path.isdir(image_folder):
                continue

            # 🔁 Fake Label: 0 for nodule-0, 1 for nodule-1, etc.
            # Only int() can fail here (non-numeric suffix); catching just
            # ValueError keeps KeyboardInterrupt and real bugs visible.
            try:
                nodule_id = int(nodule.split('-')[-1])
            except ValueError:
                continue
            label = nodule_id % 2  # Simulated binary label

            for file in sorted(os.listdir(image_folder)):
                # Case-insensitive match so e.g. "SLICE.PNG" is not dropped.
                if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    continue
                img_path = os.path.join(image_folder, file)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # cv2.imread signals an unreadable file by returning None.
                    continue
                images.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
                labels.append(label)

    if not images:
        raise ValueError("❌ No images found in dataset.")

    # Convert straight to float32 and normalise in place to avoid holding an
    # extra full-size temporary copy of the dataset.
    X = np.asarray(images, dtype='float32')
    X /= 255.0
    X = np.expand_dims(X, axis=-1)  # add the channel axis expected by Conv2D
    y = to_categorical(labels, NUM_CLASSES)
    return train_test_split(X, y, test_size=0.2, random_state=42)

# Load every slice into memory and split it 80/20 into train/test arrays.
X_train, X_test, y_train, y_test = load_data(DATA_DIR)

# --------------------------
# Step 3: Augmentation
# --------------------------
# Random rotations (rotation_range, in degrees), horizontal/vertical shifts
# (given as a fraction of the image size) and horizontal flips, applied on the
# fly to the batches produced by datagen.flow() during training.
datagen = ImageDataGenerator(
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)
# NOTE(review): per the Keras docs, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is set above, so this call looks redundant --
# confirm before removing.
datagen.fit(X_train)

# --------------------------
# Step 4: CNN Model
# --------------------------
def build_model():
    """Build and compile the small grayscale CNN classifier.

    Architecture: three ``Conv2D(3x3, relu) -> BatchNormalization ->
    MaxPooling2D`` stages with 32, 64 and 128 filters, followed by
    ``Flatten -> Dense(128, relu) -> Dropout(0.5) -> Dense(NUM_CLASSES,
    softmax)``. The first convolution declares the input shape
    ``(IMG_SIZE, IMG_SIZE, 1)``.

    Returns:
        A ``Sequential`` model compiled with ``Adam(0.0005)``, categorical
        cross-entropy loss and the accuracy metric.
    """
    net = Sequential()

    # Convolutional feature extractor: only the first stage carries the
    # input shape, the others infer it from the previous layer.
    for stage, filters in enumerate((32, 64, 128)):
        conv_kwargs = {'activation': 'relu'}
        if stage == 0:
            conv_kwargs['input_shape'] = (IMG_SIZE, IMG_SIZE, 1)
        net.add(Conv2D(filters, (3, 3), **conv_kwargs))
        net.add(BatchNormalization())
        net.add(MaxPooling2D(2, 2))

    # Classification head.
    net.add(Flatten())
    net.add(Dense(128, activation='relu'))
    net.add(Dropout(0.5))
    net.add(Dense(NUM_CLASSES, activation='softmax'))

    net.compile(
        optimizer=Adam(0.0005),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

# Instantiate a freshly initialised, compiled CNN.
model = build_model()

# --------------------------
# Step 5: Training
# --------------------------
# Training batches come from the augmenting generator; the held-out split is
# passed unaugmented as validation data.
# NOTE(review): the same X_test/y_test split serves as validation set here, so
# there is no untouched test set left for a final evaluation. Also remember
# that the labels produced by load_data() are simulated.
model.fit(datagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
          epochs=EPOCHS,
          validation_data=(X_test, y_test),
          verbose=1)

# --------------------------
# Step 6: Save
# --------------------------
# Persist the trained model to MODEL_PATH (HDF5 format, chosen by the .h5
# extension) so it can be reloaded later with load_model().
model.save(MODEL_PATH)
print(f"✅ Saved model to {MODEL_PATH}")


2025-06-15 19:55:00.101451: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1750017300.284807      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1750017300.342929      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1750017509.406306      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Epoch 1/20


  self._warn_if_super_not_called()
I0000 00:00:1750017514.486729      59 service.cc:148] XLA service 0x7dd6ac002600 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1750017514.487358      59 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1750017514.901459      59 cuda_dnn.cc:529] Loaded cuDNN version 90300


[1m  5/389[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 33ms/step - accuracy: 0.5201 - loss: 3.0334

I0000 00:00:1750017518.315951      59 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 50ms/step - accuracy: 0.5813 - loss: 1.1514 - val_accuracy: 0.3830 - val_loss: 3.2601
Epoch 2/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 39ms/step - accuracy: 0.6111 - loss: 0.6829 - val_accuracy: 0.6132 - val_loss: 0.6628
Epoch 3/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 39ms/step - accuracy: 0.6182 - loss: 0.6678 - val_accuracy: 0.6177 - val_loss: 0.6594
Epoch 4/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 39ms/step - accuracy: 0.6099 - loss: 0.6676 - val_accuracy: 0.6177 - val_loss: 0.6588
Epoch 5/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 38ms/step - accuracy: 0.6117 - loss: 0.6687 - val_accuracy: 0.6177 - val_loss: 0.6607
Epoch 6/20
[1m389/389[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 39ms/step - accuracy: 0.6176 - loss: 0.6660 - val_accuracy: 0.6170 - val_loss: 0.6808
Epoch 7/20
[1m389/389[0m 

In [2]:
# --------------------------
# Step 1: Imports & Settings
# --------------------------
import os
import numpy as np
import cv2
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam

# Configuration
# Root folder that load_images() walks recursively for image files.
DATA_DIR = '/kaggle/input/lidcidri/LIDC-IDRI-slices'  # Path to your images
# Images are resized to IMG_SIZE x IMG_SIZE before being fed to the network.
IMG_SIZE = 224  # EfficientNetB0 input size
# Width of the one-hot labels and of the softmax output layer.
NUM_CLASSES = 2
# Number of passes over the training data in model.fit().
EPOCHS = 15
# Batch size used by the augmentation generator feeding model.fit().
BATCH_SIZE = 32
# Destination of the trained model written by model.save().
MODEL_PATH = '/kaggle/working/lidc_transfer_model.h5'

# -----------------------------------
# Step 2: Load Images & Simulate Labels
# -----------------------------------
def load_images(data_dir):
    """Recursively load all images under ``data_dir`` and return a split.

    Every ``.png`` / ``.jpg`` / ``.jpeg`` file (case-insensitive) is read with
    ``cv2.imread`` using its default flags (3-channel image), resized to
    ``IMG_SIZE x IMG_SIZE`` and scaled to ``[0, 1]``.

    WARNING: labels are drawn at random (``np.random.randint(0, 2)``) and are
    pure placeholders. A model trained on them cannot learn anything
    meaningful; replace them with real annotations before interpreting any
    metric.

    Args:
        data_dir: Path of the directory tree to scan.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with ``test_size=0.2`` and ``random_state=42``.
        ``X_*`` are float32 arrays of shape ``(n, IMG_SIZE, IMG_SIZE, 3)``;
        ``y_*`` are one-hot arrays with ``NUM_CLASSES`` columns.

    Raises:
        ValueError: If no readable image is found under ``data_dir``.
    """
    X, y = [], []
    for root, dirs, files in os.walk(data_dir):
        # Sorting dirs in place makes os.walk descend in a deterministic
        # order; together with sorted(files) the sample order (and therefore
        # the seeded split membership) is reproducible across runs.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue
            img = cv2.imread(os.path.join(root, file))
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                continue
            X.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
            y.append(np.random.randint(0, 2))  # ⚠️ Replace with true labels if available
    if not X:
        raise ValueError("❌ No images found.")
    # Convert straight to float32 and normalise in place: at 224x224x3 an
    # extra temporary copy of the whole dataset is expensive.
    X = np.asarray(X, dtype='float32')
    X /= 255.0
    y = to_categorical(y, NUM_CLASSES)
    return train_test_split(X, y, test_size=0.2, random_state=42)

# Load every image into memory and split it 80/20 into train/test arrays.
X_train, X_test, y_train, y_test = load_images(DATA_DIR)

# -----------------------
# Step 3: Image Augmentation
# -----------------------
# Random rotations (rotation_range, in degrees), horizontal/vertical shifts
# (given as a fraction of the image size) and horizontal flips, applied on the
# fly to the batches produced by datagen.flow() during training.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True
)
# NOTE(review): per the Keras docs, fit() is only required when
# featurewise_center, featurewise_std_normalization or zca_whitening is
# enabled. None of them is set above, so this call looks redundant --
# confirm before removing.
datagen.fit(X_train)

# --------------------------
# Step 4: Build EfficientNetB0 Model
# --------------------------
def build_efficientnet_model():
    """Build and compile a frozen-EfficientNetB0 transfer-learning classifier.

    The model accepts images of shape ``(IMG_SIZE, IMG_SIZE, 3)`` with pixel
    values in ``[0, 1]`` -- the range produced by the rest of this pipeline.

    Keras' EfficientNet ships with its own input preprocessing and expects
    pixel values in ``[0, 255]``. Feeding ``[0, 1]`` data straight into the
    backbone hands the pretrained weights an almost-black image, so a
    ``Rescaling(255.0)`` layer is placed in front of the backbone to restore
    the range it was trained on.

    Head: ``GlobalAveragePooling2D -> Dropout(0.3) -> Dense(128, relu) ->
    Dense(NUM_CLASSES, softmax)``.

    Returns:
        A Keras ``Model`` compiled with ``Adam(learning_rate=1e-4)``,
        categorical cross-entropy loss and the accuracy metric.
    """
    # Local import: Rescaling is not part of this file's top-level imports.
    from tensorflow.keras.layers import Rescaling

    base_model = EfficientNetB0(include_top=False, weights='imagenet',
                                input_shape=(IMG_SIZE, IMG_SIZE, 3))
    base_model.trainable = False  # Freeze base model

    inputs = Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    # Map the pipeline's [0, 1] pixels back to the [0, 255] range expected by
    # EfficientNet's built-in preprocessing.
    x = Rescaling(255.0)(inputs)
    # training=False keeps the frozen backbone (incl. BatchNormalization) in
    # inference mode, as recommended for feature extraction.
    x = base_model(x, training=False)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    output = Dense(NUM_CLASSES, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=output)
    model.compile(optimizer=Adam(learning_rate=1e-4),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

# Instantiate the compiled transfer-learning model (frozen backbone + new head).
model = build_efficientnet_model()

# ------------------------
# Step 5: Train the Model
# ------------------------
# Training batches come from the augmenting generator; the held-out split is
# passed unaugmented as validation data.
# NOTE(review): load_images() assigns labels at random, so accuracy is
# expected to hover around chance until real labels are plugged in. The same
# X_test/y_test split also serves as validation set, leaving no untouched
# test set for a final evaluation.
model.fit(datagen.flow(X_train, y_train, batch_size=BATCH_SIZE),
          validation_data=(X_test, y_test),
          epochs=EPOCHS,
          verbose=1)

# -----------------------
# Step 6: Save Trained Model
# -----------------------
# Persist the trained model to MODEL_PATH (HDF5 format, chosen by the .h5
# extension) so predict_image() can reload it with load_model().
model.save(MODEL_PATH)
print(f"✅ Saved model to {MODEL_PATH}")

# -----------------------
# Step 7: Load + Predict Function
# -----------------------
def predict_image(model_path, image_path):
    """Classify a single image file with a saved model.

    The image is read with ``cv2.imread`` (default flags), resized to
    ``IMG_SIZE x IMG_SIZE``, scaled to ``[0, 1]`` and passed through the
    model as a batch of one -- the same preprocessing used for training.

    Args:
        model_path: Path of a model file readable by ``load_model``.
        image_path: Path of the image to classify.

    Returns:
        ``(class_id, confidence)``: the index of the highest-scoring class
        and the score the model assigned to it.

    Raises:
        ValueError: If ``cv2.imread`` cannot read ``image_path``.
    """
    # NOTE: the model is reloaded from disk on every call.
    classifier = load_model(model_path)

    pixels = cv2.imread(image_path)
    if pixels is None:
        raise ValueError("Image not found.")

    resized = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
    scaled = resized.astype('float32') / 255.0
    batch = scaled[np.newaxis, ...]  # add the leading batch axis

    scores = classifier.predict(batch)[0]
    best = np.argmax(scores)
    return best, scores[best]

# # Optional usage after training
# test_img = '/kaggle/input/lidcidri/LIDC-IDRI-slices/LIDC-IDRI-0001/nodule-0/images/slice-0.png'
# pred_class, prob = predict_image(MODEL_PATH, test_img)
# print(f"🧠 Predicted class: {pred_class} with confidence {prob:.2f}")
