In [1]:
import kagglehub

# Download the "multi-cancer-prediction-stage-2" dataset through kagglehub and
# keep the local directory it returns; the following cells build paths from it.
path = kagglehub.dataset_download("yuvrajkari7/multi-cancer-prediction-stage-2")

# Show where the dataset ended up on disk.
print(path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/yuvrajkari7/multi-cancer-prediction-stage-2?dataset_version_number=1...


100%|██████████| 2.76G/2.76G [00:25<00:00, 117MB/s]

Extracting files...





/root/.cache/kagglehub/datasets/yuvrajkari7/multi-cancer-prediction-stage-2/versions/1


In [8]:
import os

# The image folders live under "<dataset root>/stage 2/{train,test}".
base_dir = os.path.join(path, "stage 2")
train_dir, test_dir = (
    os.path.join(base_dir, split_name) for split_name in ("train", "test")
)

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# --- Data pipeline configuration -------------------------------------------
IMG_SIZE = (224, 224)   # (height, width) every image is resized to
BATCH_SIZE = 32
SEED = 42               # fixes shuffle order and augmentation draws across runs

# Training images: scale pixels to [0, 1] and apply light augmentation
# (small rotations, mild zoom, horizontal mirroring).
train_gen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    zoom_range=0.1,
    horizontal_flip=True
)

# Test images: only rescale, no augmentation, so evaluation sees the raw data.
test_gen = ImageDataGenerator(rescale=1./255)

# One sub-folder per class; labels are emitted one-hot ("categorical").
# The iterator shuffles by default, so it is seeded to make the batch order and
# the random augmentations repeatable after a kernel restart.
train_data = train_gen.flow_from_directory(
    train_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    seed=SEED
)

# shuffle=False keeps the sample order fixed, so predictions can be lined up
# with test_data.classes / test_data.filenames afterwards.
test_data = test_gen.flow_from_directory(
    test_dir,
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

Found 16000 images belonging to 4 classes.
Found 4001 images belonging to 4 classes.


In [11]:
print(train_data.class_indices)
print("Classes:", train_data.num_classes)

{'breast': 0, 'kidney': 1, 'lung': 2, 'oral': 3}
Classes: 4


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import (
    Input, Conv2D, MaxPooling2D, Flatten,
    Dense, Dropout, BatchNormalization
)

# Small CNN classifier: three Conv -> BatchNorm -> MaxPool stages with growing
# filter counts (32, 64, 128), followed by a dense head with dropout.
model = Sequential([
    # Declare the input with an explicit Input layer instead of passing
    # `input_shape=` to the first Conv2D, which Keras warns against.
    # The shape is derived from IMG_SIZE so it cannot drift from the generators.
    Input(shape=(*IMG_SIZE, 3)),

    Conv2D(32, (3, 3), activation="relu"),
    BatchNormalization(),
    MaxPooling2D(),

    Conv2D(64, (3, 3), activation="relu"),
    BatchNormalization(),
    MaxPooling2D(),

    Conv2D(128, (3, 3), activation="relu"),
    BatchNormalization(),
    MaxPooling2D(),

    Flatten(),
    Dense(256, activation="relu"),
    Dropout(0.5),

    # One softmax unit per class found by the training generator
    # (4 for this dataset: breast, kidney, lung, oral).
    Dense(train_data.num_classes, activation="softmax")
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
# Training setup: Adam optimizer, categorical cross-entropy loss (the
# generators emit one-hot labels via class_mode="categorical"), and accuracy
# as the reported metric.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [16]:
# Number of full passes over the training generator.
EPOCHS = 1

# NOTE(review): the test generator doubles as validation data here, so the
# per-epoch val_* metrics are computed on the same images that the later
# model.evaluate(test_data) call scores; there is no separate hold-out split.
history = model.fit(train_data, epochs=EPOCHS, validation_data=test_data)

500/500 ━━━━━━━━━━━━━━━━━━━━ 250s 500ms/step - accuracy: 0.9578 - loss: 3.4748 - val_accuracy: 0.9580 - val_loss: 1.8204


In [17]:
# Persist the trained model to disk. The path is relative, so the file is
# written to the notebook's current working directory.
model.save("multi_cancer_stage2_model.keras")

In [18]:
# Score the model on the test generator. evaluate() returns the loss followed
# by the compiled metrics, which here is just accuracy.
loss, acc = model.evaluate(test_data)
# Report the final accuracy to four decimal places.
print(f"Test Accuracy: {acc:.4f}")

126/126 ━━━━━━━━━━━━━━━━━━━━ 17s 132ms/step - accuracy: 0.9880 - loss: 0.2967
Test Accuracy: 0.9580


In [19]:
# Class labels in the order stored in the generator's class_indices mapping
# (iterating a dict yields its keys in insertion order).
class_names = list(train_data.class_indices)
print(class_names)

['breast', 'kidney', 'lung', 'oral']
