In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Set the dataset directory.
# The walk in the next cell expects <data_dir>/<main cancer type>/<subclass>/<images>.
# The location can be overridden per machine with the CANCER_DATA_DIR environment
# variable; when it is unset (or empty) the original local path is used unchanged.
data_dir = os.environ.get('CANCER_DATA_DIR') or 'D:/Capestone Dataset/Multi Cancer/Multi Cancer'


In [2]:

# Extract file paths and labels.
# Walks <data_dir>/<main cancer type>/<subclass>/<image> and records one entry per
# image in three parallel lists (path, main type, subclass).
#
# Directory listings are sorted because os.listdir() returns entries in arbitrary
# order; a stable row order keeps the seeded train/validation split reproducible
# from one machine or filesystem to the next.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # leading dot: match real extensions only

filepaths, main_labels, subclass_labels = [], [], []

for main_cancer in sorted(os.listdir(data_dir)):
    main_cancer_path = os.path.join(data_dir, main_cancer)
    if not os.path.isdir(main_cancer_path):
        continue  # skip stray files sitting next to the class folders

    for subclass in sorted(os.listdir(main_cancer_path)):
        subclass_path = os.path.join(main_cancer_path, subclass)
        if not os.path.isdir(subclass_path):
            continue

        for img_name in sorted(os.listdir(subclass_path)):
            # Case-insensitive extension match (e.g. "scan.JPG" is accepted).
            if img_name.lower().endswith(IMAGE_EXTENSIONS):
                filepaths.append(os.path.join(subclass_path, img_name))
                main_labels.append(main_cancer)  # Main cancer type
                subclass_labels.append(subclass)  # Subclass


In [3]:

# Assemble the image index: one row per image, holding its path and both labels.
df = pd.DataFrame(
    dict(
        filepath=filepaths,
        main_type=main_labels,
        subclass=subclass_labels,
    )
)


In [4]:

# Hold out 20% of the rows for validation. Stratifying on the
# (main_type, subclass) pair keeps the class mix comparable in both splits,
# and the fixed random_state makes the split repeatable for a given row order.
stratify_keys = df[['main_type', 'subclass']]
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=stratify_keys,
)

# Shared image generator: only rescales pixel values by 1/255, no augmentation.
datagen = ImageDataGenerator(rescale=1.0 / 255)


In [5]:

# Create data generators for both classification tasks (main cancer type and subclass).
IMG_SIZE = (512, 512)  # images are resized to this; matches the CNN's 512x512x3 input
BATCH_SIZE = 8


def make_generator(frame, label_col, shuffle):
    """Build a categorical image generator over ``frame``.

    Args:
        frame: DataFrame with a 'filepath' column and the ``label_col`` column.
        label_col: Name of the label column ('main_type' or 'subclass').
        shuffle: Whether to shuffle the sample order between epochs.

    Returns:
        The iterator produced by ``datagen.flow_from_dataframe``.
    """
    # Pin the label set from both splits so training and validation generators
    # always share the same class -> index mapping, even if a rare class were
    # to be missing from one split. Sorted order matches the default inference.
    class_names = sorted(set(train_df[label_col]) | set(val_df[label_col]))
    return datagen.flow_from_dataframe(
        frame,
        x_col='filepath',
        y_col=label_col,
        target_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        class_mode='categorical',
        classes=class_names,
        shuffle=shuffle,
    )


# Training generators shuffle every epoch. Validation generators keep a fixed
# order so evaluation is deterministic and predictions line up with the
# generator's `classes` / `filenames` attributes.
train_gen_main = make_generator(train_df, 'main_type', shuffle=True)
val_gen_main = make_generator(val_df, 'main_type', shuffle=False)

train_gen_subclass = make_generator(train_df, 'subclass', shuffle=True)
val_gen_subclass = make_generator(val_df, 'subclass', shuffle=False)

print("Data successfully loaded!")


Found 104001 validated image filenames belonging to 8 classes.
Found 26001 validated image filenames belonging to 8 classes.
Found 104001 validated image filenames belonging to 26 classes.
Found 26001 validated image filenames belonging to 26 classes.
Data successfully loaded!


In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adamax

# CNN Model for Main Cancer Type Classification.
# The output width follows the training generator's label set (8 main cancer
# types in the recorded run), so the classifier head stays in sync with the data.
num_main_classes = len(train_gen_main.class_indices)

model_main = Sequential([
    # Declare the input with an explicit Input object. Passing `input_shape=` to
    # the first layer of a Sequential model triggers a Keras warning.
    tf.keras.Input(shape=(512, 512, 3)),

    # Convolutional Layers: four conv -> max-pool -> batch-norm stages with
    # 32, 64, 128 and 256 filters.
    Conv2D(32, (3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    BatchNormalization(),

    Conv2D(64, (3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    BatchNormalization(),

    Conv2D(128, (3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    BatchNormalization(),

    Conv2D(256, (3,3), activation='relu'),
    MaxPooling2D(pool_size=(2,2)),
    BatchNormalization(),

    # Flatten & Fully Connected Layers
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.3),

    Dense(num_main_classes, activation='softmax')  # one unit per main cancer type
])

# Compile Model: categorical cross-entropy matches the one-hot labels produced
# by the generators (class_mode='categorical').
model_main.compile(optimizer=Adamax(learning_rate=0.001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:

# Train Model.
# Each epoch took roughly 31,500 s in the recorded run and validation loss
# fluctuated between epochs, so:
#   * ModelCheckpoint keeps the best-so-far model on disk, so a crash or
#     interruption does not lose hours of training;
#   * EarlyStopping halts once val_loss has not improved for 5 epochs and
#     restores the best weights instead of keeping the last epoch's.
training_callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        "cancer_main_model_best.keras",
        monitor="val_loss",
        save_best_only=True,
    ),
    tf.keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=5,
        restore_best_weights=True,
    ),
]

# Keep the History object so the loss/accuracy curves can be inspected later.
history_main = model_main.fit(
    train_gen_main,
    validation_data=val_gen_main,
    epochs=20,
    callbacks=training_callbacks,
)


  self._warn_if_super_not_called()


Epoch 1/20
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31541s[0m 2s/step - accuracy: 0.9023 - loss: 7.8755 - val_accuracy: 0.9842 - val_loss: 0.1128
Epoch 2/20
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31815s[0m 2s/step - accuracy: 0.9586 - loss: 0.6513 - val_accuracy: 0.9820 - val_loss: 0.2160
Epoch 3/20
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31972s[0m 2s/step - accuracy: 0.9721 - loss: 0.2205 - val_accuracy: 0.9938 - val_loss: 0.0530
Epoch 4/20
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31795s[0m 2s/step - accuracy: 0.9808 - loss: 0.1968 - val_accuracy: 0.9965 - val_loss: 0.0236
Epoch 5/20
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31884s[0m 2s/step - accuracy: 0.9856 - loss: 0.0752 - val_accuracy: 0.9806 - val_loss: 0.3517
Epoch 6/20
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31792s[0m 2s/step - accuracy: 0.9879 - loss: 0.1085 - val_accuracy: 0.9938 - val

<keras.src.callbacks.history.History at 0x1b0dd45aea0>

In [9]:
# Persist the trained main-type classifier to disk under its fixed file name.
main_model_path = "cancer_main_model.h5"
model_main.save(main_model_path)

