In [1]:
import os
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adamax
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Flatten, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import confusion_matrix, classification_report
import joblib
import warnings

warnings.filterwarnings("ignore")

# Enable GPU Memory Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

print('Modules loaded')


Modules loaded


In [2]:
# Dataset path
data_dir = 'D:/Capestone Dataset/Multi Cancer/Multi Cancer'

target_folders = ['ALL', 'Brain Cancer', 'Breast Cancer', 'Cervical Cancer', 
                'Kidney Cancer', 'Lung and Colon Cancer', 'Lymphoma', 'Oral Cancer']  
filepaths = []
labels = []

for folder in target_folders:
    folder_path = os.path.join(data_dir, folder)
    
    if os.path.isdir(folder_path):
        class_dirs = os.listdir(folder_path)
        
        for class_dir in class_dirs:
            class_path = os.path.join(folder_path, class_dir)
            
            if os.path.isdir(class_path):
                images = [img for img in os.listdir(class_path) 
                        if img.lower().endswith(('png', 'jpg', 'jpeg'))]  
                
                for img in images:
                    img_path = os.path.join(class_path, img)
                    filepaths.append(img_path)
                    labels.append(class_dir)  

# Create DataFrame
df = pd.DataFrame({'filepath': filepaths, 'labels': labels})
print(df)


                                                 filepath      labels
0       D:/Capestone Dataset/Multi Cancer/Multi Cancer...  all_benign
1       D:/Capestone Dataset/Multi Cancer/Multi Cancer...  all_benign
2       D:/Capestone Dataset/Multi Cancer/Multi Cancer...  all_benign
3       D:/Capestone Dataset/Multi Cancer/Multi Cancer...  all_benign
4       D:/Capestone Dataset/Multi Cancer/Multi Cancer...  all_benign
...                                                   ...         ...
129997  D:/Capestone Dataset/Multi Cancer/Multi Cancer...    oral_scc
129998  D:/Capestone Dataset/Multi Cancer/Multi Cancer...    oral_scc
129999  D:/Capestone Dataset/Multi Cancer/Multi Cancer...    oral_scc
130000  D:/Capestone Dataset/Multi Cancer/Multi Cancer...    oral_scc
130001  D:/Capestone Dataset/Multi Cancer/Multi Cancer...    oral_scc

[130002 rows x 2 columns]


In [3]:
# Image Data Generator with preprocessing
train_datagen = ImageDataGenerator(
    rescale=1./255, 
    validation_split=0.2
)

# Train & Validation Data
train_data = train_datagen.flow_from_dataframe(
    dataframe=df,
    x_col='filepath',
    y_col='labels',
    target_size=(256, 256),  
    batch_size=8,  # Reduce batch size to avoid memory issues
    class_mode='categorical',
    subset='training',
    shuffle=True
)

val_data = train_datagen.flow_from_dataframe(
    dataframe=df,
    x_col='filepath',
    y_col='labels',
    target_size=(256, 256),  
    batch_size=8,  # Match training batch size
    class_mode='categorical',
    subset='validation',
    shuffle=True
)


Found 104002 validated image filenames belonging to 26 classes.
Found 26000 validated image filenames belonging to 26 classes.


In [4]:
# Define Fully Connected Network (FCN) Model
model = Sequential([
    Flatten(input_shape=(256, 256, 3)),  
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(26, activation='softmax')  
])

# Compile Model
model.compile(
    optimizer=Adamax(learning_rate=0.001),  
    loss='categorical_crossentropy',  
    metrics=['accuracy']
)

# Model Summary
model.summary()

In [5]:
# Train Model
history = model.fit(
    train_data,  # Training dataset
    validation_data=val_data,  # Validation dataset
    epochs=10,  # Number of epochs
    batch_size=8,  # Batch size (reduce if memory issues)
    verbose=1  # Show training progress
)


Epoch 1/10
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10332s[0m 794ms/step - accuracy: 0.3103 - loss: 2.0891 - val_accuracy: 0.0180 - val_loss: 10.3108
Epoch 2/10
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10339s[0m 795ms/step - accuracy: 0.4464 - loss: 1.4669 - val_accuracy: 0.0225 - val_loss: 12.9894
Epoch 3/10
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10349s[0m 796ms/step - accuracy: 0.4735 - loss: 1.3910 - val_accuracy: 0.0172 - val_loss: 13.5484
Epoch 4/10
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10319s[0m 794ms/step - accuracy: 0.5004 - loss: 1.3094 - val_accuracy: 0.0247 - val_loss: 15.6450
Epoch 5/10
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10302s[0m 792ms/step - accuracy: 0.5138 - loss: 1.2531 - val_accuracy: 0.0204 - val_loss: 18.4138
Epoch 6/10
[1m13001/13001[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10316s[0m 793ms/step - accuracy: 0.5254 - loss: 1.2354 - val