In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
import os

# Root folder of the Multi-Cancer dataset (relative path). Later cells list the
# class folders under it and write their train/val/test copies beneath it.
base_dir = 'Multi-Cancer'

In [None]:
def get_class_subclass_directories(root_dir, exclude_dir, skip_subdirs=('train', 'val', 'test')):
    """
    List the (class, subclass) directory pairs found under ``root_dir``.

    Every immediate sub-directory of ``root_dir`` is treated as a class and
    every sub-directory of a class as one of its subclasses. Plain files at
    either level are ignored.

    Args:
        root_dir: Path of the dataset root.
        exclude_dir: Name of one class directory to leave out (compared
            case-insensitively).
        skip_subdirs: Names of second-level directories that must never be
            reported as subclasses (compared case-insensitively). The default
            matches the 'train', 'val' and 'test' folders that ``split_data``
            creates inside each class directory; without this filter a second
            run of the notebook would treat those folders as subclasses.

    Returns:
        A list of ``(class_dir, subclass_dir)`` name tuples, ordered by class
        name and then subclass name so the result does not depend on the
        order in which the file system lists entries.
    """
    excluded_class = exclude_dir.lower()
    skipped = {name.lower() for name in skip_subdirs}

    class_subclass_dirs = []
    for class_dir in sorted(os.listdir(root_dir)):
        class_path = os.path.join(root_dir, class_dir)
        if not os.path.isdir(class_path) or class_dir.lower() == excluded_class:
            continue
        for subclass_dir in sorted(os.listdir(class_path)):
            if subclass_dir.lower() in skipped:
                continue
            if os.path.isdir(os.path.join(class_path, subclass_dir)):
                class_subclass_dirs.append((class_dir, subclass_dir))
    return class_subclass_dirs

# Collect every (class, subclass) pair under base_dir, skipping the class folder named 'all'
filtered_dirs = get_class_subclass_directories(base_dir, 'all')

# Print out the directories being used
for class_dir, subclass_dir in filtered_dirs:
    print(f"Class: {class_dir}, Subclass: {subclass_dir}")


In [4]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam

def create_model(num_classes, input_shape=(200, 200, 3), learning_rate=0.001):
    """
    Build and compile a small CNN image classifier.

    The network is two convolutional stages (Conv2D -> BatchNormalization ->
    MaxPooling2D -> Dropout) followed by a 256-unit dense layer and a softmax
    output layer.

    Args:
        num_classes: Number of units in the softmax output layer.
        input_shape: ``(height, width, channels)`` of the input images. The
            height and width must match the ``target_size`` the data
            generators resize images to (``create_generators`` defaults to
            ``(200, 200)``).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A ``Sequential`` model compiled with Adam, categorical cross-entropy
        loss and the accuracy metric.
    """
    model = Sequential([
        # Stage 1: 32 filters
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(0.2),

        # Stage 2: 64 filters
        Conv2D(64, (3, 3), activation='relu'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
        Dropout(0.2),

        Flatten(),

        # Classifier head; heavier dropout here than in the conv stages
        Dense(256, activation='relu'),
        Dropout(0.4),

        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import os
import shutil
import numpy as np

def split_data(base_dir, class_dir, subclass, train_size=0.75, val_size=0.15, seed=42):
    """
    Copy the files of one class/subclass folder into train, validation and test folders.

    Files are read from ``<base_dir>/<class_dir>/<subclass>`` and copied (the
    originals are kept) into ``<base_dir>/<class_dir>/train/<subclass>``,
    ``.../val/<subclass>`` and ``.../test/<subclass>``.

    The file list is sorted and then shuffled with a seeded generator, so
    calling the function again with the same seed sends every file to the same
    split. With an unseeded shuffle a second run would copy files into
    different splits and the same image could end up in both train and test.
    Files already present in the split folders from earlier runs are not
    removed.

    Args:
        base_dir: Dataset root directory.
        class_dir: Name of the class folder under ``base_dir``.
        subclass: Name of the subclass folder under the class folder.
        train_size: Fraction of the files copied to the training split.
        val_size: Fraction of the files copied to the validation split; the
            remaining files go to the test split.
        seed: Seed for the shuffle. Pass ``None`` for a different split on
            every call.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to more than 1.
        FileNotFoundError: If the source folder does not exist.
    """
    if train_size < 0 or val_size < 0 or train_size + val_size > 1 + 1e-9:
        raise ValueError(
            f"train_size ({train_size}) and val_size ({val_size}) must be "
            "non-negative and sum to at most 1."
        )

    # Paths for source and split directories
    source_dir = os.path.join(base_dir, class_dir, subclass)
    split_dirs = {
        split: os.path.join(base_dir, class_dir, split, subclass)
        for split in ('train', 'val', 'test')
    }

    # Read the source first so a missing folder fails before any directory is
    # created. Only regular files are kept: shutil.copy cannot copy a directory.
    files = sorted(
        entry for entry in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, entry))
    )

    # Create directories if they don't exist
    for split_path in split_dirs.values():
        os.makedirs(split_path, exist_ok=True)

    # Sorted input + seeded generator => the same assignment on every run
    np.random.default_rng(seed).shuffle(files)

    # Index boundaries of the three splits
    total_files = len(files)
    train_end = int(total_files * train_size)
    val_end = train_end + int(total_files * val_size)

    # Copy files to respective directories
    for i, file_name in enumerate(files):
        if i < train_end:
            destination = split_dirs['train']
        elif i < val_end:
            destination = split_dirs['val']
        else:
            destination = split_dirs['test']
        shutil.copy(os.path.join(source_dir, file_name), destination)

# Loop through each class-subclass pair and copy its files into the
# train/val/test folders of its class
for class_dir, subclass in filtered_dirs:
    print(f"Class: {class_dir}, Subclass: {subclass}")
    split_data(base_dir, class_dir, subclass)


In [None]:


from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os

def create_generators(base_dir, class_dir, batch_size, target_size=(200, 200), seed=None):
    """
    Create training, validation, and testing generators for one class folder.

    Images are read from ``<base_dir>/<class_dir>/train``, ``.../val`` and
    ``.../test``; each sub-folder of those directories is one category
    (``class_mode='categorical'``). All images are rescaled by 1/255. Only the
    training generator applies augmentation (rotation, shifts, shear, zoom and
    horizontal flip) and shuffles its samples; the validation and test
    generators yield samples in a fixed order.

    Args:
        base_dir: Dataset root directory.
        class_dir: Name of the class folder under ``base_dir``.
        batch_size: Number of images per batch for all three generators.
        target_size: ``(height, width)`` every image is resized to.
        seed: Optional random seed for the training generator's shuffling and
            augmentation. ``None`` leaves it unseeded.

    Returns:
        A ``(train_generator, validation_generator, test_generator)`` tuple.
    """
    print("Batch size received in create_generators:", batch_size)

    # Data generator with augmentation for training
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=30,
        width_shift_range=0.15,
        height_shift_range=0.15,
        shear_range=0.15,
        zoom_range=0.15,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    # Data generator without augmentation for validation and testing
    val_test_datagen = ImageDataGenerator(rescale=1./255)

    # Settings shared by the three directory iterators
    common = dict(
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
    )

    # Training generator
    train_generator = train_datagen.flow_from_directory(
        os.path.join(base_dir, class_dir, 'train'),
        shuffle=True,
        seed=seed,
        **common
    )

    # Validation generator; a fixed order keeps its batches aligned with
    # the generator's file list, same as the test generator below
    validation_generator = val_test_datagen.flow_from_directory(
        os.path.join(base_dir, class_dir, 'val'),
        shuffle=False,
        **common
    )

    # Testing generator
    test_generator = val_test_datagen.flow_from_directory(
        os.path.join(base_dir, class_dir, 'test'),
        shuffle=False,  # Usually, we don't shuffle the test data
        **common
    )

    return train_generator, validation_generator, test_generator


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# Stop when val_loss has not improved for 10 epochs and roll the model back to
# the weights of its best epoch, so the model that is saved and evaluated is
# the best one seen rather than the one from the last epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    verbose=1,
    mode='min',
    restore_best_weights=True,
)
# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, verbose=1, mode='min')
# Names of the class folders already handled by train_model_for_each_class
trained = []
def train_model_for_each_class(base_dir, class_subclass_dirs, batch_size=16, epochs=50):
    """
    Train, save and evaluate one classifier per class folder.

    For every distinct class name in ``class_subclass_dirs`` that is not
    already in the module-level ``trained`` list, this builds the data
    generators, creates a fresh model, fits it with the module-level
    ``early_stopping`` and ``reduce_lr`` callbacks, saves it to
    ``model_<class_dir>.h5`` in the working directory and prints the test loss
    and accuracy. A class name is appended to ``trained`` only after its model
    has been saved, so a failed run can be repeated for that class.

    Args:
        base_dir: Dataset root directory.
        class_subclass_dirs: Iterable of ``(class_dir, subclass_dir)`` tuples;
            only the class names are used.
        batch_size: Batch size handed to ``create_generators``.
        epochs: Maximum number of training epochs per model.

    Raises:
        ValueError: If a class has no ``train`` directory under ``base_dir``.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # models are trained in the same order on every run (set order is not stable).
    unique_classes = list(dict.fromkeys(class_dir for class_dir, _ in class_subclass_dirs))

    for class_dir in unique_classes:
        if class_dir in trained:
            continue
        print(f"Training model for {class_dir}")

        train_subdir = os.path.join(base_dir, class_dir, 'train')
        if not os.path.exists(train_subdir):
            raise ValueError(f"Train directory for '{class_dir}' not found.")

        # Create data generators for the current class
        train_generator, validation_generator, test_generator = create_generators(
            base_dir, class_dir, batch_size
        )
        print("Training Generator - batch size:", train_generator.batch_size, "samples:", train_generator.n)
        print("Validation Generator - batch size:", validation_generator.batch_size, "samples:", validation_generator.n)

        # Take the class count from the generator rather than from
        # os.listdir: a stray file in the train folder (e.g. .DS_Store) would
        # otherwise inflate the output layer and break the label shape.
        num_classes = train_generator.num_classes

        # Create a new instance of the model for this class
        model = create_model(num_classes)

        # Train the model
        model.fit(
            train_generator,
            validation_data=validation_generator,
            epochs=epochs,
            callbacks=[early_stopping, reduce_lr]
        )

        # Save the model
        model_path = f'model_{class_dir}.h5'
        model.save(model_path)
        trained.append(class_dir)

        # Evaluate the model on the test set
        test_loss, test_accuracy = model.evaluate(test_generator)
        print(f"Test Loss for {class_dir}: {test_loss}")
        print(f"Test Accuracy for {class_dir}: {test_accuracy}")

# Train, save and evaluate one model per class listed in filtered_dirs
train_model_for_each_class(base_dir, filtered_dirs)
