In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Flatten, Input


In [2]:
# Base directory
base_dir = 'medical-images/'


In [3]:
# Extract dataset names (e.g., PathMNIST, BreastMNIST, BloodMNIST) and class count
datasets = os.listdir(base_dir)
datasets = sorted(datasets)  # Ensure consistent ordering
print(f"Datasets: {datasets}")

Datasets: ['bloodmnist', 'octmnist', 'pathmnist']


In [4]:
# Parse classes under each dataset
dataset_classes = {dataset: sorted(os.listdir(os.path.join(base_dir, dataset))) for dataset in datasets}
for dataset, classes in dataset_classes.items():
    print(f"{dataset} has classes: {classes}")


bloodmnist has classes: ['0', '1', '2', '3', '4', '5', '6', '7']
octmnist has classes: ['0', '1', '2', '3']
pathmnist has classes: ['0', '1', '2', '3', '4', '5', '6', '7', '8', 'PathMNIST of size 28.docx', 'pathmnist.zip']


In [5]:
# Dataset creation
img_size = 180
batch_size = 32

# Custom function to parse dataset and class labels from file paths
def extract_labels(file_path):
    parts = tf.strings.split(file_path, os.path.sep)
    dataset_name = parts[-3]  # Extract dataset name (e.g., PathMNIST)
    class_name = parts[-2]   # Extract class name (e.g., class0)
    dataset_index = datasets.index(dataset_name.numpy().decode())  # Convert dataset name to index
    class_index = int(class_name.numpy().decode().replace('class', ''))  # Extract class index
    return dataset_index, class_index

def preprocess_data(file_path, label):
    dataset_label, class_label = tf.py_function(func=extract_labels, inp=[file_path], Tout=[tf.int32, tf.int32])
    image = tf.image.decode_image(tf.io.read_file(file_path), channels=3)
    image = tf.image.resize(image, (img_size, img_size)) / 255.0
    return image, (dataset_label, class_label)


In [8]:
# Load datasets with labels
train_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir,
    label_mode="int",  # Include labels as integers
    seed=123,
    validation_split=0.2,
    subset='training',
    batch_size=batch_size,
    image_size=(img_size, img_size)
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir,
    label_mode="int",  # Include labels as integers
    seed=123,
    validation_split=0.2,
    subset='validation',
    batch_size=batch_size,
    image_size=(img_size, img_size)
)

# Preprocess data function
def preprocess_data(image, label):
    image = tf.image.resize(image, (img_size, img_size)) / 255.0  # Normalize images
    return image, label

# Map preprocessing function
train_ds = train_ds.map(preprocess_data)
val_ds = val_ds.map(preprocess_data)

# Data augmentation
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal", input_shape=(img_size, img_size, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])

# Multi-Task Model
inputs = tf.keras.Input(shape=(img_size, img_size, 3))
x = data_augmentation(inputs)
x = layers.Conv2D(16, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(32, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(64, 3, padding='same', activation='relu')(x)
x = layers.MaxPooling2D()(x)
x = layers.Flatten()(x)
x = layers.Dropout(0.5)(x)
shared_features = layers.Dense(128, activation='relu')(x)

# Output 1: Dataset classification
dataset_output = layers.Dense(len(datasets), activation='softmax', name='dataset_output')(shared_features)

# Output 2: Class classification
max_classes = max(len(classes) for classes in dataset_classes.values())  # Maximum number of classes
class_output = layers.Dense(max_classes, activation='softmax', name='class_output')(shared_features)

# Compile model
model = tf.keras.Model(inputs=inputs, outputs=[dataset_output, class_output])
model.compile(optimizer='adam',
              loss={
                  'dataset_output': 'sparse_categorical_crossentropy',
                  'class_output': 'sparse_categorical_crossentropy'
              },
              metrics=['accuracy'])

# Summary of the model
model.summary()


Found 81145 files belonging to 3 classes.
Using 64916 files for training.
Found 81145 files belonging to 3 classes.
Using 16229 files for validation.


  super().__init__(**kwargs)


In [10]:
# Create a mapping function to structure labels for multi-task learning
def prepare_labels(image, label):
    # Assuming `label` contains the class index
    # You need a way to map each class index to the dataset index
    dataset_label = tf.constant(0, dtype=tf.int32)  # Replace with your dataset mapping logic
    return image, (dataset_label, label)

# Apply the mapping function to structure the dataset
train_ds = train_ds.map(prepare_labels)
val_ds = val_ds.map(prepare_labels)


In [12]:
# Define label preprocessing
def prepare_labels(image, label):
    # Map labels appropriately
    dataset_label = tf.constant(0, dtype=tf.int32)  # Adjust to your dataset-label logic
    return image, (dataset_label, label)

# Apply the label preparation to the datasets
train_ds = train_ds.map(prepare_labels)
val_ds = val_ds.map(prepare_labels)



In [13]:
for images, (dataset_labels, class_labels) in train_ds.take(1):
    print(f"Images shape: {images.shape}")
    print(f"Dataset labels: {dataset_labels}")
    print(f"Class labels: {class_labels}")


Images shape: (32, 180, 180, 3)
Dataset labels: 0
Class labels: (<tf.Tensor: shape=(), dtype=int32, numpy=0>, <tf.Tensor: shape=(32,), dtype=int32, numpy=
array([0, 0, 1, 2, 2, 1, 2, 1, 2, 1, 1, 2, 1, 0, 2, 2, 1, 2, 2, 0, 1, 2,
       1, 2, 1, 2, 2, 2, 2, 2, 1, 0], dtype=int32)>)


In [14]:
sample_images, (sample_dataset_labels, sample_class_labels) = next(iter(train_ds))
sample_predictions = model(sample_images)
print(f"Dataset prediction shape: {sample_predictions[0].shape}")
print(f"Class prediction shape: {sample_predictions[1].shape}")


Dataset prediction shape: (32, 3)
Class prediction shape: (32, 11)


In [20]:
import tensorflow as tf
from tensorflow.keras import layers, Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout
import numpy as np

# Parameters
img_size = 128
batch_size = 32
datasets = ['bloodmnist', 'octmnist', 'pathmnist']  # Adjust this based on your datasets
dataset_classes = {'bloodmnist': ['0', '1', '2', '3', '4', '5', '6', '7'],
                   'octmnist': ['0', '1', '2', '3'],
                   'pathmnist': ['0', '1', '2', '3', '4', '5', '6', '7', '8']}
base_dir = 'medical-images'  # Replace with your dataset path

# Dataset label mapping
dataset_labels_map = {dataset: i for i, dataset in enumerate(datasets)}

# Load datasets
train_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir,
    label_mode="int",  # Include labels as integers
    seed=123,
    validation_split=0.2,
    subset='training',
    batch_size=batch_size,
    image_size=(img_size, img_size)
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    base_dir,
    label_mode="int",  # Include labels as integers
    seed=123,
    validation_split=0.2,
    subset='validation',
    batch_size=batch_size,
    image_size=(img_size, img_size)
)

# Define label preprocessing
def prepare_labels(image, label):
    # Determine dataset label based on the folder structure in the dataset directory
    dataset_label = label // 100  # Assuming dataset labels are in batches of 100 images per dataset
    class_label = label % 100  # Get the class within the dataset

    # Convert labels to one-hot encoding
    dataset_label_one_hot = tf.one_hot(dataset_label, len(datasets))
    return image, {'dataset_output': dataset_label_one_hot, 'class_output': class_label}

# Map dataset label preparation to train and validation datasets
train_ds = train_ds.map(prepare_labels)
val_ds = val_ds.map(prepare_labels)

# Data augmentation
data_augmentation = Sequential([
    layers.RandomFlip("horizontal", input_shape=(img_size, img_size, 3)),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
])

# Multi-task learning model
inputs = Input(shape=(img_size, img_size, 3))
x = data_augmentation(inputs)
x = Conv2D(16, 3, padding='same', activation='relu')(x)
x = MaxPooling2D()(x)
x = Conv2D(32, 3, padding='same', activation='relu')(x)
x = MaxPooling2D()(x)
x = Conv2D(64, 3, padding='same', activation='relu')(x)
x = MaxPooling2D()(x)
x = Flatten()(x)
x = Dropout(0.5)(x)
shared_features = Dense(128, activation='relu')(x)

# Output 1: Dataset classification
dataset_output = Dense(len(datasets), activation='softmax', name='dataset_output')(shared_features)

# Output 2: Class classification
max_classes = max(len(classes) for classes in dataset_classes.values())  # Maximum number of classes
class_output = Dense(max_classes, activation='softmax', name='class_output')(shared_features)

# Compile model
model = Model(inputs=inputs, outputs=[dataset_output, class_output])
model.compile(
    optimizer='adam',
    loss={
        'dataset_output': 'categorical_crossentropy',  # Changed to categorical_crossentropy
        'class_output': 'sparse_categorical_crossentropy',
    },
    metrics={
        'dataset_output': 'accuracy',  # Added metrics for each output
        'class_output': 'accuracy'
    }
)

# Train the model
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15
)

# Classify an image
def classify_image(image_path):
    input_image = tf.keras.utils.load_img(image_path, target_size=(img_size, img_size))
    input_image_array = tf.keras.utils.img_to_array(input_image)
    input_image_exp_dim = tf.expand_dims(input_image_array / 255.0, 0)

    dataset_pred, class_pred = model.predict(input_image_exp_dim)
    dataset_index = np.argmax(dataset_pred[0])
    class_index = np.argmax(class_pred[0])

    return f"The image belongs to {datasets[dataset_index]} and class {class_index}."

# Example usage
# result = classify_image('path/to/test/image.jpg')
# print(result)


Found 81145 files belonging to 3 classes.
Using 64916 files for training.
Found 81145 files belonging to 3 classes.
Using 16229 files for validation.
Epoch 1/15
[1m2029/2029[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m427s[0m 209ms/step - class_output_accuracy: 0.9785 - class_output_loss: 1.2822 - dataset_output_accuracy: 0.9981 - dataset_output_loss: 0.0324 - loss: 1.3145 - val_class_output_accuracy: 0.9904 - val_class_output_loss: 0.2701 - val_dataset_output_accuracy: 1.0000 - val_dataset_output_loss: 4.3897e-07 - val_loss: 0.2583
Epoch 2/15
[1m2029/2029[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m442s[0m 218ms/step - class_output_accuracy: 0.9899 - class_output_loss: 0.1404 - dataset_output_accuracy: 1.0000 - dataset_output_loss: 6.0846e-05 - loss: 0.1405 - val_class_output_accuracy: 0.9988 - val_class_output_loss: 0.0091 - val_dataset_output_accuracy: 1.0000 - val_dataset_output_loss: 0.0000e+00 - val_loss: 0.0091
Epoch 3/15
[1m2029/2029[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [21]:
# Example usage
result = classify_image('medical-images\image_1_class_2.png')
print(result)


  result = classify_image('medical-images\image_1_class_2.png')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step
The image belongs to bloodmnist and class 2.
