Converting to single format and training a single model for the same.

In [1]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import recall_score
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import layers, models


In [2]:
# Define paths
base_dir = 'D:/PKG - C-NMC 2019/C-NMC_training_data'
folds = ['fold_0', 'fold_1', 'fold_2']
categories = ['all', 'hem']

# Parameters
img_width, img_height = 150, 150
batch_size = 32
epochs = 10


In [3]:
# Updated Function to create data generators
def create_data_generators(base_dir, folds, categories, img_width, img_height, batch_size, validation_split=0.1):
    train_generators = []
    val_generators = []
    
    for fold in folds:
        # Directory for current fold
        fold_dir = os.path.join(base_dir, fold)
        
        # Create separate train and validation generators for each fold
        train_datagen = ImageDataGenerator(
            rescale=1.0/255.0,
            rotation_range=20,
            width_shift_range=0.2,
            height_shift_range=0.2,
            shear_range=0.2,
            zoom_range=0.2,
            horizontal_flip=True,
            validation_split=validation_split
        )
        
        train_generator = train_datagen.flow_from_directory(
            fold_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='binary',  # Assuming binary classification
            subset='training'
        )
        
        val_generator = train_datagen.flow_from_directory(
            fold_dir,
            target_size=(img_width, img_height),
            batch_size=batch_size,
            class_mode='binary',  # Assuming binary classification
            subset='validation'
        )
        
        # Append generators to lists
        train_generators.append(train_generator)
        val_generators.append(val_generator)
    
    return train_generators, val_generators

# Create data generators
train_generators, val_generators = create_data_generators(base_dir, folds, categories, img_width, img_height, batch_size)


Found 3175 images belonging to 2 classes.
Found 352 images belonging to 2 classes.
Found 3224 images belonging to 2 classes.
Found 357 images belonging to 2 classes.
Found 3199 images belonging to 2 classes.
Found 354 images belonging to 2 classes.


In [4]:
# Define the ResNet model
def build_resnet():
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_width, img_height, 3))
    x = base_model.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(1024, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    predictions = layers.Dense(1, activation='sigmoid')(x)
    
    model = models.Model(inputs=base_model.input, outputs=predictions)
    
    # Freeze the convolutional base
    for layer in base_model.layers:
        layer.trainable = False
    
    return model

# Build the ResNet model
resnet = build_resnet()


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [5]:
# Compile the model
resnet.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])


In [6]:
histories = []  # To store history for each fold

for i in range(len(folds)):
    print(f"Training on fold {i}")
    
    # Get the generators for this fold
    train_generator = train_generators[i]
    val_generator = val_generators[i]
    
    # Train the model for this fold
    history = resnet.fit(
        train_generator,
        steps_per_epoch=train_generator.samples // batch_size,
        epochs=epochs,
        validation_data=val_generator,
        validation_steps=val_generator.samples // batch_size
    )
    
    histories.append(history)  # Store the history for this fold


Training on fold 0
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training on fold 1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training on fold 2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [7]:
# Function to evaluate model and compute recall, accuracy, and loss
def evaluate_model(model, generator):
    # Get true labels
    y_true = generator.classes
    
    # Predict probabilities
    y_pred_prob = model.predict(generator)
    
    # Convert probabilities to binary predictions
    y_pred_binary = np.round(y_pred_prob).flatten()
    
    # Calculate recall
    recall = recall_score(y_true, y_pred_binary)
    
    # Calculate accuracy and loss
    loss, accuracy = model.evaluate(generator, verbose=0)
    
    return recall, accuracy, loss

# Evaluate on each fold
for i in range(len(folds)):
    print(f"Evaluating on fold {i}")
    val_generator = val_generators[i]
    recall, accuracy, loss = evaluate_model(resnet, val_generator)
    print(f"Recall for fold {i}: {recall:.4f}")
    print(f"Accuracy for fold {i}: {accuracy:.4f}")
    print(f"Loss for fold {i}: {loss:.4f}")


Evaluating on fold 0
Recall for fold 0: 0.0000
Accuracy for fold 0: 0.6790
Loss for fold 0: 0.6243
Evaluating on fold 1
Recall for fold 1: 0.0000
Accuracy for fold 1: 0.6751
Loss for fold 1: 0.6280
Evaluating on fold 2
Recall for fold 2: 0.0000
Accuracy for fold 2: 0.6921
Loss for fold 2: 0.6128


In [8]:
# Save the model
resnet.save('resnet_binary.h5')
print("Model saved as resnet_binary.h5")


Model saved as resnet_binary.h5


In [None]:
# Evaluation on test set (fold_1)
test_generator = datagen.flow_from_directory(
    os.path.join(base_dir, folds[1]),  # Use fold_1 for testing
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary',  # Assuming binary classification
    subset='validation',  # Use 'validation' subset
    shuffle=False  # Keep data order to match predictions with labels
)

# Evaluate the model
loss, accuracy = model.evaluate(test_generator)

# Predict probabilities for the test set
y_pred_prob = model.predict(test_generator)

# Convert probabilities to class labels
y_pred = (y_pred_prob > 0.5).astype(int)

# Extract true labels
y_true = test_generator.classes

# Calculate recall (sensitivity) using sklearn's recall_score
from sklearn.metrics import recall_score

recall = recall_score(y_true, y_pred)

print(f"Recall: {recall:.4f}")
print(f"Test Loss: {loss:.4f}")
print(f"Test Accuracy: {accuracy:.4f}")


Completed the entire process with purely one folder