In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.utils import resample
import cv2
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model
from scipy.ndimage import gaussian_filter
import os
import shutil

In [2]:
# Define paths and parameters
base_dir = "D:/DATASET/CNN/ballooning/train"
val_dir = "D:/DATASET/CNN/ballooning/val"
batch_size = 32

In [3]:
# Create ImageDataGenerator for training with preprocessing function
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    preprocessing_function=lambda x: np.mean(x, axis=2, keepdims=True)  # Convert to grayscale
)

In [4]:
# Load and iterate training dataset
train_generator = train_datagen.flow_from_directory(
    base_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',
    color_mode='rgb'
)

Found 27698 images belonging to 2 classes.


In [5]:
validation_generator = train_datagen.flow_from_directory(
    val_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    subset='validation'  # Specify this is validation data
)

Found 0 images belonging to 2 classes.


In [None]:
def save_balanced_dataset(source_dir, target_dir, batch_size=32):
    """
    Balance and save the dataset by augmenting the minority class and saving the images to a new directory.

    Parameters:
    - source_dir: string, directory containing the original unbalanced dataset.
    - target_dir: string, directory to save the balanced dataset.
    - batch_size: int, size of the batch for processing images.
    """
    # Define data generator with augmentation for the minority class
    datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=10,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.1,
        zoom_range=0.1,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    # Load images in batches from source_dir and apply augmentation
    generator = datagen.flow_from_directory(
        source_dir,
        target_size=(224, 224),  # Assuming ResNet input dimensions
        batch_size=batch_size,
        class_mode='binary',  # For binary classification
        save_to_dir=target_dir,  # Directory to save augmented images
        save_prefix='aug',  # Prefix for filenames of augmented images
        save_format='jpeg'
    )

    # Determine how many batches are needed to balance the dataset
    num_batches = 2000  # Adjust this number based on your specific dataset needs

    # Generate and save the augmented images
    for i in range(num_batches):
        images, labels = next(generator)
        print(f'Processed batch {i+1}/{num_batches}')

In [None]:
# Set the paths
source_directory = "D:/DATASET/CNN/ballooning/train"
target_directory = "D:/DATASET/CNN/ballooning/balanced_train"

# Create target directory if it does not exist
if not os.path.exists(target_directory):
    os.makedirs(target_directory)

# Call the function to balance and save the dataset
save_balanced_dataset(source_directory, target_directory)

print("Dataset balancing complete and saved to:", target_directory)

In [None]:
# Load ResNet50 model pre-trained on ImageNet without the top layer
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

In [None]:
# Freeze the layers of the base model
for layer in base_model.layers:
    layer.trainable = False

In [None]:
# Add custom layers on top of the base model
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)  # Using sigmoid for binary classification

In [None]:
# Create the complete model
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Compile the model
model.compile(optimizer=Adam(lr=0.0001), loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train the model


In [None]:
# Plot the training and validation accuracy
plt.plot(history.history['accuracy'], label='Train Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend()
plt.show()

In [None]:
def preprocess_and_predict(img_path, model):
    """
    Process an image file to predict hepatocyte ballooning using a trained model.

    Parameters:
    - img_path: string, path to the user's liver biopsy image.
    - model: loaded TensorFlow model trained to detect hepatocyte ballooning.

    Returns:
    - prediction: int, predicted class (0 for no ballooning, 1 for ballooning present).
    - prob: float, probability of the class prediction.
    """
    # Load the image
    img = image.load_img(img_path, target_size=(224, 224), color_mode='rgb')
    img_array = image.img_to_array(img)

    # Convert to grayscale
    gray = np.dot(img_array[..., :3], [0.2989, 0.5870, 0.1140])
    
    # Apply Gaussian filter
    filtered = gaussian_filter(gray, sigma=1)

    # Normalize the image
    filtered /= 255.0
    
    # Reshape for the model
    filtered = np.expand_dims(filtered, axis=-1)  # Add channel dimension
    filtered = np.repeat(filtered, 3, axis=-1)  # Make 3 channel for ResNet input
    filtered = np.expand_dims(filtered, axis=0)  # Add batch dimension

    # Predict using the model
    predictions = model.predict(filtered)
    predicted_class = np.argmax(predictions, axis=1)[0]
    probability = np.max(predictions, axis=1)[0]

    # Plot the processed image
    plt.imshow(filtered[0], cmap='gray')
    plt.title(f'Processed Image - Predicted Class: {predicted_class}, Probability: {probability:.2f}')
    plt.show()

    return predicted_class, probability

In [None]:
# Example of loading the model and using the function
# model = load_model('path_to_your_trained_model.h5')

In [None]:
img_path = "D:/DATASET/CNN/ballooning/val/1/97_5_43.png"
predicted_class, probability = preprocess_and_predict(img_path, model)
print(f'Predicted Class: {predicted_class} (0: No Ballooning, 1: Ballooning Present), Probability: {probability:.2f}')