In [1]:
import numpy as np
import os
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from imblearn.over_sampling import SMOTE

# Define paths to your dataset
data_dir_train = "D:/DATASET/CNN/steatosis/train"
data_dir_val = "D:/DATASET/CNN/steatosis/train"

# Define image dimensions
img_height, img_width = 299, 299
batch_size = 32

In [2]:
# Load data using ImageDataGenerator
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # Splitting data into training and validation sets
)

train_generator = train_datagen.flow_from_directory(
    data_dir_train,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Use subset parameter for training data
)

validation_generator = train_datagen.flow_from_directory(
    data_dir_val,
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # Use subset parameter for validation data
)

Found 30815 images belonging to 4 classes.
Found 7701 images belonging to 4 classes.


In [3]:
# Load a batch of data
X_train_batch, y_train_batch = train_generator.next()

# Reshape the images to 2D arrays
X_train_reshape = X_train_batch.reshape(X_train_batch.shape[0], -1)

# Calculate the number of neighbors to use for SMOTE
n_neighbors = min(5, X_train_reshape.shape[0] - 1)

# Apply SMOTE with the adjusted number of neighbors
smote = SMOTE(k_neighbors=n_neighbors)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train_reshape, np.argmax(y_train_batch, axis=1))

ValueError: Expected n_neighbors <= n_samples,  but n_samples = 2, n_neighbors = 6

In [None]:
# Reshape X_train_resampled back to 4D array
X_train_resampled = X_train_resampled.reshape(X_train_resampled.shape[0], img_height, img_width, 3)

# Split the resampled data into train and validation sets
X_train_res, X_val_res, y_train_res, y_val_res = train_test_split(X_train_resampled, y_train_resampled, test_size=0.2, random_state=42)

In [None]:
# Define ResNet50 model architecture
base_model = ResNet50(weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(4, activation='softmax')(x)  # Change the number of units to 4
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze base model layers
for layer in base_model.layers:
    layer.trainable = False


In [None]:
# Compile the model
model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_res, y_train_res, validation_data=(X_val_res, y_val_res), epochs=3, batch_size=batch_size)

In [None]:
# Evaluate the model
loss, accuracy = model.evaluate(validation_generator)
print(f'Validation Loss: {loss}, Validation Accuracy: {accuracy}')

In [None]:
# Function for prediction and masking annotation
def predict_and_annotate(image_path):
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(299, 299))
    img_array = tf.keras.preprocessing.image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array /= 255.

    prediction = model.predict(img_array)
    predicted_class = np.argmax(prediction)
    
    # Define the NASH score mapping
    nash_scores = {
            0: "0: < 5% steatosis area",
            1: "1: 5–33% steatosis area",
            2: "2: 33–66% steatosis area",
            3: "3: >66% steatosis area"
        }

    # Get the NASH score corresponding to the maximum probability
    nash_score = nash_scores[predicted_class]
    
    # Perform masking annotation
    # For example, overlay a red mask on lipid vacuoles and blue on displaced nuclei
    masked_img = img_array[0].copy()
    lipid_mask = np.all(masked_img >= [0, 0, 0], axis=-1)  # Define conditions for lipid vacuoles
    nucleus_mask = np.all(masked_img <= [255, 255, 255], axis=-1)  # Define conditions for displaced nuclei

    masked_img[lipid_mask] = [255, 0, 0]  # Highlight lipid vacuoles in red
    masked_img[nucleus_mask] = [0, 0, 255]  # Highlight displaced nuclei in blue

    plt.imshow(masked_img)
    plt.axis('off')
    plt.show()

    return nash_score, prediction


In [None]:
# Example usage:
nash_score, prediction = predict_and_annotate("D:/DATASET/CNN/steatosis/train/3/297_231_26.png")
print("Predicted NAH score:", nash_score)
print("Prediction probabilities:", prediction)