In [2]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from PIL import Image
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Define the paths to your dataset folders
train_dataset_dir = "/Users/sadianasrintisha/Desktop/Dataset/Ulcerative Colitis (LIMUC) Dataset/train_and_validation_sets"
test_dataset_dir = "/Users/sadianasrintisha/Desktop/Dataset/Ulcerative Colitis (LIMUC) Dataset/test_set"

# Initialize empty lists for X_train, Y_train, X_test, and Y_test
X_train = []
Y_train = []
X_test = []
Y_test = []

# Initialize an empty list to store categorical labels
categorical_labels = []

# Define a function to read and preprocess images
def process_images(folder_path, label, is_train_set=True):
    for filename in os.listdir(folder_path):
        file_path = os.path.join(folder_path, filename)
        if os.path.isfile(file_path) and filename.endswith(".bmp"):  # Check if it's a file and ends with .bmp
            # Open and resize the image to (32, 32, 3)
            img = Image.open(file_path)
            img = img.resize((32, 32))
            img = img.convert("RGB")
            
            # Convert image data to a NumPy array
            img_array = np.array(img).astype('float32')  # Convert to float
            
            # Normalize the image data (optional)
            img_array /= 255.0  # Normalize pixel values to [0, 1]
            
            # Append the image data to the appropriate list
            if is_train_set:
                X_train.append(img_array)
                Y_train.append(label)  # Append the numerical label
            else:
                X_test.append(img_array)
                Y_test.append(label)  # Append the numerical label
            
            # Append the label for categorical encoding
            categorical_labels.append(label)  # Append the numerical label

# List the folders inside the training dataset directory
train_folders = os.listdir(train_dataset_dir)

# Create a label encoder for categorical labels
label_encoder = LabelEncoder()

# Loop through the training folders and process images
for label, folder_name in enumerate(train_folders):
    folder_path = os.path.join(train_dataset_dir, folder_name)
    if os.path.isdir(folder_path):  # Check if it's a directory
        process_images(folder_path, label)

# List the folders inside the test dataset directory
test_folders = os.listdir(test_dataset_dir)

# Loop through the test folders and process images
for label, folder_name in enumerate(test_folders):
    folder_path = os.path.join(test_dataset_dir, folder_name)
    if os.path.isdir(folder_path):  # Check if it's a directory
        process_images(folder_path, label, is_train_set=False)
        
        
        

# Encode Y_train and Y_test categorically
num_classes = len(np.unique(categorical_labels))
  # Update this to match the number of classes

# Convert Y_train and Y_test to NumPy arrays
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)

# Make sure your labels are integers ranging from 0 to num_classes - 1
Y_train = Y_train.astype(int)
Y_test = Y_test.astype(int)

# One-hot encode the labels
Y_train_categorical = to_categorical(Y_train, num_classes=num_classes)
Y_test_categorical = to_categorical(Y_test, num_classes=num_classes)



# # Encode Y_train and Y_test categorically using the label encoder
# num_classes = len(np.unique(Y_train))  # Automatically determine the number of classes
# Y_train_categorical = to_categorical(Y_train, num_classes=num_classes)
# Y_test_categorical = to_categorical(Y_test, num_classes=num_classes)


# Check the shape of X_train, Y_train_categorical, X_test, and Y_test_categorical
print("Shape of X_train:", np.shape(X_train))
print("Shape of Y_train_categorical:", np.shape(Y_train_categorical))
print("Shape of X_test:", np.shape(X_test))
print("Shape of Y_test_categorical:", np.shape(Y_test_categorical))



Shape of X_train: (9590, 32, 32, 3)
Shape of Y_train_categorical: (9590, 5)
Shape of X_test: (1686, 32, 32, 3)
Shape of Y_test_categorical: (1686, 5)


In [4]:
from tensorflow.keras.optimizers.legacy import Adam as LegacyAdam

# Create the ResNet model without the top (fully connected) layers
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(32, 32, 3))
    
# Add the top layers for classification
model = Sequential()
model.add(base_model)
model.add(GlobalAveragePooling2D())
model.add(Dense(num_classes, activation='softmax'))  # Change units to match the number of classes
optimizer = LegacyAdam(learning_rate=0.001)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])





# Split your data into training and validation sets
from sklearn.model_selection import train_test_split

X_train, X_valid, Y_train_categorical, Y_valid_categorical = train_test_split(
    X_train, Y_train_categorical, test_size=0.2, random_state=42
)

# Create tf.data.Dataset for training and validation data
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train_categorical))
valid_dataset = tf.data.Dataset.from_tensor_slices((X_valid, Y_valid_categorical))

# Define batch size and shuffle the datasets
batch_size = 64
train_dataset = train_dataset.shuffle(buffer_size=len(X_train)).batch(batch_size)
valid_dataset = valid_dataset.batch(batch_size)




In [5]:
# Training the model
epochs = 50
model.fit(train_dataset, epochs=epochs, validation_data=valid_dataset)



Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x2988888d0>

In [6]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Evaluate the model on the test dataset
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test_categorical))
test_dataset = test_dataset.batch(batch_size)

test_loss, test_accuracy = model.evaluate(test_dataset)

# Make predictions on the test dataset
predictions = model.predict(test_dataset)

# Convert one-hot encoded predictions back to class labels
predicted_labels = np.argmax(predictions, axis=1)

# Calculate accuracy
accuracy = accuracy_score(Y_test, predicted_labels)

# Calculate F1 score
f1 = f1_score(Y_test, predicted_labels, average='weighted')

# Calculate confusion matrix
conf_matrix = confusion_matrix(Y_test, predicted_labels)

print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")
print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print("Confusion Matrix:")
print(conf_matrix)
print("cohen_kappa_score :")
from sklearn.metrics import cohen_kappa_score
k = cohen_kappa_score(Y_test, predicted_labels, weights='quadratic')
print(k)


Test Loss: 11.291510581970215
Test Accuracy: 0.23843416571617126
Accuracy: 0.23843416370106763
F1 Score: 0.2394178209500662
Confusion Matrix:
[[  0 334 119   8   3]
 [  0 355 565   3   2]
 [  0 137  27   9   4]
 [  0  57  30  20  13]
 [  0   0   0   0   0]]
cohen_kappa_score :
0.13723708921782896
