In [None]:
import os
# Display the kernel's current working directory (it is changed in the next cell).
os.getcwd()

In [None]:
# Work from the dataset root: later cells write their outputs (image_labels.xlsx,
# the .npy arrays, the saved model) with relative paths, so they land in this folder.
os.chdir("C:\\COLON_CANCER DATASET")

In [None]:
pip  install albumentations

In [None]:
import os
import cv2
import albumentations as A
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
import random

def augment_images(input_dir, output_dir, num_augmented=5):
    """Save `num_augmented` randomly augmented copies of each readable image.

    Every entry of `input_dir` is read with OpenCV; entries that cannot be
    decoded are skipped. Each copy is written to `output_dir` (created when it
    does not exist) as ``aug_<copy index>_<original file name>``.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Random augmentation pipeline; every transform fires with its own probability.
    pipeline = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.3),
        A.Rotate(limit=45, p=0.7),
        A.RandomBrightnessContrast(p=0.5),
        A.GaussianBlur(blur_limit=(3, 7), p=0.3),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=30, p=0.5),
        A.ElasticTransform(alpha=1, sigma=50, p=0.3),
        A.GridDistortion(p=0.3),
        A.CLAHE(clip_limit=2, tile_grid_size=(8,8), p=0.3)
    ])

    for file_name in tqdm(os.listdir(input_dir)):
        bgr = cv2.imread(os.path.join(input_dir, file_name))
        if bgr is None:
            # cv2.imread returns None for anything it cannot decode.
            continue
        # The pipeline is fed RGB; OpenCV reads and writes BGR.
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        for copy_idx in range(num_augmented):
            result = pipeline(image=rgb)['image']
            result = np.clip(result, 0, 255).astype(np.uint8)

            target = os.path.join(output_dir, f"aug_{copy_idx}_{file_name}")
            cv2.imwrite(target, cv2.cvtColor(result, cv2.COLOR_RGB2BGR))
            
        
    
if __name__ == "__main__":
    # Source images, and the folder that receives their augmented copies.
    input_folder = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\COMBINED_PICS"  
    output_folder = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\COMBINED_PICS_AUG"  
    # 15 augmented copies are requested for every readable source image.
    augment_images(input_folder, output_folder, num_augmented=15)


In [None]:
import os
import random
import shutil

def split_dataset(input_dir, output_base_dir, train_ratio=0.7, val_ratio=0.1, test_ratio=0.2, seed=None):
    """Randomly copy the files of `input_dir` into train/val/test sub-folders.

    Args:
        input_dir: Folder whose regular files are split (sub-directories are ignored).
        output_base_dir: Folder that receives the ``train``, ``val`` and ``test``
            sub-folders (created when missing).
        train_ratio, val_ratio, test_ratio: Fractions of the files for each split;
            they must sum to 1 (within floating-point tolerance).
        seed: Optional seed for a reproducible shuffle. With ``None`` the global
            ``random`` state is used, as before.

    Raises:
        AssertionError: If the three ratios do not sum to 1.

    NOTE(review): when `input_dir` holds augmented copies, variants of the same
    source image can end up in different splits -- confirm this is intended.
    """
    import math

    # Compare with a tolerance: an exact `== 1` rejects valid inputs such as
    # 0.7 + 0.2 + 0.1, which evaluates to 0.9999999999999999.
    assert math.isclose(train_ratio + val_ratio + test_ratio, 1.0, rel_tol=0.0, abs_tol=1e-9), \
        "train_ratio + val_ratio + test_ratio must sum to 1"

    split_dirs = {
        name: os.path.join(output_base_dir, name) for name in ("train", "val", "test")
    }
    for folder in split_dirs.values():
        os.makedirs(folder, exist_ok=True)

    # Only regular files can be copied with shutil.copy; sorting first makes a
    # seeded shuffle independent of the directory listing order.
    image_files = sorted(
        name for name in os.listdir(input_dir)
        if os.path.isfile(os.path.join(input_dir, name))
    )
    if seed is None:
        random.shuffle(image_files)
    else:
        random.Random(seed).shuffle(image_files)

    # The small epsilon keeps floor semantics but absorbs float error:
    # int(10 * (0.7 + 0.1)) is 7, not 8, which would leave `val` empty.
    total = len(image_files)
    train_split = int(total * train_ratio + 1e-9)
    val_split = int(total * (train_ratio + val_ratio) + 1e-9)

    partitions = {
        "train": image_files[:train_split],
        "val": image_files[train_split:val_split],
        "test": image_files[val_split:],
    }
    for name, files in partitions.items():
        for file in files:
            shutil.copy(os.path.join(input_dir, file), os.path.join(split_dirs[name], file))

    train_files, val_files, test_files = partitions["train"], partitions["val"], partitions["test"]
    print(f"Dataset split complete: {len(train_files)} train, {len(val_files)} val, {len(test_files)} test")

if __name__ == "__main__":
    # Split the augmented images into AUG_PICS/train, AUG_PICS/val and AUG_PICS/test.
    input_folder = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\COMBINED_PICS_AUG" 
    dataset_split_folder = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS"  
    
    # Uses the function's default 0.7 / 0.1 / 0.2 ratios.
    split_dataset(input_folder, dataset_split_folder)


In [None]:
import os
import pandas as pd

def create_excel(image_dir, output_excel):
    """Write a spreadsheet mapping each image file name to its modality label.

    A file gets the label of the first modality tag (checked in the order
    BLI, FICE, LCI, NBI, WLI) that occurs in its name; files whose name
    contains none of the tags are left out. The sheet has the columns
    ``Image_Name`` and ``Label`` and is written to `output_excel`.
    """
    label_map = {"BLI": 0, "FICE": 1, "LCI": 2, "NBI": 3, "WLI": 4}

    def first_matching_label(name):
        # Case-sensitive substring match; None when no tag is present.
        return next((code for tag, code in label_map.items() if tag in name), None)

    rows = []
    for file_name in os.listdir(image_dir):
        label = first_matching_label(file_name)
        if label is not None:
            rows.append([file_name, label])

    table = pd.DataFrame(rows, columns=["Image_Name", "Label"])
    table.to_excel(output_excel, index=False)
    print(f"Excel file saved at {output_excel}")

if __name__ == "__main__":
    # Label every augmented image; the sheet is written relative to the
    # current working directory.
    image_folder = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\COMBINED_PICS_AUG" 
    excel_output_path = "image_labels.xlsx"
    create_excel(image_folder, excel_output_path)


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm
import pickle


# ImageNet-pretrained DenseNet201 without its classification head; with
# pooling='avg' each image yields a single pooled feature vector.
model = DenseNet201(weights='imagenet', include_top=False, pooling='avg')


image_folder = 'C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\train'


# The cells that load the training features read them from
# AUG_PICS\Train_Feature_PKLs, so they must be written under that name
# (writing to 'Feature_PKLs' left those cells without any input).
output_folder = os.path.join(os.path.dirname(image_folder), 'Train_Feature_PKLs')
os.makedirs(output_folder, exist_ok=True)


def extract_features(image_path, model, target_size=(224, 224)):
    """Run one image file through `model` and return the prediction.

    Args:
        image_path: Path of the image to load.
        model: Object exposing a Keras-style ``predict`` method.
        target_size: (height, width) the image is resized to while loading.

    Returns:
        The output of ``model.predict`` for the one-image batch.
    """
    img = image.load_img(image_path, target_size=target_size)
    # Add the leading batch axis, then apply DenseNet's input preprocessing.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    # verbose=0: this is called once per image, and the default per-call
    # progress bar would flood the notebook output.
    return model.predict(batch, verbose=0)


# Pickle one flattened feature vector per training image.
for img_file in tqdm(os.listdir(image_folder)):
    img_path = os.path.join(image_folder, img_file)
    if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
        # Anything that is not a PNG/JPEG is left untouched.
        continue

    features = extract_features(img_path, model)
    feature_vector = features.flatten()

    # <image stem>_features_densenet.pkl inside the output folder.
    feature_filename = os.path.join(
        output_folder,
        f"{os.path.splitext(img_file)[0]}_features_densenet.pkl",
    )
    with open(feature_filename, 'wb') as f:
        pickle.dump(feature_vector, f)
    print(f"Features saved for {img_file}!")

print("Feature extraction complete. Features saved to individual .pkl files in the folder:", output_folder)


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm
import pickle


# ImageNet-pretrained DenseNet201 without its classification head
# (pooling='avg'); this rebinds the global `model`.
model = DenseNet201(weights='imagenet', include_top=False, pooling='avg')


# Images of the test split.
image_folder = 'C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\test'


# Feature pickles go to AUG_PICS\Test_Feature_PKLs, a sibling of the split folder.
output_folder = os.path.join(os.path.dirname(image_folder), 'Test_Feature_PKLs')
os.makedirs(output_folder, exist_ok=True)


def extract_features(image_path, model, target_size=(224, 224)):
    """Run one image file through `model` and return the prediction.

    Args:
        image_path: Path of the image to load.
        model: Object exposing a Keras-style ``predict`` method.
        target_size: (height, width) the image is resized to while loading.

    Returns:
        The output of ``model.predict`` for the one-image batch.
    """
    img = image.load_img(image_path, target_size=target_size)
    # Add the leading batch axis, then apply DenseNet's input preprocessing.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    # verbose=0: this is called once per image, and the default per-call
    # progress bar would flood the notebook output.
    return model.predict(batch, verbose=0)


# Pickle one flattened feature vector per test image.
for img_file in tqdm(os.listdir(image_folder)):
    img_path = os.path.join(image_folder, img_file)
    if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
        # Anything that is not a PNG/JPEG is left untouched.
        continue

    features = extract_features(img_path, model)
    feature_vector = features.flatten()

    # <image stem>_features_densenet.pkl inside the output folder.
    feature_filename = os.path.join(
        output_folder,
        f"{os.path.splitext(img_file)[0]}_features_densenet.pkl",
    )
    with open(feature_filename, 'wb') as f:
        pickle.dump(feature_vector, f)
    print(f"Features saved for {img_file}!")

print("Feature extraction complete. Features saved to individual .pkl files in the folder:", output_folder)


In [None]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.densenet import preprocess_input
from tqdm import tqdm
import pickle


# ImageNet-pretrained DenseNet201 without its classification head
# (pooling='avg'); this rebinds the global `model`.
model = DenseNet201(weights='imagenet', include_top=False, pooling='avg')


# Images of the validation split.
image_folder = 'C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\val'


# Feature pickles go to AUG_PICS\Validation_Feature_PKLs, a sibling of the split folder.
output_folder = os.path.join(os.path.dirname(image_folder), 'Validation_Feature_PKLs')
os.makedirs(output_folder, exist_ok=True)


def extract_features(image_path, model, target_size=(224, 224)):
    """Run one image file through `model` and return the prediction.

    Args:
        image_path: Path of the image to load.
        model: Object exposing a Keras-style ``predict`` method.
        target_size: (height, width) the image is resized to while loading.

    Returns:
        The output of ``model.predict`` for the one-image batch.
    """
    img = image.load_img(image_path, target_size=target_size)
    # Add the leading batch axis, then apply DenseNet's input preprocessing.
    batch = np.expand_dims(image.img_to_array(img), axis=0)
    batch = preprocess_input(batch)
    # verbose=0: this is called once per image, and the default per-call
    # progress bar would flood the notebook output.
    return model.predict(batch, verbose=0)


# Pickle one flattened feature vector per validation image.
for img_file in tqdm(os.listdir(image_folder)):
    img_path = os.path.join(image_folder, img_file)
    if not img_path.lower().endswith(('.png', '.jpg', '.jpeg')):
        # Anything that is not a PNG/JPEG is left untouched.
        continue

    features = extract_features(img_path, model)
    feature_vector = features.flatten()

    # <image stem>_features_densenet.pkl inside the output folder.
    feature_filename = os.path.join(
        output_folder,
        f"{os.path.splitext(img_file)[0]}_features_densenet.pkl",
    )
    with open(feature_filename, 'wb') as f:
        pickle.dump(feature_vector, f)
    print(f"Features saved for {img_file}!")

print("Feature extraction complete. Features saved to individual .pkl files in the folder:", output_folder)


In [None]:
import os
import pickle

# Sanity check: load one pickled feature vector and show its shape.
feature_dir = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\Train_Feature_PKLs"
filename = os.path.join(feature_dir, "aug_1_WLI_01_features_densenet.pkl")

# The train/val/test split is random, so this particular image is not
# guaranteed to be in the training split; fall back to any available vector.
if not os.path.exists(filename):
    available = sorted(name for name in os.listdir(feature_dir) if name.lower().endswith(".pkl"))
    if not available:
        raise FileNotFoundError(f"No .pkl feature files found in {feature_dir}")
    filename = os.path.join(feature_dir, available[0])

with open(filename, 'rb') as f:
    data = pickle.load(f)

print(data.shape)

In [None]:
import pickle
import numpy as np

# Folder holding one pickled feature vector per training image.
directory="C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\Train_Feature_PKLs"

# Only .pkl files are feature vectors; anything else in the folder is ignored.
# NOTE(review): rows follow the os.listdir order, while the label arrays are
# built in spreadsheet order -- confirm the two orders agree.
feature_files=[name for name in os.listdir(directory) if name.lower().endswith(".pkl")]
if not feature_files:
    raise FileNotFoundError(f"No .pkl feature files found in {directory}")

con_features=[]
for files in feature_files:
    with open(os.path.join(directory,files),'rb') as f:
        con_features.append(np.asarray(pickle.load(f)).ravel())

count=len(con_features)

# np.stack builds the (n_files, n_features) matrix directly and raises when a
# vector has a different length, instead of reshaping everything with the
# length of whichever file was read last.
con_features_mod=np.stack(con_features,axis=0)

print(con_features_mod.shape)

np.save("CONCATENATED_FEATURES_TRAIN.npy",con_features_mod)

print("Filename saved succesfully!")
        

In [None]:
import pickle
import numpy as np

# Folder holding one pickled feature vector per validation image.
directory="C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\Validation_Feature_PKLs"

# Only .pkl files are feature vectors; anything else in the folder is ignored.
# NOTE(review): rows follow the os.listdir order, while the label arrays are
# built in spreadsheet order -- confirm the two orders agree.
feature_files=[name for name in os.listdir(directory) if name.lower().endswith(".pkl")]
if not feature_files:
    raise FileNotFoundError(f"No .pkl feature files found in {directory}")

con_features=[]
for files in feature_files:
    with open(os.path.join(directory,files),'rb') as f:
        con_features.append(np.asarray(pickle.load(f)).ravel())

count=len(con_features)

# np.stack builds the (n_files, n_features) matrix directly and raises when a
# vector has a different length, instead of reshaping everything with the
# length of whichever file was read last.
con_features_mod=np.stack(con_features,axis=0)

print(con_features_mod.shape)

np.save("CONCATENATED_FEATURES_VALIDATE.npy",con_features_mod)

print("Filename saved succesfully!")
        

In [None]:
import pickle
import numpy as np

# Folder holding one pickled feature vector per test image.
directory="C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\Test_Feature_PKLs"

# Only .pkl files are feature vectors; anything else in the folder is ignored.
feature_files=[name for name in os.listdir(directory) if name.lower().endswith(".pkl")]
if not feature_files:
    raise FileNotFoundError(f"No .pkl feature files found in {directory}")

con_features=[]
for files in feature_files:
    with open(os.path.join(directory,files),'rb') as f:
        con_features.append(np.asarray(pickle.load(f)).ravel())

count=len(con_features)

# np.stack builds the (n_files, n_features) matrix directly and raises when a
# vector has a different length, instead of reshaping everything with the
# length of whichever file was read last.
con_features_mod=np.stack(con_features,axis=0)

print(con_features_mod.shape)

np.save("CONCATENATED_FEATURES_TEST.npy",con_features_mod)

print("Filename saved succesfully!")
        

In [None]:
import os
import pandas as pd


# Image-name -> label table; read relative to the current working directory.
df = pd.read_excel("image_labels.xlsx")


# File names that ended up in the training split.
directory = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\train"
train_files = set(os.listdir(directory)) 


# Labels of the rows whose image is in the training split, in spreadsheet row order.
# NOTE(review): the training features are stacked in directory-listing order of
# the pickle folder -- confirm that order matches the spreadsheet order.
label_train = df[df["Image_Name"].isin(train_files)]["Label"].tolist()

print(f"Total matched labels: {len(label_train)}")

# `np` is not imported in this cell; it relies on an earlier cell's import.
label_train_arr=np.array(label_train)

np.save("Training_labels.npy",label_train_arr)

print("Training labels saved successfully!!")


In [None]:
import os

import numpy as np
import pandas as pd


# Image-name -> label table; read relative to the current working directory.
df = pd.read_excel("image_labels.xlsx")


# File names that ended up in the validation split.
directory = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\val"
validate_files = set(os.listdir(directory))


# Select by the *validation* file names. Filtering on the training split's
# names here would store the training labels as the validation labels.
# NOTE(review): labels come out in spreadsheet row order; confirm this matches
# the order in which the validation features are stacked.
label_validate = df[df["Image_Name"].isin(validate_files)]["Label"].tolist()

print(f"Total matched labels: {len(label_validate)}")

label_validate_arr=np.array(label_validate)

np.save("Validation_labels.npy",label_validate_arr)

print("Validation labels saved successfully!!")


In [None]:
import os
import pickle
import numpy as np


# Same folder the test-feature extraction writes to; spelled with the exact
# case ('Test_Feature_PKLs') so it also resolves on case-sensitive file systems.
directory = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\Test_Feature_PKLs"


modalities = ["bli", "wli", "nbi", "fice", "lci"]


# One feature matrix per modality; a file belongs to a modality when the
# lower-cased modality tag occurs in its lower-cased name.
for modality in modalities:
    con_features = []

    for file in os.listdir(directory):
        lowered = file.lower()
        if modality in lowered and lowered.endswith(".pkl"):
            with open(os.path.join(directory, file), 'rb') as f:
                con_features.append(np.asarray(pickle.load(f)).ravel())

    count = len(con_features)

    if count > 0:
        # np.stack yields (n_files, n_features) and raises on vectors of
        # unequal length rather than reshaping with the last file's length.
        con_features_mod = np.stack(con_features, axis=0)

        np.save(f"CONCATENATED_FEATURES_TEST_{modality.upper()}.npy", con_features_mod)

        print(f"Saved {modality.upper()} test features successfully! Shape: {con_features_mod.shape}")
    else:
        print(f"No files found for {modality.upper()}.")


In [None]:
import os
import numpy as np
import pandas as pd


# Image-name -> label table; read relative to the current working directory.
df = pd.read_excel("image_labels.xlsx")

directory = "C:\\COLON_CANCER DATASET\\PolypDB\\PolypDB_modality_wise\\AUG_PICS\\test"

modalities = ["bli", "wli", "nbi", "fice", "lci"]

test_files = set(os.listdir(directory))

# For each modality, save the labels of the test images whose lower-cased
# file name contains the modality tag.
for modality in modalities:
    modality_test_files = set(filter(lambda name: modality in name.lower(), test_files))

    in_modality = df["Image_Name"].isin(modality_test_files)
    label_test = df.loc[in_modality, "Label"].tolist()

    label_test_arr = np.array(label_test)
    np.save(f"{modality.upper()}_TEST_LABELS.npy", label_test_arr)

    print(f"Total matched {modality.upper()} test labels: {len(label_test)}")
    print(f"{modality.upper()} test labels saved successfully!")


In [None]:
import os
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks, regularizers
from tensorflow.keras.utils import to_categorical
import numpy as np
import pandas as pd


# Feature matrices and integer labels saved by the earlier cells.
X_train = np.load("CONCATENATED_FEATURES_TRAIN.npy")
y_train = np.load("Training_labels.npy")
X_val = np.load("CONCATENATED_FEATURES_VALIDATE.npy")
y_val = np.load("Validation_labels.npy")

print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
print(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")

# Features and labels are produced by separate cells and paired purely by row
# position, so differing counts mean the two files cannot belong together.
for split_name, split_X, split_y in (("train", X_train, y_train), ("val", X_val, y_val)):
    if len(split_X) != len(split_y):
        raise ValueError(
            f"{split_name}: {len(split_X)} feature rows but {len(split_y)} labels"
        )


# One-hot encode the integer class ids for categorical cross-entropy.
num_classes = 5
y_train = to_categorical(y_train, num_classes=num_classes)
y_val = to_categorical(y_val, num_classes=num_classes)


def build_mlp(input_shape=(1920,), num_classes=5):
    """Build and compile the MLP classifier that sits on top of the features.

    Seven hidden blocks of Dense -> BatchNormalization -> ReLU -> Dropout with
    shrinking widths, followed by a softmax layer with `num_classes` outputs.
    The model is compiled with AdamW and categorical cross-entropy and tracks
    accuracy.
    """
    # (units, dropout rate) of each hidden block, in order.
    hidden_blocks = [
        (1024, 0.6),
        (512, 0.6),
        (256, 0.55),
        (128, 0.5),
        (64, 0.5),
        (32, 0.45),
        (16, 0.35),
    ]

    inputs = layers.Input(shape=input_shape)
    x = inputs
    for units, drop_rate in hidden_blocks:
        x = layers.Dense(
            units,
            kernel_initializer='glorot_uniform',
            kernel_regularizer=regularizers.l2(1e-4),
        )(x)
        x = layers.BatchNormalization(momentum=0.5)(x)
        x = layers.ReLU()(x)
        x = layers.Dropout(drop_rate)(x)

    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer=optimizers.AdamW(learning_rate=0.01, weight_decay=1e-4, clipnorm=1.0),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

if __name__ == "__main__":
    model = build_mlp()
    model.summary()

    # Halve the learning rate after 5 epochs without val_loss improvement.
    lr_scheduler = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_val, y_val),
        epochs=100,
        batch_size=32,
        callbacks=[lr_scheduler],
    )

    # Report every 10th epoch (0-based index) plus the final epoch when it is
    # not already one of them.
    metrics = history.history
    final_epoch = len(metrics['accuracy']) - 1
    logged_epochs = list(range(0, final_epoch + 1, 10))
    if final_epoch % 10 != 0:
        logged_epochs.append(final_epoch)

    metric_keys = ('accuracy', 'loss', 'val_accuracy', 'val_loss')
    log_data = [
        [epoch] + [metrics[key][epoch] for key in metric_keys]
        for epoch in logged_epochs
    ]

    log_df = pd.DataFrame(
        log_data,
        columns=['Epoch', 'Training Accuracy', 'Training Loss', 'Validation Accuracy', 'Validation Loss'],
    )
    print(log_df.to_string(index=False))


In [None]:
import matplotlib.pyplot as plt

# Persist the trained classifier before plotting.
model.save("mlp_model_Densenet201.h5")

print("Model saved successfully!!")

# Training vs. validation accuracy over the epochs.
fig, ax = plt.subplots(figsize=(10, 6))
for key, label in (('accuracy', 'Training Accuracy'), ('val_accuracy', 'Validation Accuracy')):
    ax.plot(history.history[key], label=label)
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
ax.set_title('Training and Validation Accuracy for DenseNet201')
plt.show()

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import load_model
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, log_loss


# Reload the classifier from disk; this rebinds the global `model`.
model = load_model("mlp_model_Densenet201.h5")


# Order in which the per-modality test files are evaluated.
modalities = ["BLI", "WLI", "FICE", "LCI", "NBI"]
# Class index -> modality name; same numbering as the label spreadsheet
# (BLI 0, FICE 1, LCI 2, NBI 3, WLI 4).
label_order = ["BLI", "FICE", "LCI", "NBI", "WLI"]
label_map = {modality: idx for idx, modality in enumerate(label_order)}


def plot_confusion_matrix(y_true, y_pred, modality):
    """Show a 5x5 confusion-matrix heatmap for one modality's predictions.

    Rows are true labels and columns predicted labels, both over class ids
    0-4 and annotated with the names in the global `label_order`.
    """
    matrix = confusion_matrix(y_true, y_pred, labels=range(5))

    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(
        matrix,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=label_order,
        yticklabels=label_order,
        ax=ax,
    )
    ax.set_xlabel("Predicted Label")
    ax.set_ylabel("True Label")
    ax.set_title(f"Confusion Matrix - {modality}")
    plt.show()


# Log loss of each evaluated modality, averaged at the end.
l_dense = []

num_classes = 5

for modality in modalities:
    X_test = np.load(f"CONCATENATED_FEATURES_TEST_{modality}.npy")

    # Every row of this file belongs to `modality`, so the ground truth is a
    # constant vector of that modality's class id.
    modality_label = label_map[modality]
    y_test = np.full(shape=(len(X_test),), fill_value=modality_label)

    y_test_one_hot = tf.keras.utils.to_categorical(y_test, num_classes=num_classes)

    y_pred_probs = model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = np.argmax(y_test_one_hot, axis=1)

    accuracy = accuracy_score(y_true, y_pred)
    logloss = log_loss(y_test_one_hot, y_pred_probs)
    l_dense.append(logloss)
    # Only one class is present in y_true, so per-class scores of the other
    # classes are undefined; zero_division=0 keeps the values they would get
    # anyway and silences the resulting warnings.
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, average='weighted', zero_division=0
    )

    print(f"Results for {modality}:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print(f"Log Loss: {logloss:.4f}")
    print(f"Unique labels in y_test: {np.unique(y_test)}")
    print("-" * 40)

    plot_confusion_matrix(y_true, y_pred, modality)


# Average over the modalities actually evaluated rather than a hard-coded 5.
print(f"Average Log_loss for the DENSENET201: {sum(l_dense)/len(l_dense):.4f}")
