In [None]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Load the image metadata table from CSV.
# `data` is a module-level DataFrame; preprocess_and_segment_all_images reads
# its 'image_id' column to decide which image files to process.
csv_path = '/Users/pillala/Documents/capstone project/messidor-2/messidor_data.csv'  # Update this to the actual path of the CSV file
data = pd.read_csv(csv_path)

# Print the first rows (DataFrame.head) as a quick sanity check that the CSV
# was loaded and has the expected columns
print(data.head())

# Function to show two images next to each other for visual comparison
def display_images(image1, image2, title1="Image 1", title2="Image 2"):
    """Plot two images side by side in a single matplotlib figure.

    Both images are converted with ``cv2.COLOR_BGR2RGB`` before being passed to
    ``plt.imshow``, so they are expected to be 3-channel BGR arrays (the channel
    order used by OpenCV). The figure is rendered with ``plt.show()``.

    Args:
        image1: Image drawn in the left panel.
        image2: Image drawn in the right panel.
        title1: Title placed above the left panel.
        title2: Title placed above the right panel.
    """
    plt.figure(figsize=(10, 5))

    panels = ((image1, title1), (image2, title2))
    for slot, (picture, caption) in enumerate(panels, start=1):
        plt.subplot(1, 2, slot)
        rgb_picture = cv2.cvtColor(picture, cv2.COLOR_BGR2RGB)
        plt.imshow(rgb_picture)
        plt.title(caption)

    plt.show()

# Function to apply adaptive histogram equalization to the luma channel of the image
def apply_ahe(img):
    """Equalize the Y (luma) channel of a BGR image and return a BGR image.

    The image is converted to YUV, the Y channel is equalized, and the result
    is converted back to BGR; the U and V channels pass through unchanged.

    Note: the equalizer is OpenCV's CLAHE object (clip limit 2.0, 8x8 tile
    grid), i.e. the contrast-limited variant rather than unclipped AHE.
    """
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    luma, chroma_u, chroma_v = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2YUV))
    yuv_equalized = cv2.merge((equalizer.apply(luma), chroma_u, chroma_v))
    return cv2.cvtColor(yuv_equalized, cv2.COLOR_YUV2BGR)

# Function to apply Gamma Correction to the image
def apply_gamma_correction(img, gamma=1.2):
    """Apply gamma correction to an image through a 256-entry lookup table.

    Every intensity ``i`` in 0..255 is mapped to
    ``255 * (i / 255) ** (1 / gamma)`` truncated to ``uint8``. With
    ``gamma > 1`` the exponent is below 1, which brightens mid-tones;
    ``gamma < 1`` darkens them.

    Args:
        img: Image passed to ``cv2.LUT`` (presumably ``uint8``, as returned by
            ``cv2.imread`` -- confirm for other sources).
        gamma: Gamma value; must be a positive number. Defaults to 1.2.

    Returns:
        The image returned by ``cv2.LUT`` after applying the table.

    Raises:
        ValueError: If ``gamma`` is zero, negative, or NaN.
    """
    # `not gamma > 0` (rather than `gamma <= 0`) also rejects NaN. Without this
    # guard gamma=0 fails with a bare ZeroDivisionError and a negative gamma
    # silently produces an invalid table (infinity at intensity 0).
    if not gamma > 0:
        raise ValueError(f"gamma must be a positive number, got {gamma!r}")

    inv_gamma = 1.0 / gamma

    # Build the whole table in one vectorized expression; astype truncates
    # toward zero, matching a per-element float -> uint8 cast.
    levels = np.arange(256) / 255.0
    table = (levels ** inv_gamma * 255).astype("uint8")

    return cv2.LUT(img, table)

# Function to apply CLAHE (Contrast Limited Adaptive Histogram Equalization)
def apply_clahe(img):
    """Run CLAHE on the lightness channel of a BGR image and return BGR.

    The image is converted to LAB, the L channel is equalized with a CLAHE
    object (clip limit 2.0, 8x8 tile grid), and the result is converted back to
    BGR. The A and B channels are left as they are.
    """
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

    # Overwrite only the lightness plane; the colour planes stay in place
    lab[:, :, 0] = equalizer.apply(lab[:, :, 0])

    return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

# Preprocessing pipeline that sequentially applies AHE, Gamma Correction, and CLAHE
def preprocess_image(image_path):
    """Load an image and run it through the enhancement pipeline.

    The pipeline is ``apply_ahe`` -> ``apply_gamma_correction`` (gamma=1.2) ->
    ``apply_clahe``, each stage consuming the output of the previous one.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        A tuple ``(original, processed)``: the image as loaded and the image
        after the full pipeline.

    Raises:
        OSError: If ``cv2.imread`` could not load ``image_path`` (missing file,
            unreadable file, or unsupported format).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # cv2.error from the first colour conversion.
        raise OSError(f"Could not read image: {image_path}")

    img_ahe = apply_ahe(img)
    img_gamma = apply_gamma_correction(img_ahe, gamma=1.2)
    img_clahe = apply_clahe(img_gamma)

    # NOTE(review): when the processed image is darker on average than the
    # original it is rotated by 180 degrees. A rotation does not change the
    # mean brightness, so the intent of this step is unclear -- kept as-is to
    # preserve existing output; confirm with the author whether it is wanted.
    if np.mean(img_clahe) < np.mean(img):
        img_clahe = cv2.rotate(img_clahe, cv2.ROTATE_180)

    return img, img_clahe

# Specify the input and output folders.
# These three module-level paths are passed to
# preprocess_and_segment_all_images when the pipeline is executed.
image_folder = '/Users/pillala/Documents/capstone project/messidor-2/images'  # Update this to the actual path where images are stored
output_folder = '/Users/pillala/Documents/capstone project/messidor-2/preprocessed_images_1/'  # Update this to your desired output folder
segmented_folder = '/Users/pillala/Documents/capstone project/messidor-2/segmented_images/'  # Folder to save the segmented images

# Create both output folders up front; exist_ok=True makes re-running the
# script safe when the folders are already there
os.makedirs(output_folder, exist_ok=True)
os.makedirs(segmented_folder, exist_ok=True)

# Function to detect and mask out the optic disc, taken to be the largest bright region
def remove_optic_disc(img, *, brightness_threshold=200, radius_scale=1.0):
    """Black out a circle around the largest bright region of a BGR image.

    The image is converted to grayscale, smoothed with a 15x15 Gaussian blur
    and thresholded. Among the external contours of the thresholded image the
    one with the largest area is assumed to be the optic disc, and a filled
    circle centred on its bounding box is zeroed out in the result.

    Args:
        img: 3-channel BGR image.
        brightness_threshold: Blurred grayscale pixels strictly above this
            value count as "bright". Defaults to 200.
        radius_scale: Factor applied to the larger side of the bounding box to
            obtain the circle radius. With the default of 1.0 the radius equals
            that side, so the circle is about twice as wide as the detected
            region and leaves a margin around it.

    Returns:
        A copy of ``img`` with the circle set to black. If no pixel exceeds the
        threshold the image content is returned unchanged.
    """
    # Convert the image to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply GaussianBlur to reduce noise and improve contour detection
    blurred = cv2.GaussianBlur(gray, (15, 15), 0)

    # Keep only the brightest region
    _, thresh = cv2.threshold(blurred, brightness_threshold, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contour list in
    # every version, whereas two-value unpacking fails on 3.x.
    contours = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Start with an all-white mask (255 = keep the pixel)
    mask = np.full(img.shape[:2], 255, dtype=np.uint8)

    if contours:
        # The largest contour by area is assumed to be the optic disc
        max_contour = max(contours, key=cv2.contourArea)

        # Circle centred on the contour's bounding box
        x, y, w, h = cv2.boundingRect(max_contour)
        radius = int(radius_scale * max(w, h))
        center = (x + w // 2, y + h // 2)
        cv2.circle(mask, center, radius, 0, -1)  # thickness -1 = filled; 0 = drop the pixel

    # Pixels where the mask is 0 become black in the output
    return cv2.bitwise_and(img, img, mask=mask)

# Function to preprocess and segment images in the dataset
def preprocess_and_segment_all_images(image_folder, output_folder, segmented_folder, metadata=None):
    """Preprocess every listed image and save it with and without the optic disc.

    For each value of the ``image_id`` column the image is read from
    ``image_folder`` and passed through ``preprocess_image``; the processed
    image is written to ``output_folder``. ``remove_optic_disc`` is then applied
    to it and that result is written to ``segmented_folder``. Both outputs
    reuse ``image_id`` as the file name. The first image that is processed is
    also displayed for a visual check.

    Input files that do not exist, and outputs that ``cv2.imwrite`` reports it
    could not write, are reported with a warning and the batch continues; a
    summary line is printed at the end if anything went wrong.

    Args:
        image_folder: Directory containing the input images.
        output_folder: Directory receiving the preprocessed images.
        segmented_folder: Directory receiving the images with the optic disc
            masked out.
        metadata: Table with an ``image_id`` column. Defaults to the
            module-level ``data`` DataFrame.
    """
    if metadata is None:
        metadata = data

    preview_shown = False
    missing_inputs = []
    unwritten_outputs = []

    # Count positions with enumerate instead of relying on the DataFrame index:
    # index labels only equal 0, 1, 2, ... for a default RangeIndex, so the
    # preview and progress logic would misbehave on a filtered or re-indexed table.
    for position, image_id in enumerate(metadata['image_id']):
        image_path = os.path.join(image_folder, image_id)

        if not os.path.isfile(image_path):
            # One missing file should not abort the whole batch
            missing_inputs.append(image_id)
            print(f'Warning: skipping {image_id}, no file at {image_path}')
        else:
            # Preprocess the image
            original_img, processed_img = preprocess_image(image_path)

            # Save the preprocessed image; imwrite returns False on failure
            preprocessed_output_path = os.path.join(output_folder, image_id)
            if not cv2.imwrite(preprocessed_output_path, processed_img):
                unwritten_outputs.append(preprocessed_output_path)
                print(f'Warning: could not write {preprocessed_output_path}')

            # Remove the optic disc from the processed image
            img_no_disc = remove_optic_disc(processed_img)

            # Save the segmented image without the optic disc
            segmented_output_path = os.path.join(segmented_folder, image_id)
            if not cv2.imwrite(segmented_output_path, img_no_disc):
                unwritten_outputs.append(segmented_output_path)
                print(f'Warning: could not write {segmented_output_path}')

            # Display the first successfully processed image
            if not preview_shown:
                display_images(original_img, processed_img, title1="Original Image", title2="Processed Image")
                display_images(processed_img, img_no_disc, title1="Processed Image", title2="Processed Image without Optic Disc")
                preview_shown = True

        # Progress update for every 100 images
        if position % 100 == 0:
            print(f'Processed {position} images...')

    if missing_inputs or unwritten_outputs:
        print(
            f'Finished with {len(missing_inputs)} missing input file(s) and '
            f'{len(unwritten_outputs)} output file(s) that could not be written.'
        )

# Execute preprocessing and segmentation for every image listed in `data`,
# reading from image_folder and writing to output_folder / segmented_folder
preprocess_and_segment_all_images(image_folder, output_folder, segmented_folder)

# Final confirmation naming both destination folders
print(f'All images processed and saved in: {output_folder} and {segmented_folder}')