# Healthy Dataset

In [None]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Build a grayscale-histogram dataset for the images in one folder.
# Produces: a combined line plot of all histograms, a CSV with one row per
# image (256 bin counts), and a printout of each image's (pixel value, count)
# pairs.

# Specify the folder containing the images
folder_path = "C://Users//harsha//Desktop//Github//Retinal_Img_Classification//HealthyDS(Train)"

# Get a list of all image files in the folder.
# The name is lower-cased before matching so files such as "IMG_01.JPG" are
# not silently skipped, and the listing is sorted because os.listdir() returns
# entries in arbitrary order (keeps the CSV rows and plot legend reproducible).
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
image_files = sorted(
    f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions)
)

# Dictionary to store histogram values for each image
histograms = {}

# Dictionary to store coordinates for each image
coordinates_dict = {}

for image_file in image_files:
    # Construct the full image path
    image_path = os.path.join(folder_path, image_file)

    # Open the image in a context manager so the underlying file handle is
    # always released, and materialise the 8-bit grayscale ('L') pixels as a
    # numpy array before the file is closed
    with Image.open(image_path) as image:
        image_array = np.array(image.convert('L'))

    # Calculate the histogram: 256 unit-width bins, one per gray level 0..255
    histogram, bin_edges = np.histogram(image_array, bins=256, range=(0, 256))

    # Save the histogram values in the dictionary
    histograms[image_file] = histogram

    # Create coordinates (x, y) pairs: left bin edge (pixel value) and count
    bin_starts = bin_edges[:-1]
    coordinates_dict[image_file] = np.column_stack((bin_starts, histogram))

    # Plot the histogram
    plt.plot(bin_starts, histogram, label=image_file)

# Set plot title and labels
plt.title('Histograms of Multiple Images')
plt.xlabel('Pixel Value')
plt.ylabel('Frequency')
if image_files:
    # Only add a legend when something was plotted; matplotlib warns otherwise
    plt.legend()  # Add a legend to differentiate the histograms
plt.show()

# Convert the histogram dictionary to a DataFrame (rows: images, columns: bins)
histogram_df = pd.DataFrame.from_dict(
    histograms,
    orient='index',
    columns=[f'Pixel Value {i}' for i in range(256)],
)

# Save the DataFrame to a CSV file next to the images
csv_file_path = os.path.join(folder_path, 'histograms_dataset.csv')
histogram_df.to_csv(csv_file_path)

print(f"Training dataset saved to {csv_file_path}")

# Print the coordinates for each image
for image_file, coordinates in coordinates_dict.items():
    print(f"Coordinates for {image_file}:")
    print(coordinates)


# Unhealthy Dataset

In [None]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Build a grayscale-histogram dataset for the images in one folder.
# Produces: a combined line plot of all histograms, a CSV with one row per
# image (256 bin counts), and a printout of each image's (pixel value, count)
# pairs.

# Specify the folder containing the images
folder_path = "C://Users//harsha//Desktop//Github//Retinal_Img_Classification//UnhealthyDS(Train)"

# Get a list of all image files in the folder.
# Matching is done on the lower-cased name so upper-case extensions
# (e.g. ".JPG") are included; sorting makes the row/legend order deterministic
# since os.listdir() gives no ordering guarantee.
image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
image_files = sorted(
    f for f in os.listdir(folder_path) if f.lower().endswith(image_extensions)
)

# Dictionary to store histogram values for each image
histograms = {}

# Dictionary to store coordinates for each image
coordinates_dict = {}

for image_file in image_files:
    # Construct the full image path
    image_path = os.path.join(folder_path, image_file)

    # Read the pixels as 8-bit grayscale ('L') while the file is open; the
    # context manager guarantees the file handle is closed afterwards
    with Image.open(image_path) as image:
        image_array = np.array(image.convert('L'))

    # Calculate the histogram: 256 unit-width bins, one per gray level 0..255
    histogram, bin_edges = np.histogram(image_array, bins=256, range=(0, 256))

    # Save the histogram values in the dictionary
    histograms[image_file] = histogram

    # Create coordinates (x, y) pairs: left bin edge (pixel value) and count
    bin_starts = bin_edges[:-1]
    coordinates_dict[image_file] = np.column_stack((bin_starts, histogram))

    # Plot the histogram
    plt.plot(bin_starts, histogram, label=image_file)

# Set plot title and labels
plt.title('Histograms of Multiple Images')
plt.xlabel('Pixel Value')
plt.ylabel('Frequency')
if image_files:
    # Only add a legend when something was plotted; matplotlib warns otherwise
    plt.legend()  # Add a legend to differentiate the histograms
plt.show()

# Convert the histogram dictionary to a DataFrame (rows: images, columns: bins)
histogram_df = pd.DataFrame.from_dict(
    histograms,
    orient='index',
    columns=[f'Pixel Value {i}' for i in range(256)],
)

# Save the DataFrame to a CSV file next to the images
csv_file_path = os.path.join(folder_path, 'histograms_dataset.csv')
histogram_df.to_csv(csv_file_path)

print(f"Training dataset saved to {csv_file_path}")

# Print the coordinates for each image
for image_file, coordinates in coordinates_dict.items():
    print(f"Coordinates for {image_file}:")
    print(coordinates)


# Testing Images

In [None]:
import os
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

def compute_histogram(image_path) -> np.ndarray:
    """Return the 256-bin grayscale histogram of the image at *image_path*.

    The image is converted to 8-bit grayscale (PIL mode ``'L'``) and its
    pixels are counted into 256 unit-width bins over ``[0, 256)``, so element
    ``i`` of the result is the number of pixels with gray value ``i``.

    Args:
        image_path: Path of an image file readable by PIL.

    Returns:
        A 1-D integer array of length 256 holding the per-level pixel counts.
    """
    # Use a context manager so the file handle is released even for formats
    # (e.g. multi-frame GIF) that PIL keeps open after loading.
    with Image.open(image_path) as image:
        image_array = np.array(image.convert('L'))
    histogram, _ = np.histogram(image_array, bins=256, range=(0, 256))
    return histogram

# Classify every test image by comparing its grayscale histogram with the
# nearest histogram in the healthy and in the unhealthy training folders,
# print the verdicts, save a per-image report to Excel and plot the histogram
# of the last test image.

# Specify the folders containing the training images
training_folder1 = "C://Users//harsha//Desktop//Github//Retinal_Img_Classification//HealthyDS(Train)"
training_folder2 = "C://Users//harsha//Desktop//Github//Retinal_Img_Classification//UnhealthyDS(Train)"

# Specify the folder containing the test images
# NOTE(review): "TestingyDS" looks like a typo for "TestingDS" - confirm
# against the real folder name on disk before changing it.
test_folder = "C://Users//harsha//Desktop//Github//Retinal_Img_Classification//TestingyDS"

image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')


def _list_image_paths(folder):
    """Return the sorted full paths of the image files found in *folder*."""
    # Lower-case the name before matching so "X.JPG" / "X.PNG" are not
    # skipped; sort because os.listdir() order is arbitrary.
    return [
        os.path.join(folder, name)
        for name in sorted(os.listdir(folder))
        if name.lower().endswith(image_extensions)
    ]


# Get the list of training image files for both folders
training_files1 = _list_image_paths(training_folder1)
training_files2 = _list_image_paths(training_folder2)

# Fail early with a clear message: fitting NearestNeighbors on zero samples
# would otherwise raise a far less helpful error further down.
for label, files, folder in (
    ("healthy", training_files1, training_folder1),
    ("unhealthy", training_files2, training_folder2),
):
    if not files:
        raise ValueError(f"No {label} training images found in {folder}")

# Compute histograms for the training images in both folders
training_histograms1 = [compute_histogram(file) for file in training_files1]
training_histograms2 = [compute_histogram(file) for file in training_files2]

# Create DataFrames for training histograms (rows indexed by file name)
training_df1 = pd.DataFrame(training_histograms1, index=[os.path.basename(f) for f in training_files1])
training_df2 = pd.DataFrame(training_histograms2, index=[os.path.basename(f) for f in training_files2])

# Get the list of test image files
test_files = _list_image_paths(test_folder)

# Initialize the NearestNeighbors model for both training sets
nbrs1 = NearestNeighbors(n_neighbors=1, algorithm='auto').fit(training_df1.values)
nbrs2 = NearestNeighbors(n_neighbors=1, algorithm='auto').fit(training_df2.values)

# Define a threshold for detecting defects (adjust as needed)
threshold = 5000  # Example threshold

# List to store the results
results = []

# Histogram of the most recently processed test image, reused for the final
# plot so the image does not have to be decoded a second time
last_test_histogram = None

# Process each test image
for test_image_path in test_files:
    test_name = os.path.basename(test_image_path)

    # Compute histogram for the test image; kneighbors expects a 2-D array
    # of shape (n_queries, n_features)
    test_histogram = compute_histogram(test_image_path)
    last_test_histogram = test_histogram
    query = test_histogram.reshape(1, -1)

    # Find the nearest neighbor for the test sample in both training sets
    distance1, index1 = nbrs1.kneighbors(query)
    distance2, index2 = nbrs2.kneighbors(query)
    healthy_distance = float(distance1[0][0])
    unhealthy_distance = float(distance2[0][0])

    # Per-set flags, kept unchanged for report compatibility: each one only
    # states whether the nearest distance to that set exceeds the threshold.
    defect_status1 = "unhealthy" if healthy_distance > threshold else "healthy"
    defect_status2 = "unhealthy" if unhealthy_distance > threshold else "healthy"

    # Final verdict: the class whose nearest training histogram is closer.
    # The previous rule ("unhealthy" if either flag was "unhealthy") labelled
    # an image unhealthy merely for being far from every *unhealthy* sample,
    # so even an exact copy of a healthy training image could be rejected.
    defect_status = "healthy" if healthy_distance <= unhealthy_distance else "unhealthy"

    results.append({
        "Test Image": test_name,
        "Nearest Neighbor Training Set 1": training_df1.index[index1[0][0]],
        "Distance Training Set 1": healthy_distance,
        "Defected Training Set 1": defect_status1,
        "Nearest Neighbor Training Set 2": training_df2.index[index2[0][0]],
        "Distance Training Set 2": unhealthy_distance,
        "Defected Training Set 2": defect_status2,
        # Stored as well as printed, so the saved report carries the verdict
        "Final Status": defect_status,
    })

    # Print the result for the current test image
    print(f"Test image {test_name} is {defect_status}")

# Print the results
print("Results:", results)

# Save the results to an Excel file
results_df = pd.DataFrame(results)
excel_file_path = os.path.join(test_folder, 'test_results.xlsx')
results_df.to_excel(excel_file_path, index=False)

print(f"Results saved to {excel_file_path}")

# Optional: Plot the histogram of the last test image
if test_files:
    plt.plot(last_test_histogram)
    plt.title(f'Histogram of Test Image: {os.path.basename(test_files[-1])}')
    plt.xlabel('Pixel Value')
    plt.ylabel('Frequency')
    plt.show()
