In [1]:
import cv2
import os
import numpy as np
import csv
from skimage.feature.texture import graycomatrix, graycoprops 

def histogram_equalization(image):

    # Convert the image to grayscale.
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Perform histogram equalization.
    equalized_image = cv2.equalizeHist(grayscale_image)

    # Convert the equalized image back to RGB color space.
    equalized_image = cv2.cvtColor(equalized_image, cv2.COLOR_GRAY2RGB)

    return equalized_image

#Path to the dataset folders
dataset_folder = "gaussian_filtered_images"
output_folder = "Equalized"

#List of subfolders for different stages of Diabetic Retinopathy
subfolders = ["Mild", "Moderate", "No_DR", "Proliferate_DR", "Severe"]

# Iterate through each subfolder and process the images
for subfolder in subfolders:
    subfolder_path = os.path.join(dataset_folder, subfolder)
    output_subfolder = os.path.join(output_folder, subfolder)

    # Create the output subfolder if it doesn't exist
    if not os.path.exists(output_subfolder):
        os.makedirs(output_subfolder)

    # Iterate through the images in the subfolder
    for filename in os.listdir(subfolder_path):
        image_path = os.path.join(subfolder_path, filename)

        # Read the image
        image = cv2.imread(image_path)

        # Perform histogram equalization
        equalized_image = histogram_equalization(image)

        # Save the equalized image to the output subfolder
        output_path = os.path.join(output_subfolder, filename)
        cv2.imwrite(output_path, equalized_image)

        print(f"Processed: {image_path} -> {output_path}")

Processed: gaussian_filtered_images\Mild\0024cdab0c1e.png -> Equalized\Mild\0024cdab0c1e.png
Processed: gaussian_filtered_images\Mild\00cb6555d108.png -> Equalized\Mild\00cb6555d108.png
Processed: gaussian_filtered_images\Mild\0124dffecf29.png -> Equalized\Mild\0124dffecf29.png
Processed: gaussian_filtered_images\Mild\01b3aed3ed4c.png -> Equalized\Mild\01b3aed3ed4c.png
Processed: gaussian_filtered_images\Mild\0369f3efe69b.png -> Equalized\Mild\0369f3efe69b.png
Processed: gaussian_filtered_images\Mild\03e25101e8e8.png -> Equalized\Mild\03e25101e8e8.png
Processed: gaussian_filtered_images\Mild\04ac765f91a1.png -> Equalized\Mild\04ac765f91a1.png
Processed: gaussian_filtered_images\Mild\059bc89df7f4.png -> Equalized\Mild\059bc89df7f4.png
Processed: gaussian_filtered_images\Mild\05a5183c92d0.png -> Equalized\Mild\05a5183c92d0.png
Processed: gaussian_filtered_images\Mild\0684311afdfc.png -> Equalized\Mild\0684311afdfc.png
Processed: gaussian_filtered_images\Mild\06b71823f9cd.png -> Equalized

In [2]:
def compute_glcm_features(image):
    """
    Computes GLCM features for an image.

    Args:
        image: The image to compute GLCM features on.

    Returns:
        A list of GLCM features.
    """
    # Convert the image to grayscale
    grayscale_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Compute the GLCM
    glcm = graycomatrix(grayscale_image, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)

    # Compute GLCM properties (features)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    dissimilarity = graycoprops(glcm, 'dissimilarity')[0, 0]
    energy = graycoprops(glcm, 'energy')[0, 0]
    correlation = graycoprops(glcm, 'correlation')[0, 0]
    homogeneity = graycoprops(glcm, 'homogeneity')[0, 0]

    return [contrast, dissimilarity, energy, correlation, homogeneity]

def main():
    # Path to the dataset folders
    dataset_folder = "Equalized"
    csv_file_path = "train.csv"
    # List of subfolders for different stages of Diabetic Retinopathy
    subfolders = ["Mild", "Moderate", "No_DR", "Proliferate_DR", "Severe"]

    # Create a dictionary to store image names and corresponding GLCM features
    glcm_features_dict = {}


    # Iterate through each subfolder and process the images
    for subfolder in subfolders:
        subfolder_path = os.path.join(dataset_folder, subfolder)

        # Iterate through the images in the subfolder
        for filename in os.listdir(subfolder_path):
            image_path = os.path.join(subfolder_path, filename)

            # Read the image
            image = cv2.imread(image_path)

            # Compute GLCM features
            glcm_features = compute_glcm_features(image)

            # Add the image name and corresponding GLCM features to the dictionary
            glcm_features_dict[filename] = glcm_features
            print(filename)
            print(glcm_features)

    # Update the CSV file with the new column and GLCM feature values
    with open(csv_file_path, 'r') as f:
        csv_data = list(csv.reader(f))

        # Add the header for the new column
        csv_data[0].extend(["Contrast", "Dissimilarity", "Energy", "Correlation", "Homogeneity"])

        # Fill the rows with the corresponding GLCM feature values
        for i in range(1, len(csv_data)):
          image_name = csv_data[i][0] +".png"
          print(image_name)
          if image_name in glcm_features_dict:
                glcm_features = glcm_features_dict[image_name]
                csv_data[i].extend(glcm_features)

    # Write the updated data back to the CSV file
    with open(csv_file_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(csv_data)

    print("GLCM features added to the CSV file.")

if __name__ == "__main__":
    main()


0024cdab0c1e.png
[8075.697129244074, 66.59883488148623, 0.0979293779966428, 0.2188458199964356, 0.1378014439834101]
00cb6555d108.png
[8005.281229980781, 65.92861146700832, 0.08946375830864928, 0.23243818647223533, 0.12816757970818035]
0124dffecf29.png
[6795.883528187061, 57.459681294042284, 0.12677759858024037, 0.31754626133197533, 0.1629169030034052]
01b3aed3ed4c.png
[7299.958620275464, 61.922105221012174, 0.09974848354457595, 0.2944764883619987, 0.14205265751820328]
0369f3efe69b.png
[8388.22687780269, 66.12457959641256, 0.11170855472091949, 0.16210862880154095, 0.14028339215466631]
03e25101e8e8.png
[9059.11016575913, 72.6156910634209, 0.033983196765386525, 0.1548805146361069, 0.051170147100383256]
04ac765f91a1.png
[7918.209841447791, 64.37928411274824, 0.10049733201677243, 0.2353935210219823, 0.13603113772534023]
059bc89df7f4.png
[8038.266816143498, 66.20575752722614, 0.09196517811614556, 0.22779353280939232, 0.12436417647625309]
05a5183c92d0.png
[8463.126121076231, 67.69967168481743

In [3]:
import pandas as pd
from sklearn.utils import resample

# Read the CSV file into a pandas DataFrame
df = pd.read_csv('train.csv')

# Separate the data by classes
classes = df['diagnosis'].unique()
class_dfs = [df[df['diagnosis'] == cls] for cls in classes]

# Oversample the minority class(es)
oversampled_dfs = []
majority_class_size = max([len(cls_df) for cls_df in class_dfs])
for cls_df in class_dfs:
    oversampled_cls_df = resample(cls_df, replace=True, n_samples=majority_class_size, random_state=42)
    oversampled_dfs.append(oversampled_cls_df)

# Combine the oversampled data frames
oversampled_df = pd.concat(oversampled_dfs)

# Save the oversampled data to a new CSV file
oversampled_df.to_csv('new_train.csv', index=False)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
