In [1]:
import os
import sys

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), ".."))
sys.path.insert(0, PROJECT_ROOT)

print("PROJECT_ROOT:", PROJECT_ROOT)

PROJECT_ROOT: d:\College\glaucoma-detection-project


In [2]:
import cv2
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage.feature import local_binary_pattern

In [3]:
RAW_G1020 = os.path.join(PROJECT_ROOT, "data", "raw", "g1020")
PROCESSED_G1020 = os.path.join(PROJECT_ROOT, "data", "processed", "g1020")

print("Exists RAW?", os.path.exists(RAW_G1020))
print("Exists PROCESSED?", os.path.exists(PROCESSED_G1020))

Exists RAW? True
Exists PROCESSED? True


In [4]:
def compute_color_features(img):
    return img[:,:,0].mean(), img[:,:,1].mean(), img[:,:,2].mean()

def compute_lbp_feature(img, radius=2, points=8):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    lbp = local_binary_pattern(gray, points, radius, method="uniform")
    return lbp.mean()

In [5]:
records = []

for split in ["training", "testing"]:
    for label_name in ["glaucoma", "normal"]:

        label = 1 if label_name == "glaucoma" else 0
        folder = os.path.join(PROCESSED_G1020, split, label_name)

        for img_name in tqdm(os.listdir(folder), desc=f"{split}-{label_name}"):

            img_path = os.path.join(folder, img_name)
            img = cv2.imread(img_path)

            if img is None:
                continue

            mean_r, mean_g, mean_b = compute_color_features(img)
            lbp_mean = compute_lbp_feature(img)

            records.append({
                "image": img_name,
                "split": split,
                "mean_r": mean_r,
                "mean_g": mean_g,
                "mean_b": mean_b,
                "lbp_mean": lbp_mean,
                "label": label
            })

training-glaucoma: 100%|██████████| 237/237 [00:01<00:00, 167.58it/s]
training-normal: 100%|██████████| 579/579 [00:03<00:00, 182.73it/s]
testing-glaucoma: 100%|██████████| 59/59 [00:00<00:00, 176.65it/s]
testing-normal: 100%|██████████| 145/145 [00:00<00:00, 173.38it/s]


In [6]:
df_g1020 = pd.DataFrame(records)

print(df_g1020["split"].value_counts())
print(df_g1020["label"].value_counts())
df_g1020.head()

split
training    816
testing     204
Name: count, dtype: int64
label
0    724
1    296
Name: count, dtype: int64


Unnamed: 0,image,split,mean_r,mean_g,mean_b,lbp_mean,label
0,image_1000.jpg,training,28.816247,88.771823,167.216697,5.707908,1
1,image_1001.jpg,training,35.614716,65.625419,130.368563,5.808175,1
2,image_1002.jpg,training,33.601323,91.714605,170.010882,5.690489,1
3,image_1003.jpg,training,28.099948,72.710379,193.541673,5.556103,1
4,image_1004.jpg,training,54.326511,79.058634,208.719687,5.531589,1


In [7]:
os.makedirs(os.path.join(PROJECT_ROOT, "outputs", "metrics"), exist_ok=True)

csv_path = os.path.join(PROJECT_ROOT, "outputs", "metrics", "features_g1020.csv")
df_g1020.to_csv(csv_path, index=False)

print("Saved to:", csv_path)

Saved to: d:\College\glaucoma-detection-project\outputs\metrics\features_g1020.csv


In [8]:
df_g1020["label"].value_counts()

label
0    724
1    296
Name: count, dtype: int64