In [17]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [18]:
# Load the metadata table that drives the whole preprocessing run.
# Later cells read its "Filename" and "OSNR_dB" columns.
csv_file = "eye_diagrams_metadata.csv"
df = pd.read_csv(filepath_or_buffer=csv_file)

In [19]:
# Directory layout: source images are read from `input_folder`; processed
# arrays are written under `output_folder`, one sub-directory per subset.
input_folder = "Dataset"
output_folder = "preprocessed"
train_folder, test_folder = (
    os.path.join(output_folder, subset_dir) for subset_dir in ("train", "test")
)

In [20]:
# --- Preprocessing configuration (everything a reader might tune) ---
IMG_SIZE = (224, 224)  # Target size handed to cv2.resize, which expects (width, height)
TEST_SIZE = 0.2  # Fraction of metadata rows held out for testing (20%)
RANDOM_SEED = 42  # Fixed seed so the train/test split is reproducible across runs

# Ensure output folders exist (exist_ok=True keeps this cell safe to re-run)
for folder in [output_folder, train_folder, test_folder]:
    os.makedirs(folder, exist_ok=True)

# Split dataset into train & test. Stratifying on the OSNR_dB column keeps the
# relative frequency of each OSNR value approximately equal in both subsets.
train_df, test_df = train_test_split(
    df,
    test_size=TEST_SIZE,
    stratify=df["OSNR_dB"],
    random_state=RANDOM_SEED,
)

# Sanity check: every metadata row must land in exactly one subset.
assert len(train_df) + len(test_df) == len(df), "train/test split lost or duplicated rows"

In [21]:
def process_and_save(df_subset: pd.DataFrame, subset_name: str, output_path: str) -> int:
    """Preprocess every image listed in ``df_subset`` and store it as a ``.npy`` array.

    For each value in the ``"Filename"`` column the image is read from the
    module-level ``input_folder`` as grayscale, resized to the module-level
    ``IMG_SIZE``, scaled by 1/255 and written to ``output_path`` under the same
    base name with a ``.npy`` extension. Images that cannot be loaded are
    reported on stdout and skipped.

    Args:
        df_subset: Metadata rows to process; must contain a ``"Filename"`` column.
        subset_name: Human-readable label used only in the progress-bar text.
        output_path: Existing directory that receives the ``.npy`` files.

    Returns:
        The number of images that were successfully processed and saved
        (``len(df_subset)`` minus the skipped ones).
    """
    saved_count = 0

    # Only the file name is needed, so iterate the column directly rather than
    # materialising a full row object per image with iterrows().
    for filename in tqdm(df_subset["Filename"], total=len(df_subset), desc=f"Processing {subset_name} images"):
        input_path = os.path.join(input_folder, filename)

        # Swap whatever extension the source has for ".npy". splitext only
        # touches the final suffix, so names containing ".png" elsewhere, or
        # using ".PNG"/".jpg", still map to "<stem>.npy".
        stem, _ = os.path.splitext(filename)
        output_file = os.path.join(output_path, stem + ".npy")

        # Read image directly as single-channel grayscale
        image = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread does not raise on failure; it returns None both when
            # the file is missing and when it cannot be decoded.
            reason = "File not found" if not os.path.isfile(input_path) else "Unreadable image"
            print(f"⚠️ Skipping: {filename} ({reason})")
            continue

        # Resize image to the common model input size
        image = cv2.resize(image, IMG_SIZE)

        # Normalize pixel values (0 to 1); true division yields a float array
        image = image / 255.0

        # Save processed image as numpy array (.npy loads faster than re-decoding)
        np.save(output_file, image)
        saved_count += 1

    return saved_count

# Process train and test images
# Run the preprocessing once per subset, writing into its own folder.
process_and_save(df_subset=train_df, subset_name="Train", output_path=train_folder)
process_and_save(df_subset=test_df, subset_name="Test", output_path=test_folder)

# Summary. Note: the totals are the split sizes taken from the metadata, not
# the number of files actually written (skipped images are still counted).
print(
    "✅ Image preprocessing completed!",
    f"Total Train Images: {len(train_df)}",
    f"Total Test Images: {len(test_df)}",
    sep="\n",
)

Processing Train images: 100%|██████████| 489/489 [00:06<00:00, 77.59it/s]
Processing Test images: 100%|██████████| 123/123 [00:01<00:00, 79.49it/s]

✅ Image preprocessing completed!
Total Train Images: 489
Total Test Images: 123



