# Preprocess

### Imports

In [5]:
import pandas as pd
import cv2
import os

## Pre-processing

### Merged bbox and annotations

In [6]:
def _normalize_name(names):
    """Return the string Series lower-cased and stripped of surrounding whitespace."""
    return names.str.lower().str.strip()


# Load the bounding-box table and the per-image annotation table.
bbox = pd.read_csv("content/bbox.csv")
annotations = pd.read_csv("content/annotations.csv")

# Build case- and whitespace-insensitive join keys on both sides.
bbox["image_name_clean"] = _normalize_name(bbox["image_name"])
annotations["path_clean"] = _normalize_name(annotations["path"])

# Keep only the images that appear in both tables.
merged = bbox.merge(
    annotations,
    how="inner",
    left_on="image_name_clean",
    right_on="path_clean",
)

print("merged: ", len(merged))

merged.head()

merged:  160


Unnamed: 0,label_name,bbox_x,bbox_y,bbox_width,bbox_height,image_name,image_width,image_height,image_name_clean,identity,path,orientation,date,path_clean
0,head,1214,888,645,561,leftIMG_0328.jpeg,5472,3648,leftimg_0328.jpeg,SE010,leftIMG_0328.jpeg,left,24_06_2021,leftimg_0328.jpeg
1,head,1511,966,1110,736,leftIMG_0344.jpg,5472,3648,leftimg_0344.jpg,G16-03,leftIMG_0344.jpg,left,14_06_2019,leftimg_0344.jpg
2,head,847,2974,846,855,leftIMG_0597.jpg,3648,5472,leftimg_0597.jpg,G16-03,leftIMG_0597.jpg,left,21_06_2018,leftimg_0597.jpg
3,head,1692,850,1110,1026,leftIMG_0715.jpg,5472,3648,leftimg_0715.jpg,G18-04,leftIMG_0715.jpg,left,15_06_2019,leftimg_0715.jpg
4,head,1569,785,723,523,leftIMG_0751.jpeg,5472,3648,leftimg_0751.jpeg,t573,leftIMG_0751.jpeg,left,25_06_2021,leftimg_0751.jpeg


### Cropped turtle head

In [7]:
import cv2
import os

output_dir = "dataset_turtle_cropped"
os.makedirs(output_dir, exist_ok=True)

# Crop the annotated bounding box out of every image listed in `merged` and
# save it as dataset_turtle_cropped/<identity>/<image_name>.
# Rows that cannot be processed are recorded with a reason instead of being
# dropped silently, and a summary is printed at the end.
saved_count = 0
skipped = []  # (image_name, reason) pairs

for _, row in merged.iterrows():
    image_name = row["image_name"]

    # A missing identity would make os.path.join raise TypeError below.
    if pd.isna(row["identity"]):
        skipped.append((image_name, "missing identity"))
        continue

    img_path = os.path.join("content/images", image_name)
    if not os.path.exists(img_path):
        skipped.append((image_name, "file not found"))
        continue

    img = cv2.imread(img_path)
    if img is None:
        skipped.append((image_name, "unreadable image"))
        continue

    x, y, w, h = (
        int(row[col])
        for col in ("bbox_x", "bbox_y", "bbox_width", "bbox_height")
    )

    # Clamp the box to the image. A negative start index would otherwise wrap
    # around under Python slicing, and an empty crop makes cv2.imwrite raise.
    img_h, img_w = img.shape[:2]
    left, top = max(x, 0), max(y, 0)
    right, bottom = min(x + w, img_w), min(y + h, img_h)
    if right <= left or bottom <= top:
        skipped.append((image_name, "bbox outside image"))
        continue
    crop = img[top:bottom, left:right]

    # One folder per identity.
    id_folder = os.path.join(output_dir, str(row["identity"]))
    os.makedirs(id_folder, exist_ok=True)

    # cv2.imwrite reports failure through its return value, so check it.
    save_path = os.path.join(id_folder, image_name)
    if cv2.imwrite(save_path, crop):
        saved_count += 1
    else:
        skipped.append((image_name, "write failed"))

print("cropped: ", saved_count, " skipped: ", len(skipped))
for image_name, reason in skipped:
    print(f"  skipped {image_name}: {reason}")

### dataset_turtle_preprocessed
อันนี้ทำไปแค่
- grayscale
- Noise reduction (Gaussian blur)
- Histogram equalization
- Resize to standard size

In [8]:
import cv2
import os
from pathlib import Path

cropped_dir = "dataset_turtle_cropped"
preprocessed_dir = "dataset_turtle_preprocessed"
standard_size = (224, 224)

os.makedirs(preprocessed_dir, exist_ok=True)

# Mirror dataset_turtle_cropped/<identity>/ into
# dataset_turtle_preprocessed/<identity>/, writing each image as a
# grayscale, blurred, histogram-equalized picture of size `standard_size`.
for identity_folder in os.listdir(cropped_dir):
    src_identity_dir = os.path.join(cropped_dir, identity_folder)

    # Only sub-directories hold identity images; ignore stray files.
    if os.path.isdir(src_identity_dir):
        dst_identity_dir = os.path.join(preprocessed_dir, identity_folder)
        os.makedirs(dst_identity_dir, exist_ok=True)

        for img_name in os.listdir(src_identity_dir):
            # Skip anything that is not a JPEG/PNG by extension.
            if not img_name.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            bgr = cv2.imread(os.path.join(src_identity_dir, img_name))
            if bgr is None:
                # cv2.imread returns None when the file cannot be decoded.
                continue

            # Grayscale -> 5x5 Gaussian blur -> histogram equalization -> resize.
            gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
            smoothed = cv2.GaussianBlur(gray, (5, 5), 0)
            result = cv2.resize(cv2.equalizeHist(smoothed), standard_size)

            cv2.imwrite(os.path.join(dst_identity_dir, img_name), result)

            print(f"Preprocessed: {identity_folder}/{img_name}")

print(f"Pre-processing complete!!!!!!! Images saved to {preprocessed_dir}")

Pre-processing complete!!!!!!! Images saved to dataset_turtle_preprocessed


# Data Augmentation

> Each preprocessed image is saved together with 3 randomly augmented copies.



### dataset_turtle_preprocessed_Augmentation
อันนี้เพิ่ม
- Augmentation เข้าไป (ทำ 3 ครั้งต่อ 1 ภาพ)

In [9]:
import cv2
import os
import albumentations as A

preprocessed_dir = "dataset_turtle_preprocessed"
output_dir = "dataset_turtle_preprocessed_Augmentation"
os.makedirs(output_dir, exist_ok=True)

STANDARD_SIZE = (224, 224)
N_AUGMENTATIONS = 3  # augmented copies written per input image

# Augmentation pipeline: each transform is applied independently with p=0.5.
# NOTE(review): the pipeline is not seeded here, so the augmented files are
# presumably different on every run — TODO seed it if reproducibility matters.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=30, p=0.5)
])

# The CLAHE settings never change, so build the object once, not per image.
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))

for identity_folder in os.listdir(preprocessed_dir):
    identity_path = os.path.join(preprocessed_dir, identity_folder)
    if not os.path.isdir(identity_path):
        continue

    output_identity_path = os.path.join(output_dir, identity_folder)
    os.makedirs(output_identity_path, exist_ok=True)

    for img_name in os.listdir(identity_path):
        img_path = os.path.join(identity_path, img_name)
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # Not a decodable image; skip it.
            continue

        # Noise reduction + CLAHE
        # NOTE(review): the files in `preprocessed_dir` were already blurred,
        # histogram-equalized and resized by the preprocessing cell, so this
        # applies a second blur/contrast pass — confirm that is intended.
        img = cv2.GaussianBlur(img, (5, 5), 0)
        img = clahe.apply(img)

        # Resize
        img = cv2.resize(img, STANDARD_SIZE)

        # Save original preprocessed
        save_path = os.path.join(output_identity_path, img_name)
        cv2.imwrite(save_path, img)

        # Augmentation (N_AUGMENTATIONS versions).
        # Split off the extension so the suffix is inserted exactly once;
        # str.replace(".", ...) would rewrite every dot in the file name.
        stem, ext = os.path.splitext(img_name)
        for i in range(N_AUGMENTATIONS):
            aug_img = transform(image=img)['image']
            aug_name = f"{stem}_aug{i}{ext}"
            cv2.imwrite(os.path.join(output_identity_path, aug_name), aug_img)

print(f"All images preprocessed and augmented! Saved to {output_dir}")


All images preprocessed and augmented! Saved to dataset_turtle_preprocessed_Augmentation


  original_init(self, **validated_kwargs)
