In [2]:
import pandas as pd
from pathlib import Path
from PIL import Image
import numpy as np

# Machine-specific project root: the only path to edit when the notebook is
# run from another checkout or another machine.
PROJECT_ROOT = Path("/home/bioeos/Documents/project_hub/madagascar-aina")

# Input table read with pd.read_csv (one row per annotated frame).
CSV_PATH = PROJECT_ROOT / "csv" / "2_clean.csv"
# Output table: one row per thumbnail with one-hot encoded label columns.
CSV_OUTPUT_PATH = PROJECT_ROOT / "csv" / "3_aina_thumbnails.csv"
# Folder that receives the cropped thumbnail images.
OUTPUT_FOLDER = PROJECT_ROOT / "dataset" / "thumbnails"


In [3]:
def parse_cpce_file(filepath):
    """Read the annotated points and their labels from an annotation file.

    Layout relied upon by this parser (0-based line indices):

    * line 1: two comma-separated floats, taken as the lower and upper bound;
    * line 5: the number of points ``n``;
    * lines 6 .. 6+n-1: one ``x,y`` pair per point;
    * the next ``n`` lines: one record per point whose second comma-separated
      field, with double quotes removed, is the point's label.

    Points lying on or outside the bounds on either axis are dropped.

    NOTE(review): the same bound pair is applied to both x and y, which
    presumably assumes a square annotation area -- confirm.

    Args:
        filepath: Path of the annotation file, decoded as latin1.

    Returns:
        A tuple ``(points, min_, max_)`` where ``points`` is a list of
        ``{"x": float, "y": float, "v": str}`` dicts. If anything goes wrong
        while reading or parsing, the error is printed together with the
        path and ``([], 0, 0)`` is returned.
    """
    first_point_line = 6
    try:
        with open(filepath, "r", encoding="latin1") as handle:
            lines = [raw.replace("\n", "") for raw in handle]

        min_, max_ = [float(bound) for bound in lines[1].split(",")]
        nb_point = int(lines[5])

        points_value = []
        for offset in range(nb_point):
            coord_idx = first_point_line + offset
            x, y = [float(coord) for coord in lines[coord_idx].split(',')]
            # The label record sits exactly nb_point lines after the coordinates.
            value = lines[coord_idx + nb_point].split(',')[1].replace('"', '')

            # Drop points on or beyond the border of the annotated area.
            outside = x <= min_ or x >= max_ or y <= min_ or y >= max_
            if not outside:
                points_value.append({"x": x, "y": y, "v": value})

        return points_value, min_, max_
    except Exception as e:
        print(filepath, e)
        return [], 0, 0

In [4]:
# Cut one thumbnail around every annotated point of every frame listed in
# CSV_PATH, then write a one-hot label table describing the thumbnails.
df_data = pd.read_csv(CSV_PATH)

OUTPUT_FOLDER.mkdir(exist_ok=True, parents=True)

# The half-extent of a thumbnail is 1/THUMB_HALF_DIVISOR of the frame size, so
# a thumbnail that is not clipped by the frame border spans 1/5 of each axis.
THUMB_HALF_DIVISOR = 10

data = {"FileName": [], "value": []}
for _, row in df_data.iterrows():
    # The context manager releases the file handle even when no crop is made
    # (e.g. the annotation file could not be parsed and `points` is empty).
    with Image.open(row["relative_file_path"]) as im:
        points, min_, max_ = parse_cpce_file(row["cpce_file"])

        # Annotation units per pixel; the single upper bound returned by the
        # parser is applied to both axes.
        ratio_x = max_ / im.width
        ratio_y = max_ / im.height

        thumb_half_width = im.width / THUMB_HALF_DIVISOR
        thumb_half_height = im.height / THUMB_HALF_DIVISOR

        print(row["relative_file_path"], max_, min_, ratio_x, ratio_y, im.size, thumb_half_width, thumb_half_height)

        frame_stem = Path(row["FileName"]).stem
        for point_idx, point in enumerate(points):
            # Convert the point from annotation units to pixel coordinates.
            cx = point["x"] / ratio_x
            cy = point["y"] / ratio_y

            # Box centred on the point, clipped to the frame borders.
            left = max(cx - thumb_half_width, 0)
            top = max(cy - thumb_half_height, 0)
            right = min(cx + thumb_half_width, im.width)
            bottom = min(cy + thumb_half_height, im.height)

            # Crop, save, and record the thumbnail together with its label.
            thumb_filename = f"{frame_stem}_{point_idx}.jpg"
            im.crop((left, top, right, bottom)).save(OUTPUT_FOLDER / thumb_filename)
            data["FileName"].append(thumb_filename)
            data["value"].append(point["v"])

df = pd.DataFrame(data)

# One-hot encode the 'value' column: one int32 indicator column per label.
df_encoded = pd.get_dummies(df["value"], dtype=np.int32)

# Put the thumbnail file name in front of its indicator columns.
df_final = pd.concat([df["FileName"], df_encoded], axis=1)

df_final.to_csv(CSV_OUTPUT_PATH, index=False)

/home/bioeos/Documents/project_hub/madagascar-aina/dataset/clean_data/FRAMES/Toliara_T1_DSCN0001.JPG 7679.108 0.0 14.9982578125 14.9982578125 (512, 512) 51.2 51.2
/home/bioeos/Documents/project_hub/madagascar-aina/dataset/clean_data/FRAMES/Toliara_T1_DSCN0002.JPG 7681.784 0.0 15.003484375 15.003484375 (512, 512) 51.2 51.2
/home/bioeos/Documents/project_hub/madagascar-aina/dataset/clean_data/FRAMES/Toliara_T1_DSCN0003.JPG 7676.433 0.0 14.993033203125 14.993033203125 (512, 512) 51.2 51.2
/home/bioeos/Documents/project_hub/madagascar-aina/dataset/clean_data/FRAMES/Toliara_T1_DSCN0004.JPG 7684.455 0.0 15.008701171875 15.008701171875 (512, 512) 51.2 51.2
/home/bioeos/Documents/project_hub/madagascar-aina/dataset/clean_data/FRAMES/Toliara_T1_DSCN0005.JPG 7679.108 0.0 14.9982578125 14.9982578125 (512, 512) 51.2 51.2
/home/bioeos/Documents/project_hub/madagascar-aina/dataset/clean_data/FRAMES/Toliara_T1_DSCN0006.JPG 7681.784 0.0 15.003484375 15.003484375 (512, 512) 51.2 51.2
/home/bioeos/Docum