# Save crops from bounding boxes

This notebook downloads each source image from S3, crops it to every bounding box recorded for that image, and saves the crops to disk.

In [None]:
import os
import shutil
import subprocess

import pandas as pd
from PIL import Image

In [None]:
def initialise_session(credentials_file="credentials.json"):
    """
    Build an S3 client from the AWS settings stored in a JSON credentials file.

    Args:
        credentials_file (str): Path to a JSON file providing the keys
            ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``, ``AWS_REGION``
            and ``AWS_URL_ENDPOINT``.

    Returns:
        The S3 client created from the session, using ``AWS_URL_ENDPOINT``
        as its endpoint URL.
    """
    with open(credentials_file, encoding="utf-8") as handle:
        settings = json.load(handle)

    # Create the session and derive the S3 client from it in one expression.
    return boto3.Session(
        aws_access_key_id=settings["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=settings["AWS_SECRET_ACCESS_KEY"],
        region_name=settings["AWS_REGION"],
    ).client("s3", endpoint_url=settings["AWS_URL_ENDPOINT"])

import boto3
import json
from boto3.s3.transfer import TransferConfig

# S3 client shared by the download code in this notebook
client = initialise_session("./credentials.json")

# Transfer settings passed as ``Config`` to the S3 downloads
transfer_config = TransferConfig(
    multipart_threshold=8 * 1024 * 1024,  # 8MB
    max_concurrency=20,  # number of concurrent transfers
    io_chunksize=256 * 1024,  # 256KB (262144 bytes)
    max_io_queue=1000,
)

In [None]:
def download_crop(download_path, download_dir, box, crop_name):
    """
    Crop one bounding box out of a local image and save the crop as a JPEG.

    The crop is written to
    ``{download_dir}/{image file name without '.jpg'}_{crop_name}.jpg``.
    If that file already exists the function returns immediately without
    opening the source image.

    Args:
        download_path (str): Path to the source image on disk.
        download_dir (str): Directory the crop is written to.
        box (sequence): ``[x_min, y_min, x_max, y_max]``; each value is
            truncated with ``int()`` before being scaled.
        crop_name (str): Suffix identifying this crop in the output file name.

    Returns:
        None
    """
    image_stem = os.path.basename(download_path).replace('.jpg', '')
    save_path = f"{download_dir}/{image_stem}_{crop_name}.jpg"

    # Skip crops that have already been written.
    if os.path.exists(save_path):
        return

    # Use a context manager so the source file handle is released before the
    # caller deletes the downloaded image; ``convert`` returns an independent
    # in-memory image that stays usable after the file is closed.
    with Image.open(download_path) as source:
        image = source.convert("RGB")

    original_width, original_height = image.size

    # NOTE(review): every coordinate is multiplied by 300 / image dimension
    # and the result is then used to crop the full-size image. That factor
    # maps full-image pixels onto a 300-px grid; if the boxes are recorded on
    # a 300-px grid the factor would presumably need to be dimension / 300
    # instead. Behaviour is left unchanged here - confirm against how the
    # boxes in the CSV were produced.
    x_min = int(int(box[0]) * 300 / original_width)
    y_min = int(int(box[1]) * 300 / original_height)
    x_max = int(int(box[2]) * 300 / original_width)
    y_max = int(int(box[3]) * 300 / original_height)

    image.crop((x_min, y_min, x_max, y_max)).save(save_path)


In [None]:
def download_dep_crops(dep, bucket_name):
    """
    Download every image of one deployment, save its crops, and zip them.

    Reads ``./data/singapore/{dep}_cleaned.csv``, derives the S3 key of each
    row's image as ``{dep}/snapshot_images/{file name}``, and for every
    distinct key ending in ``.jpg``:

    1. downloads the image into ``./data/singapore/crops/{dep}``,
    2. calls ``download_crop`` once per CSV row belonging to that image,
    3. deletes the downloaded image again.

    Finally the crop directory is compressed to
    ``./data/singapore/crops/{dep}.zip`` with the external ``zip`` command
    and the directory is removed.

    Uses the module-level ``client`` and ``transfer_config``.

    Args:
        dep (str): Deployment identifier; used in the CSV name, the S3 key
            prefix and the output directory name.
        bucket_name (str): S3 bucket the images are downloaded from.

    Raises:
        FileNotFoundError: If the ``zip`` executable is not available.
        subprocess.CalledProcessError: If ``zip`` exits with a non-zero
            status. In both cases the crop directory is left in place.
    """
    download_dir = f'./data/singapore/crops/{dep}'
    df = pd.read_csv(f'./data/singapore/{dep}_cleaned.csv')
    df['keys'] = df['image_path'].apply(
        lambda x: f"{dep}/snapshot_images/{os.path.basename(x)}"
    )

    # Distinct image keys, restricted to those ending with jpg
    image_keys = [key for key in df['keys'].unique() if key.endswith('.jpg')]

    print(dep, bucket_name)
    print(f"- Downloading for {len(image_keys)} images and {df.shape[0]} crops")

    os.makedirs(download_dir, exist_ok=True)

    # Group the rows once instead of re-scanning the whole frame per image.
    rows_by_key = df.groupby('keys', sort=False)

    for key in image_keys:
        download_path = os.path.join(download_dir, os.path.basename(key))
        client.download_file(bucket_name, key, download_path, Config=transfer_config)

        try:
            rows = rows_by_key.get_group(key)
            for x_min, y_min, x_max, y_max, crop_status in zip(
                rows['x_min'], rows['y_min'], rows['x_max'], rows['y_max'],
                rows['crop_status'],
            ):
                download_crop(download_path, download_dir,
                              box=[x_min, y_min, x_max, y_max],
                              crop_name=crop_status.replace(' ', '_'))
        finally:
            # Never leave the full-size image behind, even if cropping fails;
            # otherwise it would end up inside the archive.
            os.remove(download_path)

    # Compress download_dir. ``check=True`` makes a failed zip raise, so the
    # crops are only deleted once the archive has actually been written.
    subprocess.run(["zip", "-r", f"{download_dir}.zip", download_dir], check=True)
    shutil.rmtree(download_dir)

In [None]:
# Collect the deployment directories: entries of ./data/singapore whose name
# starts with 'dep' and which are directories rather than files
deps = [
    entry
    for entry in os.listdir('./data/singapore')
    if entry.startswith('dep') and os.path.isdir(f'./data/singapore/{entry}')
]