# Third-Party Dependencies

In [None]:
!pip install rasterio

In [None]:
import os
import glob
import rasterio
from tqdm import tqdm
from rasterio import Affine

# Mount Google Drive

In [None]:
from google.colab import drive

# Mount Google Drive under /content/drive, forcing a remount if it is already mounted.
drive.mount('/content/drive', force_remount=True)

# Clip Patch

## General Information

In [None]:
# Size, in pixels, of every clipped patch.
output_width = 256
output_height = 256

# Input rasters are read from 'datasets'; clipped rasters go to 'datasets_clipped'.
working_folder = '/content/drive/My Drive/GUM'
img_to_clip_folder = os.path.join(working_folder, 'datasets')
img_after_clipping_folder = os.path.join(working_folder, 'datasets_clipped')

# Fail fast when either folder is missing.
assert all(
    os.path.exists(folder)
    for folder in (img_to_clip_folder, img_after_clipping_folder)
)

## General Functions

In [None]:
def remove_black_border(input_path, output_path, output_width, output_height):
    """Save the centered ``output_width`` x ``output_height`` crop of a raster.

    The raster at ``input_path`` is opened with rasterio, the window centered
    in the image is read, and the result is written to ``output_path`` with a
    profile whose size and transform are adjusted to the crop.

    Args:
        input_path: Path of the raster to crop.
        output_path: Path the cropped raster is written to.
        output_width: Width of the crop, in pixels.
        output_height: Height of the crop, in pixels.
    """
    with rasterio.open(input_path) as src:
        # Only the top-left corner of the centered box is needed here; the
        # window extent comes from the requested output size.
        col_off, row_off, *_ = cal_center_bounding_box(
            src.width, src.height, output_width, output_height
        )

        center_window = rasterio.windows.Window(
            col_off, row_off, output_width, output_height
        )
        cropped_data = src.read(window=center_window)

        profile = create_cropped_profile(
            src.profile,
            output_width,
            output_height,
            calculate_cropped_transform(src.transform, col_off, row_off),
        )

        write_raster(output_path, profile, cropped_data)

def cal_center_bounding_box(input_img_width, input_img_height,
                            output_img_width, output_img_height):
    """Return the pixel bounding box of the output-sized region centered in the input.

    Args:
        input_img_width: Width of the input image, in pixels.
        input_img_height: Height of the input image, in pixels.
        output_img_width: Width of the centered box, in pixels.
        output_img_height: Height of the centered box, in pixels.

    Returns:
        A tuple ``(top_left_x, top_left_y, down_right_x, down_right_y)`` of
        integers such that ``down_right_x - top_left_x == output_img_width``
        and ``down_right_y - top_left_y == output_img_height``.

    Raises:
        AssertionError: If the output size exceeds the input size in either
            dimension.
    """
    assert input_img_width >= output_img_width and input_img_height >= output_img_height, (
        'output size must not exceed input size'
    )

    top_left_x = round((input_img_width - output_img_width) / 2.)
    top_left_y = round((input_img_height - output_img_height) / 2.)

    # Derive the far corner from the already-rounded near corner. Rounding the
    # two corners independently (round() rounds halves to the nearest even
    # integer) can yield a box one pixel larger than requested when the
    # offset is a half pixel and the output size is odd.
    down_right_x = round(top_left_x + output_img_width)
    down_right_y = round(top_left_y + output_img_height)

    return top_left_x, top_left_y, down_right_x, down_right_y


def calculate_cropped_transform(src_transform, top_left_x, top_left_y):
    """Return ``src_transform`` composed with a translation to the crop origin.

    Args:
        src_transform: Affine transform of the source raster.
        top_left_x: Column offset of the crop's top-left corner, in pixels.
        top_left_y: Row offset of the crop's top-left corner, in pixels.

    Returns:
        The product ``src_transform * Affine.translation(top_left_x, top_left_y)``.
    """
    shift_to_crop_origin = Affine.translation(top_left_x, top_left_y)
    return src_transform * shift_to_crop_origin


def create_cropped_profile(src_profile, output_width, output_height, cropped_transform):
    """Return a copy of ``src_profile`` describing the cropped raster.

    The source profile is left untouched; only the copy receives the new
    ``width``, ``height`` and ``transform`` entries.

    Args:
        src_profile: Profile mapping of the source raster.
        output_width: Width of the cropped raster, in pixels.
        output_height: Height of the cropped raster, in pixels.
        cropped_transform: Transform to store in the new profile.

    Returns:
        The updated copy of the profile.
    """
    overrides = {
        'width': output_width,
        'height': output_height,
        'transform': cropped_transform,
    }
    cropped_profile = src_profile.copy()
    cropped_profile.update(overrides)
    return cropped_profile


def write_raster(output_path, profile, data, datasets=None):
    """Write raster bands to ``output_path`` using ``profile``.

    Args:
        output_path: Destination file path. Its parent directory is created
            when it does not exist yet.
        profile: Keyword arguments forwarded to ``rasterio.open`` in write mode.
        data: Array written as-is when ``datasets`` is ``None``; ignored
            otherwise.
        datasets: Optional iterable of open datasets. When given, every band
            of every dataset is written, in order, to consecutive band
            indexes of the output starting at 1.
    """
    # os.path.dirname() is '' for a bare filename, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with rasterio.open(output_path, 'w', **profile) as dst:
        if datasets is None:
            # Single-array case (e.g. cropped data).
            dst.write(data)
        else:
            # Multi-dataset case: stack all source bands into the output.
            band_index = 1
            for dataset in datasets:
                for band in range(1, dataset.count + 1):
                    dst.write(dataset.read(band), band_index)
                    band_index += 1

# Suffixes of sidecar/auxiliary files that must not be treated as rasters.
EXCLUDE_SUFFIXES = ['.aux.xml', '.cpg', '.dbf', '.prj', '.sbn', '.sbx', '.shx', 'shp.xml']


def get_files_without_suffixes(input_folder, exclude_suffixes=EXCLUDE_SUFFIXES):
    """List files in ``input_folder`` whose names do not end with an excluded suffix.

    Only entries matching the glob pattern ``*.*`` (names containing a dot)
    directly inside ``input_folder`` are considered.

    Args:
        input_folder: Folder to scan, or ``None``.
        exclude_suffixes: Iterable of suffix strings; a path ending with any
            of them is dropped.

    Returns:
        A list of matching paths; empty when ``input_folder`` is ``None``.
    """
    if input_folder is None:
        return []

    # Use the folder path verbatim: stripping spaces from the pattern would
    # point the glob at a different (usually non-existent) directory.
    pattern = os.path.join(input_folder, '*.*')
    excluded = tuple(exclude_suffixes)
    return [f for f in glob.glob(pattern) if not f.endswith(excluded)]

def get_filenames_without_suffixes(file_list):
    """Return the set of base file names in ``file_list`` with the last extension removed.

    Args:
        file_list: Iterable of file paths.

    Returns:
        A set of strings; for each path, the directory part and the final
        extension (as split by ``os.path.splitext``) are dropped.
    """
    stems = set()
    for path in file_list:
        stem, _extension = os.path.splitext(os.path.basename(path))
        stems.add(stem)
    return stems

## Clip patches

In [None]:
# Gather the candidate files on both sides: those waiting to be clipped and
# those already present in the output folder.
file_list = get_files_without_suffixes(img_to_clip_folder)
file_list_checked = get_files_without_suffixes(img_after_clipping_folder)

# Compare by base name so files already clipped are skipped.
file_names = get_filenames_without_suffixes(file_list)
file_names_checked = get_filenames_without_suffixes(file_list_checked)
files_to_process = file_names.difference(file_names_checked)

for file_name in tqdm(files_to_process):
    # Inputs are expected to be '.tif' files; the output keeps the same name.
    tif_name = f'{file_name}.tif'
    input_path = os.path.join(img_to_clip_folder, tif_name)
    output_path = os.path.join(img_after_clipping_folder, tif_name)
    assert os.path.exists(input_path)

    remove_black_border(input_path, output_path, output_width, output_height)