file copied from '/home/jleick/masterArbeitProjekt/1_preprocessing_adapte_datasets/10_1_dataset-resize.ipynb'

# Notebook overview
Resizes low-resolution (trap) images to square images by padding with large patches, saves resized images, and log process in CSV files.

- Loads low split datasets and source image folders
- Extends images to square using large patch padding and saves results
- Adds 'resized' and 'resized_fail' columns to track processing and saves updated CSVs

The notebook was exported as a Python script and run in a console using Tmux to execute it.

### Imports

In [41]:
import pandas as pd
import numpy as np
from pathlib import Path

from PIL import Image, ImageFilter
import random

### Varialbe - SEED

In [42]:
SEED = 42

### Load - Folder and Dataset Path

In [43]:
# Folder to load Images
IMG_SOURCE_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/images/download/low'
img_source_dir_path = Path(IMG_SOURCE_DIR_PATH)
if not img_source_dir_path.exists():
    raise FileNotFoundError(f"Folder does not exist: {IMG_SOURCE_DIR_PATH}")

# Folder to save Images
IMG_RESULT_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low'
img_result_dir_path = Path(IMG_RESULT_DIR_PATH)
if not img_result_dir_path.exists():
    raise FileNotFoundError(f"Folder does not exist: {IMG_RESULT_DIR_PATH}")

# Path to load Dataset - Image_paths
DATASET_SOURCE_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/datasets/created'
dataset_source_dir_path = Path(DATASET_SOURCE_DIR_PATH)
if not dataset_source_dir_path.exists():
    raise FileNotFoundError(f"Folder does not exist: {DATASET_SOURCE_DIR_PATH}")

# Path to save Dataset - Image_paths
DATASET_RESULT_DIR_PATH = '/home/jleick/masterArbeitProjekt/final_release/data/datasets/created/image_resized'
dataset_result_dir_path = Path(DATASET_RESULT_DIR_PATH)
if not dataset_result_dir_path.exists():
    raise FileNotFoundError(f"Folder does not exist: {DATASET_RESULT_DIR_PATH}")

### Function - select_files and apply function

In [44]:
### Select csv files in given folder Path

def select_files(source_dir_path: Path):
    dir_contains = source_dir_path.iterdir()
    dir_filtered = [] 
    # Filter all relevant files
    for file_path in dir_contains:
        filename = file_path.name
        ### adapt conditions for specific csv files in folder
        if ('low' in filename): # and 'val' in filename):
            dir_filtered.append(filename)

    return dir_filtered

#call funktion
selected_csv_files = select_files(dataset_source_dir_path)

# Print all selected files
for folder_name in selected_csv_files:
    print(folder_name)

# DUBLICATED CODE (CODE EXIST IN OTHER FILE TOO - CODE ADAPTED)

low_ood_test.csv
low_id_test.csv


### Function - load_low_df

In [45]:
### Adaptet Funktion compared to image_resize_high.ipynb

def load_low_df(dataset_path: Path) -> pd.DataFrame:
    df = pd.read_csv(dataset_path)

    ### rename file_name sufix to .png
    df["identifier"] = df["identifier"].str.replace(".ts", ".png", regex=False)
    ### rename col_name to image_path
    df = df.rename(columns={"identifier": "image_path"})

    if 'image_path' not in df.columns:
        raise ValueError(f"'image_path' column not found in {dataset_path}")
    return df

### Function - add_column_resized_to_df

In [46]:
### add column to track existing tensors 'tensor_created'

def add_column_resized_to_df(df: pd.DataFrame):

    column_name_resized = "resized"
    if column_name_resized not in df.columns:
        df[column_name_resized] = False
        print(f'>>> {df.shape} - Added column: {column_name_resized}')

    column_name_resized_fail = 'resized_fail'
    if column_name_resized_fail not in df.columns:
        df[column_name_resized_fail] = 'NaN'
        print(f'>>> {df.shape} - Added column: {column_name_resized_fail}')
    
    return column_name_resized, column_name_resized_fail


### Test function
# column_name_embedding = add_embedding_column_to_df(data_temp)
# print( f'return: {column_name_embedding}' )
# data_temp

### Funktion - extend_img_to_square

In [47]:
def extend_img_to_square(img_path):
    img = Image.open(img_path).convert("RGB")
    w, h = img.size
    img_np = np.array(img)

    if w == h:
        return img

    size = max(w, h)
    patch_size = size//3

    canvas = Image.new("RGB", (size, size))
    random.seed(SEED)
    for y in range(0, size, patch_size):
        for x in range(0, size, patch_size):
            # Random patch position at the original image.
            src_x = random.randint(0, max(0, w - patch_size))
            src_y = random.randint(0, max(0, h - patch_size))

            patch = img.crop((src_x, src_y, src_x + patch_size, src_y + patch_size))

            canvas.paste(patch, (x, y))

    # Insert original image centered
    offset = ((size - w) // 2, (size - h) // 2)
    canvas.paste(img, offset)

    return canvas

# ATENTIONE - FUNCTION ADDAPTED
# old function name: make_square_with_texture_padding_auto

### Apply functions in loop

In [48]:
### RUN - call all Functions

def process_files(selected_csv_files:[str], img_source_dir_path, img_result_dir_path, dataset_source_dir_path, dataset_result_dir_path):

    # iterate over different datasets
    for csv_file in selected_csv_files:
        # create path to dataset
        csv_file_path = dataset_source_dir_path / csv_file

        # load df
        df = load_low_df(csv_file_path)

        # add column to df
        column_name, column_name_fail = add_column_resized_to_df(df)

        # process every image
        for index in df.index:
            file_name = df.at[index, 'image_path']

            # create source and result path for img
            file_path_img = img_source_dir_path / file_name
            file_path_result_img = img_result_dir_path / file_name

            # check if result folder exist
            file_dir_path_result = file_path_result_img.parent
            if not file_dir_path_result.exists():
                file_dir_path_result.mkdir(parents=True, exist_ok=True)
                print(f'Created new Folder to save results: {file_dir_path_result}')


            try:
                # check if img already exists
                if file_path_result_img.exists():
                    df.at[index, column_name] = True
                    df.at[index, column_name_fail] = 'no error'
                    print(f"The file was not saved. The file path already exists: {file_path_result_img}")
                    continue
                else:
                # create img if not already exist    
                    img_resized = extend_img_to_square(file_path_img)
                    img_resized.save(file_path_result_img)
                    df.at[index, column_name] = True
                    df.at[index, column_name_fail] = 'no error'
                    print(f"The file was created and saved: {file_path_result_img}")
            except Exception as e:
                df.at[index, column_name] = False
                df.at[index, column_name_fail] = str(e)
                print(f"Error occured while processing resizing the image {file_path_img}: {e}")
        
        df.to_csv(dataset_result_dir_path / csv_file, index=False, header=True)


process_files(selected_csv_files, img_source_dir_path, img_result_dir_path, dataset_source_dir_path, dataset_result_dir_path)

>>> (698, 9) - Added column: resized
>>> (698, 10) - Added column: resized_fail
The file was not saved. The file path already exists: /home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low/fgrained-26.png
The file was not saved. The file path already exists: /home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low/fgrained-29.png
The file was not saved. The file path already exists: /home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low/fgrained-3340.png
The file was not saved. The file path already exists: /home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low/fgrained-8222.png
The file was not saved. The file path already exists: /home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low/fgrained-12171.png
The file was not saved. The file path already exists: /home/jleick/masterArbeitProjekt/final_release/data/images/adapted/resized/low/fgrained-12180.png
The file was not