### Import dependencies

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from IPython.display import clear_output

### GPU Testing

In [None]:
# Check if GPU is available. `tf.test.is_gpu_available()` is deprecated;
# listing the physical GPU devices is the supported replacement.
is_gpu_available = bool(tf.config.list_physical_devices('GPU'))

# Clear the cell output (TensorFlow may print device discovery logs)
clear_output()

# Print GPU availability
print('GPU is available' if is_gpu_available else 'GPU is not available')

### Load data

We rename the `img` column to `image_id` and the `rot` column to `is_rotten` for clarity.

In [None]:
def load_train_df(path: str = 'data/train.csv') -> pd.DataFrame:
    """
    Load the training annotations into a DataFrame.

    The first CSV column is used as the index. The `img` column is renamed
    to `image_id` and the `rot` column to `is_rotten`.

    Args:
    - path (str): Location of the training CSV. Defaults to 'data/train.csv'.

    Returns:
    - pd.DataFrame: The CSV contents with the renamed columns.
    """
    column_types = {
        'img': int, # Image ID
        'x': int, # X coordinate of the top-left corner of the bounding box
        'y': int, # Y coordinate of the top-left corner of the bounding box
        'w': int, # Width of the bounding box
        'h': int, # Height of the bounding box
        'rot': int, # Whether the image shows a rotten pear (1) or not (0)
    }
    train_df = pd.read_csv(path, dtype=column_types, index_col=0)

    return train_df.rename(columns={
        'img': 'image_id',
        'rot': 'is_rotten',
    })

In [None]:
def load_test_df(path: str = 'data/test.csv') -> pd.DataFrame:
    """
    Load the test annotations into a DataFrame.

    The first CSV column is used as the index. The `img` column is renamed
    to `image_id`.

    Args:
    - path (str): Location of the test CSV. Defaults to 'data/test.csv'.

    Returns:
    - pd.DataFrame: The CSV contents with the renamed column.
    """
    column_types = {
        'img': int, # Image ID
        'x': int, # X coordinate of the top-left corner of the bounding box
        'y': int, # Y coordinate of the top-left corner of the bounding box
        'w': int, # Width of the bounding box
        'h': int, # Height of the bounding box
    }
    test_df = pd.read_csv(path, dtype=column_types, index_col=0)

    return test_df.rename(columns={
        'img': 'image_id',
    })

In [None]:
def safe_create_directory(directory: str) -> None:
    """
    Create a directory, including missing parents, if it is not there yet.

    An OSError raised during creation is reported on standard output
    instead of being propagated to the caller.

    Args:
    - directory (str): The path to the directory to create.

    Returns:
    - None
    """
    try:
        os.makedirs(directory, exist_ok=True)
    except OSError as error:
        # Best effort: report the problem and carry on
        message = f'Error creating directory {directory}: {error}'
        print(message)

In [None]:
def get_image_path(image_id: int, directory: str) -> str:
    """
    Get the path to an image given its ID and the directory it is stored in.

    The file name is the image ID zero-padded to eight digits with a `.png`
    extension. The directory may be given with or without a trailing
    separator.

    Args:
    - image_id (int): The ID of the image.
    - directory (str): The directory the image is stored in.

    Returns:
    - str: The path to the image.
    """
    file_name = f"{image_id:08d}.png"

    # os.path.join only inserts a separator when the directory lacks one
    return os.path.join(directory, file_name)

In [None]:
def load_image(image_id: int, directory: str) -> np.ndarray:
    """
    Load an image given its ID and the directory it is stored in.

    Args:
    - image_id (int): The ID of the image.
    - directory (str): The directory the image is stored in.

    Returns:
    - np.ndarray: The image as a NumPy array.

    Raises:
    - FileNotFoundError: If the image could not be read.
    """
    image_path = get_image_path(image_id, directory)
    image = cv2.imread(image_path)

    # cv2.imread reports failure by returning None rather than raising,
    # so fail here with the offending path instead of later on a None value
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')

    return image

In [None]:
# def get_cropped_image(row: pd.Series, directory: str) -> np.ndarray:
#     """
#     Crop an image given its id and the coordinates of the bounding box.

#     Args:
#     - row: A row containing the image id, coordinates, and dimensions.
#     - directory (str): The directory the image is stored in.

#     Returns:
#     - np.ndarray: The cropped image as a NumPy array.
#     """
#     image = load_image(row['image_id'], directory)
#     left_x, original_top_y, box_width, box_height = row[['x', 'y', 'w', 'h']]
#     image_height = image.shape[0]

#     right_x = left_x + box_width
#     original_bottom_y = original_top_y + box_height

#     bottom_y = image_height - original_top_y
#     top_y = image_height - original_bottom_y
#     print(f"new_top_y = {image_height} - {original_bottom_y} = {top_y}")
#     print(f"new_bottom_y = {image_height} - {original_top_y} = {bottom_y}")

#     print(f"Image shape: {image.shape}")
#     print(f"Box size: width: {box_width} and height: {box_height}")
#     print(f"Original coordinates: ({left_x}, {original_top_y}) ({right_x}, {original_bottom_y})")
#     print(f"New coordinates are: ({left_x}, {top_y}) ({right_x}, {bottom_y})")

#     cropped_image = image[top_y:bottom_y, left_x:right_x]

#     return cropped_image

In [None]:
def get_cropped_image(row: pd.Series, directory: str) -> np.ndarray:
    """
    Load the image referenced by a row and cut out its bounding box.

    Args:
    - row: A row containing `image_id` plus the box's `x`, `y`, `w` and `h`.
    - directory (str): The directory the image is stored in.

    Returns:
    - np.ndarray: The cropped image as a NumPy array.
    """
    full_image = load_image(row['image_id'], directory)
    x, y, width, height = (row[key] for key in ('x', 'y', 'w', 'h'))

    # Arrays are indexed rows first (y), then columns (x)
    row_span = slice(y, y + height)
    column_span = slice(x, x + width)

    return full_image[row_span, column_span]

In [None]:
# Locations of the original images and of their cropped counterparts,
# keyed first by kind ('raw' / 'cropped') and then by split ('train' / 'test')
IMAGE_DATA_DIRECTORIES = {
    'raw': {
        'train': 'data/raw/train_images/',
        'test': 'data/raw/test_images/',
    },
    'cropped': {
        'train': 'data/cropped/train_images/',
        'test': 'data/cropped/test_images/',
    },
}

# Make sure every directory exists (raw ones first, then cropped ones)
for directories in IMAGE_DATA_DIRECTORIES.values():
    for directory in directories.values():
        safe_create_directory(directory)

In [None]:
# Read the training annotations (with the `is_rotten` label column)
train_df = load_train_df()
# Read the test annotations (bounding boxes only)
test_df = load_test_df()

In [None]:

# Define a function to apply to each row of the DataFrame
def convert_to_cropped(
    row: pd.Series,
    *,
    split: str = 'train',
    show: bool = True,
    save: bool = False,
) -> None:
    """
    Crop one annotated image to its bounding box and optionally show/store it.

    Args:
    - row (pd.Series): A row containing `image_id`, `x`, `y`, `w` and `h`.
    - split (str): Key into IMAGE_DATA_DIRECTORIES ('train' or 'test').
    - show (bool): Whether to display the crop with matplotlib.
    - save (bool): Whether to write the crop to the cropped directory of `split`.

    Returns:
    - None

    Raises:
    - OSError: If `save` is True and the cropped image could not be written.
    """
    # Get the image
    image = get_cropped_image(row, IMAGE_DATA_DIRECTORIES['raw'][split])

    if show:
        # Show the image in RGB format (the crop is converted from BGR)
        plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        plt.show()

    if save:
        # Store the image in the cropped directory under the same file name
        output_path = get_image_path(row['image_id'], IMAGE_DATA_DIRECTORIES['cropped'][split])
        if not cv2.imwrite(output_path, image):
            raise OSError(f'Could not write cropped image: {output_path}')

# Preview the crops for rows 1 to 4 of the training data
for i in range(1, 5):
    convert_to_cropped(train_df.iloc[i])