In [1]:
# Mount Google Drive inside the Colab runtime; the dataset paths used by the
# later cells all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
def crop_image(image, x1, y1, x2, y2):
  """Cut a rectangular region out of `image`.

  The rectangle starts at column `x1` / row `y1`. `x2` and `y2` are added to
  that origin, so they act as the width and height of the box rather than as
  an absolute bottom-right corner.

  Args:
    image: array indexed as image[row, column, ...].
    x1: first column of the box.
    y1: first row of the box.
    x2: number of columns to take, starting at `x1`.
    y2: number of rows to take, starting at `y1`.

  Returns:
    The slice image[y1:y1 + y2, x1:x1 + x2].
  """
  row_span = slice(y1, y1 + y2)
  col_span = slice(x1, x1 + x2)
  return image[row_span, col_span]

def create_bbox_df(image_names_file, bbox_file_path):
  """Build one DataFrame pairing every image path with its bounding box.

  Both inputs are header-less, whitespace-separated text files. The first
  column of each file is dropped (presumably a running image id -- the rows
  are paired by position, not by that column).

  Args:
    image_names_file: path to the file whose second column holds relative
      image paths of the form '<folder>/<file>'.
    bbox_file_path: path to the file whose columns 2-5 hold the four integer
      bounding-box values.

  Returns:
    DataFrame with columns
      class_name   -- the relative image path (despite the column name),
      folder_name  -- the part of that path before the first '/',
      x1, y1, x2, y2 -- the bounding-box values as int64.

  Raises:
    ValueError: if the two files do not contain the same number of rows.
  """
  # read files and create dfs
  # (sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True)
  img_df = pd.read_csv(image_names_file, sep=r'\s+', index_col=False, header=None)
  img_df['folder_name'] = img_df[img_df.columns[1]].apply(lambda x: x.split('/')[0])
  bbox_df = pd.read_csv(bbox_file_path, sep=r'\s+', index_col=False, header=None, dtype=np.int64).iloc[:, 1:]

  # rows are matched purely by position, so a length mismatch would silently
  # pad one side with NaN -- fail loudly instead
  if len(img_df) != len(bbox_df):
    raise ValueError(
        f'Row count mismatch: {len(img_df)} image names vs {len(bbox_df)} bounding boxes.')

  # join dfs
  df = pd.concat([img_df.iloc[:, 1:], bbox_df], axis=1)
  df.columns = ['class_name', 'folder_name', 'x1', 'y1', 'x2', 'y2']
  return df


def get_cropped_images(src_path, dst_path, bbox_df):
  """Crop every image listed in `bbox_df` and write the crops under `dst_path`.

  Each crop is written to the same relative path under `dst_path` that the
  original image has under `src_path`.

  Args:
    src_path: directory the relative paths in `class_name` are resolved
      against.
    dst_path: output directory; created if it does not exist.
    bbox_df: DataFrame with the columns `class_name` (relative image path),
      `folder_name` (sub-folder to create under `dst_path`) and
      `x1`, `y1`, `x2`, `y2` (passed straight to `crop_image`, where `x2`/`y2`
      are used as width/height).

  Returns:
    `dst_path`, unchanged.

  Images that cannot be read, boxes that yield an empty crop, and crops that
  cannot be written are skipped and listed after the summary instead of
  aborting the whole run; only successful writes are counted.
  """
  os.makedirs(dst_path, exist_ok=True)

  # count the number of images written
  count = 0
  skipped = []

  # iterate positionally so the frame's index labels do not matter
  columns = ['class_name', 'folder_name', 'x1', 'y1', 'x2', 'y2']
  for image_name, folder_name, x1, y1, x2, y2 in bbox_df[columns].itertuples(index=False, name=None):
    # if the class folder does not exist, create folder
    os.makedirs(os.path.join(dst_path, folder_name), exist_ok=True)

    src_img_path = os.path.join(src_path, image_name)
    dst_img_path = os.path.join(dst_path, image_name)

    # cv2.imread signals failure by returning None rather than raising
    image = cv2.imread(src_img_path, cv2.IMREAD_COLOR)
    if image is None:
      skipped.append((image_name, 'could not be read'))
      continue

    cropped_img = crop_image(image, x1, y1, x2, y2)
    if cropped_img.size == 0:
      skipped.append((image_name, 'bounding box produced an empty crop'))
      continue

    # cv2.imwrite reports failure through its boolean return value
    if not cv2.imwrite(dst_img_path, cropped_img):
      skipped.append((image_name, 'could not be written'))
      continue
    count += 1

  print('Cropping Completed.\n')
  print(f'{count} files copied.')
  if skipped:
    print(f'{len(skipped)} files skipped:')
    for image_name, reason in skipped:
      print(f'  {image_name}: {reason}')
  return dst_path


In [None]:
# provide paths -- everything lives under one dataset folder on the mounted Drive
cub_root = '/content/drive/MyDrive/Colab Notebooks/Data/CUB_200_2011'

img_src_path = f'{cub_root}/images'                 # original images
img_dst_path = f'{cub_root}/images_cropped'         # cropped copies are written here
bbox_file_path = f'{cub_root}/bounding_boxes.txt'   # bounding-box values per image
image_names_path = f'{cub_root}/images.txt'         # relative image paths

In [None]:
# create df with image name and bbox info: one row per listed image, holding
# its relative path, its folder name and the four bounding-box values
bbox_df = create_bbox_df(image_names_path, bbox_file_path)

In [None]:
# sanity check: row count, column names and dtypes of the joined frame
bbox_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11788 entries, 0 to 11787
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   class_name   11788 non-null  object
 1   folder_name  11788 non-null  object
 2   x1           11788 non-null  int64 
 3   y1           11788 non-null  int64 
 4   x2           11788 non-null  int64 
 5   y2           11788 non-null  int64 
dtypes: int64(4), object(2)
memory usage: 552.7+ KB


In [None]:
# crop and store images in a different folder; the function returns the
# destination path, which is what the cell displays as its result
get_cropped_images(img_src_path, img_dst_path, bbox_df)

Cropping Completed.

11788 files copied.


'/content/drive/MyDrive/Colab Notebooks/Data/CUB_200_2011/images_cropped'