In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np
import glob
import shutil
from tqdm.auto import tqdm
import albumentations as A

# Bounding-box annotations for the training images; rows without a box are
# dropped. drop=True stops reset_index() from keeping the old index around as
# a stray "index" column.
box_df = pd.read_csv('../input/whale2-cropped-dataset/train2.csv')
box_df = box_df[~box_df.box.isna()].reset_index(drop=True)

# Augmentation pipeline: resize to a square 15% larger than the target side,
# then take a random img_size x img_size crop out of it.
img_size = 512
transform = A.Compose([
        A.Resize(int(img_size * 1.15), int(img_size * 1.15)),
        A.RandomCrop(img_size, img_size, p=1.0),
    ])

# Preview the annotations. This must be the cell's last expression to be
# rendered; a bare expression in the middle of a cell is silently discarded.
box_df.head()

In [None]:
def enlarge_box(box, scale, img_w, img_h):
    """Return a copy of ``box`` grown to ``scale`` times its size, clipped to the image.

    The box grows symmetrically: each side moves outwards by half of the
    *extra* width/height, so ``scale=1.0`` leaves the box unchanged and
    ``scale=1.1`` adds roughly 10% to each dimension (before clipping).

    Args:
        box: Sequence starting with ``x_min, y_min, x_max, y_max``. It is not
            modified.
        scale: Size of the result relative to the input box; expected to be
            ``>= 1``.
        img_w: Upper clamp for the x coordinates (image width).
        img_h: Upper clamp for the y coordinates (image height).

    Returns:
        A new list with the first four entries replaced by the enlarged,
        clipped coordinates.
    """
    w, h = box[2] - box[0], box[3] - box[1]
    # Pad by half of the extra extent per side. Padding with scale * w instead
    # of (scale - 1) * w would grow the box to (1 + scale) times its size.
    pad_x = int((scale - 1) * w) // 2
    pad_y = int((scale - 1) * h) // 2

    # Work on a copy so the caller's list is left untouched.
    grown = list(box)
    grown[0] = max(box[0] - pad_x, 0)
    grown[1] = max(box[1] - pad_y, 0)
    grown[2] = min(box[2] + pad_x, img_w)
    grown[3] = min(box[3] + pad_y, img_h)
    return grown

# Sanity check: pick one annotated training image at random and show it with
# its enlarged bounding box, next to the resulting crop.
# iloc is positional, so draw a position rather than an index label.
sample = box_df.iloc[np.random.choice(len(box_df))]

img_path = f'../input/happy-whale-and-dolphin/train_images/{sample["image"]}'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f'Could not read image: {img_path}')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

box = sample.box
if pd.isna(box):
    # No annotation: fall back to the whole frame so the drawing and slicing
    # below still have four coordinates to work with.
    box = [0, 0, img.shape[1], img.shape[0]]
else:
    box = [int(x) for x in box.split(" ")]
    box = enlarge_box(box, 1.1, img.shape[1], img.shape[0])

# Copy the crop *before* drawing: a plain slice is a view into img, so the
# rectangle would otherwise be painted into the crop as well.
crop = img[box[1]:box[3], box[0]:box[2], :].copy()
cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (255, 0, 0), 5)

# Pass figsize to subplots(); a separate plt.figure() call only creates an
# extra, empty figure and leaves the subplots at the default size.
f, ax = plt.subplots(2, 1, figsize=(15, 15))
# aug = transform(image=crop)['image']
ax[0].imshow(img)
ax[1].imshow(crop)
# ax[2].imshow(aug)
img.shape

In [None]:
# Output folders for the processed train and test images; exist_ok makes the
# cell safe to re-run.
for out_dir in ('data/train_images', 'data/test_images'):
    os.makedirs(out_dir, exist_ok=True)
# shutil.copy('../input/happy-whale-and-dolphin/train.csv', 'data/train.csv')

# Pixel threshold/target for the shorter image side, read by copy_imgs.
MAX_SIZE = 640

def copy_imgs(src, dest, box_csv, crop_min_side=600, max_size=None):
    """Crop each image in ``src`` to its annotated box, downscale it and write it to ``dest``.

    For every file matched by ``{src}/*``:

    1. If the longer side is at least ``crop_min_side`` and the CSV holds a
       box for the file, the image is cropped to that box enlarged with
       ``enlarge_box(..., 1.1, ...)``.
    2. If the shorter side of the (possibly cropped) image exceeds
       ``max_size``, the image is scaled down so that the shorter side becomes
       ``max_size``, keeping the aspect ratio.
    3. The result is written to ``dest`` under the original file name.

    Args:
        src: Directory containing the input images.
        dest: Existing directory the processed images are written to.
        box_csv: File name of the box CSV inside
            ``../input/whale2-cropped-dataset``; it is read for its ``image``
            and ``box`` columns, where ``box`` is a space-separated string of
            four integers.
        crop_min_side: Images whose longer side is below this are never cropped.
        max_size: Shorter-side limit in pixels. ``None`` (default) uses the
            module-level ``MAX_SIZE``.

    Returns:
        ``(apply_box, nan_box)``: the number of images that were cropped, and
        the number that qualified for cropping but had no usable box (NaN in
        the CSV or no row at all).
    """
    import warnings

    if max_size is None:
        max_size = MAX_SIZE

    box_df = pd.read_csv(f'../input/whale2-cropped-dataset/{box_csv}')
    # Build the lookup once: a dict access per image instead of scanning the
    # whole frame on every iteration. drop_duplicates keeps the first row per
    # image, i.e. the same row a ``.values[0]`` lookup on the filtered frame
    # would select.
    box_by_image = box_df.drop_duplicates('image').set_index('image')['box'].to_dict()

    apply_box = 0
    nan_box = 0
    bar = tqdm(glob.glob(f'{src}/*'))
    for img_file in bar:
        img_name = os.path.basename(img_file)
        img = cv2.imread(img_file)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them
            # instead of failing on ``img.shape`` halfway through the run.
            warnings.warn(f'Skipping unreadable image: {img_file}')
            continue

        h, w = img.shape[:2]
        if max(h, w) >= crop_min_side:
            # .get() yields None for images missing from the CSV, which
            # pd.isna() treats like a NaN box.
            box = box_by_image.get(img_name)
            if not pd.isna(box):
                box = [int(x) for x in box.split(" ")]
                box = enlarge_box(box, 1.1, w, h)
                img = img[box[1]:box[3], box[0]:box[2], :]
                # Refresh the dimensions: the resize below must be based on
                # the crop, not on the uncropped image, otherwise the crop is
                # stretched to the original image's aspect ratio.
                h, w = img.shape[:2]
                apply_box += 1
            else:
                nan_box += 1

        if min(h, w) > max_size:
            r = max_size / min(h, w)
            img = cv2.resize(img,
                             (int(w * r), int(h * r)),
                             interpolation=cv2.INTER_LINEAR)

        cv2.imwrite(f'{dest}/{img_name}', img)
        bar.set_description(f'Apply box: {apply_box} - nan box: {nan_box}')

    return apply_box, nan_box
            
# Build the processed train set, then the test set. Each call returns the
# (apply_box, nan_box) counters, which are printed as a tuple.
train_counts = copy_imgs('../input/happy-whale-and-dolphin/train_images', 'data/train_images', 'train2.csv')
print(train_counts)
test_counts = copy_imgs('../input/happy-whale-and-dolphin/test_images', 'data/test_images', 'test2.csv')
print(test_counts)

In [None]:
# Show one randomly chosen, unprocessed test image.
test_paths = glob.glob('../input/happy-whale-and-dolphin/test_images/*')
random_path = np.random.choice(test_paths)
bgr = cv2.imread(random_path)
# cv2 decodes to BGR; reversing the channel axis gives the RGB order imshow expects.
plt.imshow(bgr[:, :, ::-1])

In [None]:
!zip -r data.zip /kaggle/working/data/*
!rm -rf /kaggle/working/data