## 1. Set up

In [1]:
# Import useful libraries
import os, sys, shutil, glob
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd 
from tqdm import tqdm
from PIL import Image 
from torchvision import transforms



In [2]:
# This cell only executes when the notebook is running on Google Colab
# (detected by the 'google.colab' module being loaded in this session).
if 'google.colab' in sys.modules:
  from google.colab import drive 
  drive.mount('/content/drive') # Mount Google Drive at /content/drive

  # Clone the SP22 branch of the Audubon bird detection GitHub repo into
  # ./Audubon_F21; the later `from Audubon_F21...` import relies on it.
  # The clone without a branch argument is kept commented out for reference.
  #!git clone https://github.com/RiceD2KLab/Audubon_F21.git 
  !git clone -b SP22 https://github.com/RiceD2KLab/Audubon_F21.git

Mounted at /content/drive
Cloning into 'Audubon_F21'...
remote: Enumerating objects: 1040, done.[K
remote: Counting objects: 100% (1040/1040), done.[K
remote: Compressing objects: 100% (618/618), done.[K
remote: Total 1040 (delta 599), reused 757 (delta 391), pack-reused 0[K
Receiving objects: 100% (1040/1040), 60.20 MiB | 10.94 MiB/s, done.
Resolving deltas: 100% (599/599), done.


In [None]:
# Uncomment to delete the cloned repository (e.g. before cloning it again)
#rm -r Audubon_F21

## 2. Load image data

In [None]:
# upgrade gdown to solve random downloading access denial error
!pip install --upgrade --no-cache-dir gdown   
!mkdir -p './data/raw'
!gdown -q https://drive.google.com/uc?id=1zhB6_MLtvD0JCoyKYqhUx497WIvSYVUk
!unzip -q './1017_1.zip' -d './data/raw'
!gdown -q https://drive.google.com/uc?id=1clRsR5zg60FYjQ-crGx8CN88yPsUgVse
!unzip -q './1017_2.zip' -d './data/raw'
!gdown -q https://drive.google.com/uc?id=1fC4xAZJFoEccrgBhvjLMGpzLVXEcfHm6
!unzip -q './annotation_1017.zip' -d './data/raw'

## 3. Augmentation

In [4]:
from Audubon_F21.data_aug.functions import dataset_aug

In [5]:
# input_dir is the folder that holds both the images and their annotations
# (image: jpg; annotation: csv or bbx).
input_dir = './data/raw'

# output_dir is the folder that receives the newly generated files.
# To write to Google Drive instead, switch to the commented-out path below;
# the folder is created from output_dir, so this is the only line to change.
output_dir = './data/cropped'
#output_dir ='/content/drive/My Drive/Audubon/aug_data'
os.makedirs(output_dir, exist_ok=True)  # same effect as `mkdir -p`

# Tile size (same value for height and width)
crop_height = crop_width = 640

# List of species that we want to augment
minor_species = [
    "Other Bird",
    "Tri-Colored Heron Adult",
    "Roseate Spoonbill Adult",
    "Black Crowned Night Heron Adult",
]

# Minimum portion of a bounding box being accepted in a subimage
overlap = 0.2

# Maximum portion of non-minor creatures existing in a subimage
thres = 0.3


In [7]:
# Run the augmentation: takes the images/annotations in input_dir and writes
# the generated files to output_dir, using the species list and the two
# thresholds configured above.
# NOTE(review): crop_height / crop_width are defined in the configuration cell
# but are not passed here — confirm how dataset_aug picks its tile size.
dataset_aug(input_dir, output_dir, minor_species, overlap, thres)

Cropping files: 100%|██████████| 87/87 [01:56<00:00,  1.34s/it]


In [8]:
# Sanity check: how many regular files ended up in the output folder
print(sum(
    os.path.isfile(os.path.join(output_dir, entry))
    for entry in os.listdir(output_dir)
))

340


In [None]:
# Delete the output folder and everything in it (optional clean-up).
# Guarded so that re-running the cell once the folder is already gone is a
# no-op instead of raising FileNotFoundError.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)