## 1. Set up

In [1]:
# Import useful libraries
import os, sys, shutil, glob
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd 
from tqdm import tqdm
from PIL import Image 
from torchvision import transforms



In [13]:
# This cell only excecutes if you're running on Colab. 
if 'google.colab' in sys.modules:
  from google.colab import drive 
  drive.mount('/content/drive') # Mount Google Drive! 

  # Clone Audubon bird detection Github repo 
  #!git clone https://github.com/RiceD2KLab/Audubon_F21.git 
  !git clone -b ty37 https://github.com/RiceD2KLab/Audubon_F21.git

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Cloning into 'Audubon_F21'...
remote: Enumerating objects: 708, done.[K
remote: Counting objects: 100% (708/708), done.[K
remote: Compressing objects: 100% (417/417), done.[K
remote: Total 708 (delta 376), reused 553 (delta 263), pack-reused 0[K
Receiving objects: 100% (708/708), 56.29 MiB | 10.86 MiB/s, done.
Resolving deltas: 100% (376/376), done.


In [12]:
# Manual reset only: delete the cloned repo so that the clone cell above can fetch a fresh copy.
# Disabled by default because, in a top-to-bottom run, this cell executes right after the clone
# and would remove the Audubon_F21 package that the Augmentation imports below depend on.
RESET_REPO = False
if RESET_REPO:
  # ignore_errors=True: a missing folder is not an error when resetting.
  shutil.rmtree('Audubon_F21', ignore_errors=True)

## 2. Load image data

In [None]:
# upgrade gdown to solve random downloading access denial error
!pip install --upgrade --no-cache-dir gdown   
!mkdir -p './data/raw'
!gdown -q https://drive.google.com/uc?id=1zhB6_MLtvD0JCoyKYqhUx497WIvSYVUk
!unzip -q './1017_1.zip' -d './data/raw'
!gdown -q https://drive.google.com/uc?id=1clRsR5zg60FYjQ-crGx8CN88yPsUgVse
!unzip -q './1017_2.zip' -d './data/raw'
!gdown -q https://drive.google.com/uc?id=1fC4xAZJFoEccrgBhvjLMGpzLVXEcfHm6
!unzip -q './annotation_1017.zip' -d './data/raw'

## 3. Augmentation

In [14]:
from Audubon_F21.utils import plotting
from Audubon_F21.utils.cropping import csv_to_dict, dict_to_csv
from Audubon_F21.data_aug.functions import dataset_aug

In [15]:
# input_dir is the folder that contains both images and annotations
# (image: jpg; annotation: csv or bbx).
input_dir = './data/raw'

# output_dir is the folder where the new (cropped / augmented) files are written.
# It is created from the variable itself so that changing the path here is enough.
output_dir = './data/cropped'
os.makedirs(output_dir, exist_ok=True)

# Tile size
crop_height = crop_width = 640

# List of species that we want to augment
minor_species = [
    "Other Bird",
    "Tri-Colored Heron Adult",
    "Roseate Spoonbill Adult",
    "Black Crowned Night Heron Adult",
]



In [None]:
# Run the augmentation over input_dir and write the results to output_dir.
# The tile size comes from crop_height / crop_width set in the configuration cell,
# so editing the values there actually takes effect here.
dataset_aug(
    input_dir,
    output_dir,
    minor_species,
    annot_file_ext='bbx',
    crop_height=crop_height,
    crop_width=crop_width,
)

In [None]:
# Check output: number of regular files (sub-directories excluded) in output_dir
print(sum(1 for entry in os.scandir(output_dir) if entry.is_file()))

In [11]:
# Remove the output directory and everything in it.
# Guarded so that running the cell twice (or before the directory exists) does not raise
# FileNotFoundError.
if os.path.isdir(output_dir):
    shutil.rmtree(output_dir)