In [None]:
#Uncomment below to install animaloc
#!pip install -e .

In [1]:
# Imports
from pathlib import Path
import os
import random
import rasterio
from rasterio.windows import Window
import PIL
import torchvision
import numpy
import cv2
import skimage
from itertools import product
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
from albumentations import PadIfNeeded
import pandas as pd
from tqdm import tqdm
import shutil
from animaloc.data import ImageToPatches, PatchesBuffer, save_batch_images

# Tiling data for annotation

### For GeoTiffs

In [None]:
def save_patches(img_path:Path,dest_dir:Path,tilesize=512):
    """Cut a raster into square tiles, save them as PNG files and index them in a CSV.

    The raster is read window by window with rasterio. Tiles that contain a
    single value are skipped. Tiles on the right/bottom border keep the size
    actually returned by the read, so the recorded extents come from the
    chunk's own shape.

    Args:
        img_path: path of the raster to tile (anything rasterio can open).
        dest_dir: output directory. It is deleted first when it already
            exists, then recreated (missing parents are created too).
        tilesize: edge length of a tile, in pixels.

    Returns:
        A DataFrame with one row per saved tile and the columns
        xmin, xmax, ymin, ymax, x_center, y_center, filename. The same table
        is written to ``dest_dir/coordinates<name>.csv``.
    """
    # accept plain strings as well as Path objects
    img_path, dest_dir = Path(img_path), Path(dest_dir)

    # create directory (always start from an empty one)
    if dest_dir.exists():
        shutil.rmtree(dest_dir)
        print("emptying directory:", dest_dir)
    else:
        print("creating directory:", dest_dir)
    dest_dir.mkdir(parents=True)

    basename = img_path.name.split('.')[0]
    coordinates = dict()

    # window reading with rasterio; the context manager closes the dataset
    # even when something below raises
    with rasterio.open(img_path) as handler:
        height, width = handler.meta['height'], handler.meta['width']
        # materialise the tile origins so that tqdm can display a total
        origins = list(product(range(0,height,tilesize),range(0,width,tilesize)))
        for i,j in tqdm(origins):
            window = Window(j, i, tilesize, tilesize)
            filename = basename + f"-{j}-{i}.png"

            try:
                chunk = handler.read(window=window)
                c,h,w = chunk.shape
                # a tile holding one single value is skipped
                if np.unique(chunk).size == 1:
                    continue
                # save to disk with the band axis moved last
                skimage.io.imsave(dest_dir/filename,np.transpose(chunk,(1,2,0)))
            except Exception as e:
                # best effort: report the tile and carry on with the next one
                print("Failed for",(i,j),e)
                continue

            # record the tile only once it has really been written
            xmin, xmax = j, j+w
            ymin, ymax = i, i+h
            x_center = 0.5*(xmin+xmax)
            y_center = 0.5*(ymin+ymax)
            coordinates[len(coordinates)+1] = [xmin,xmax,ymin,ymax,x_center,y_center,filename]

    cols = ['xmin','xmax','ymin','ymax','x_center','y_center','filename']
    coordinates = pd.DataFrame.from_dict(coordinates,
                                        orient='index',
                                        columns=cols)
    coordinates.to_csv(dest_dir/f"coordinates{basename}.csv",index=False)
    return coordinates

In [None]:
# GeoTIFF to tile; its patches go to a sibling folder named after the file (text before the first dot)
# for datapath in Path("../annotation_data/camp6/").iterdir():
datapath = Path("../annotation_data/camp6/150m_RGB.tif")
dest_dir = datapath.with_name(datapath.name.split('.')[0])
# coordinates =  save_patches(datapath,dest_dir,tilesize=1024)

In [None]:
# Preview the tile table returned by save_patches.
# NOTE: the save_patches call in the previous cell is commented out, so on a fresh kernel
# `coordinates` is not defined until that line is re-enabled.
coordinates

### For Images (jpg, rgb etc.)
Using the HerdNet patcher script (`tools/patcher.py`)

In [3]:
# The patcher commands in the cells below use paths relative to this directory (./tools/patcher.py).
!pwd # current working dir

/home/ubuntu/workspace/HerdNet


In [None]:
# tiling validation data
# Creates the output folder, then runs the patcher script on ../general_dataset/val.
# Positional arguments are presumably <root> <patch height> <patch width> <overlap> <dest> — TODO confirm in tools/patcher.py.
# NOTE(review): if patcher.py declares `-all` with argparse `type=bool`, the string "False" is truthy and
# `-all False` would behave like True — confirm, and drop the flag if that is the case.
Path("../general_dataset/val_splits").mkdir(exist_ok=True,parents=True)

!python ./tools/patcher.py ../general_dataset/val 640 640 100 \
    ../general_dataset/val_splits \
    -csv ../general_dataset/groundtruth/csv/val_big_size_A_B_E_K_WH_WB.csv \
    -min 0.0 -all False

In [None]:
# tiling training data
# Same patcher invocation as for the other splits, pointed at ../general_dataset/train and its ground-truth CSV.
# NOTE(review): `-all False` is only honoured if patcher.py parses the value itself; with argparse `type=bool`
# any non-empty string is True — confirm.
Path("../general_dataset/train_splits").mkdir(exist_ok=True,parents=True)

!python ./tools/patcher.py ../general_dataset/train 640 640 100 \
    ../general_dataset/train_splits \
    -csv ../general_dataset/groundtruth/csv/train_big_size_A_B_E_K_WH_WB.csv \
    -min 0.0 -all False

In [None]:
# tiling test data
# Same patcher invocation as for the other splits, pointed at ../general_dataset/test and its ground-truth CSV.
# NOTE(review): `-all False` is only honoured if patcher.py parses the value itself; with argparse `type=bool`
# any non-empty string is True — confirm.
Path("../general_dataset/test_splits").mkdir(exist_ok=True,parents=True)

!python ./tools/patcher.py ../general_dataset/test 640 640 100 \
    ../general_dataset/test_splits \
    -csv ../general_dataset/groundtruth/csv/test_big_size_A_B_E_K_WH_WB.csv \
    -min 0.0 -all False

In [6]:
# tiling Annotation data
# No -csv is passed here and -all is True, so this run is presumably meant to export every patch
# of the un-annotated images (TODO confirm against tools/patcher.py).

# Destination of splits
Path("../EBP-Lindanda-cam0-splits").mkdir(exist_ok=True,parents=True)

!python ./tools/patcher.py ../EBP-Lindanda-cam0 512 512 64 \
    ../EBP-Lindanda-cam0-splits \
    -min 0.0 -all True

Exporting patches: 100%|██████████████████| 1226/1226 [1:02:52<00:00,  3.08s/it]


In [None]:
# tiling data from Savmap dataset
# One-off preparation, kept commented out: adds a constant `labels` column (0) to gt.csv and renames
# filename/xmin/xmax/ymin/ymax to images/x_min/x_max/y_min/y_max before writing the file back.
# path_gt = '../savmap_dataset_v2/gt.csv'
# df_gt = pd.read_csv(path_gt)
# df_gt['labels'] = 0
# df_gt.rename(columns={'filename':'images',
#                       'xmin':'x_min',
#                       'xmax':'x_max',
#                       'ymin':'y_min',
#                       'ymax':'y_max'
#                       },inplace=True)

# df_gt.to_csv(path_gt,index=False,sep=',')

# Create the output folder, then tile the Savmap images with the patcher script.
# NOTE(review): `-all False` is only honoured if patcher.py parses the value itself; with argparse `type=bool`
# any non-empty string is True — confirm.
Path("../savmap_dataset_v2/train_splits").mkdir(exist_ok=True,parents=True)

!python ./tools/patcher.py ../savmap_dataset_v2/images 640 640 100 \
    ../savmap_dataset_v2/train_splits \
    -csv ../savmap_dataset_v2/gt.csv \
    -min 0.0 -all False

### Sampling empty and non-empty images

In [None]:
# sample training data
# Only the path definitions and the mkdir below are live; every step that moves or renames files
# is commented out on purpose because it is destructive.

directory = Path(r"../savmap_dataset_v2/train/")
labels = directory/'labels'  # one file per annotated image (see the loop over labels.iterdir() below)
dest_images = directory/'images_nonempty'
source_images = directory/'images'

# create the destination folder for the selected images (the parent folder must already exist)
dest_images.mkdir(exist_ok=True)

# move non-empty images: every image that has a matching label file
# -- Uncomment to run > Be careful
# for file in labels.iterdir():
#     img_name = file.name.split('.')[0]+'.JPG'
#     if (source_images/img_name).exists():
#         os.rename(src=source_images/img_name,
#                 dst=dest_images/img_name)

# # and empty 
# then move as many of the remaining (empty) images as there are label files, picked with a seeded shuffle
# -- Uncomment to run > Be careful!!
# num_non_empty = len(list(labels.iterdir()))
# num_empty_target = num_non_empty
# empty_images = list(source_images.iterdir())
# random.seed(41) # seeding for reproducibility
# random.shuffle(empty_images) # shuffle
# for file in empty_images[:num_empty_target]:
#     img_name = file.name
#     os.rename(src=file,
#                 dst=dest_images/img_name)

# rename folders: what is left in 'images' becomes 'images_empty', the sampled set becomes 'images'
# os.rename(src=source_images,dst=directory/'images_empty')
# os.rename(src=dest_images,dst=directory/'images')


In [None]:
# Number of entries in the left-over, sampled and label folders.
# ('images_empty' only exists once the commented-out rename step of the sampling cell has been run.)
folder_names = ('images_empty', 'images', 'labels')
tuple(len(list((directory/name).iterdir())) for name in folder_names)


# Visualizing data

In [None]:
import albumentations as A
from animaloc.datasets import CSVDataset
from animaloc.data.transforms import MultiTransformsWrapper, DownSample, PointsToMask, FIDT
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# patch_size = 640
# num_classes = 7
down_ratio = 1  # forwarded to the DownSample end transform below

# Validation dataset read from gt.csv: A.Normalize is applied as the albumentations transform,
# then DownSample (declared with anno_type='bbox') as the end transform.
val_dataset = CSVDataset(
    csv_file = '../wildlife_localizer_data/val/gt.csv',
    root_dir = '../wildlife_localizer_data/val',
    albu_transforms = [A.Normalize(p=1.0)],
    end_transforms = [DownSample(down_ratio=down_ratio, anno_type='bbox')]
    )

In [None]:
# Inspect the `anno_type` attribute of the validation dataset.
val_dataset.anno_type

In [None]:
# Load the same ground-truth CSV that was given to CSVDataset and preview its first rows.
# NOTE: this rebinds `labels`, which the sampling cell used for the savmap labels folder path.
labels = pd.read_csv('../wildlife_localizer_data/val/gt.csv')
labels.head()

In [None]:
# Distinct values of the `labels` column, sorted.
np.sort(labels['labels'].unique())

In [None]:
# loader = DataLoader(dataset = val_dataset, batch_size = 1, shuffle = False)

In [None]:
# Fetch the first item of the validation dataset; it unpacks into an (images, targets) pair.
images, targets = val_dataset[0]

In [None]:
# Display the targets returned for the first validation item.
targets

In [None]:
# img = np.transpose(images.numpy(),(1,2,0))
# plt.imshow(img)

# Creating YOLO labels

In [None]:
# Images dimensions used to normalise the boxes
# (assumes every image listed in gt.csv is 640x640 — TODO confirm against the tiling step)
height,width = 640, 640

# Write one YOLO label file per annotated image, for every sub-folder that has a gt.csv.
# Each line of a label file is: <class> <x_center> <y_center> <width> <height>, all normalised.
for directory in Path("../wildlife_localizer_data/").iterdir():
    gt_path = directory/'gt.csv'
    # skip stray files and folders without ground truth instead of crashing on them
    if not directory.is_dir() or not gt_path.exists():
        continue
    labels_dir = directory/'labels'
    labels_dir.mkdir(exist_ok=True,parents=False) # create directory if it does not exist
    labels = pd.read_csv(gt_path)

    #-- Convert all boxes at once into a fresh frame; the original added columns to each
    #-- groupby slice, which is what triggers pandas' SettingWithCopyWarning
    yolo_labels = pd.DataFrame({
        'images': labels['images'],
        'labels': 0, # every box is written as class 0
        'x': (0.5*(labels['x_min'] + labels['x_max']))/width, # x center
        'y': (0.5*(labels['y_min'] + labels['y_max']))/height, # y center
        'width': (labels['x_max'] - labels['x_min'])/width,
        'height': (labels['y_max'] - labels['y_min'])/height,
    })

    #-- Saving labels in YOLO format (groupby never yields an empty group, so no length check is needed)
    cols = ['labels','x','y','width','height']
    for img_filename,df_group in tqdm(yolo_labels.groupby(by='images'),desc=directory.name):
        labels_filename = img_filename.split('.')[0] + '.txt'
        df_group[cols].to_csv(labels_dir/labels_filename,
                              sep=" ",
                              header=False,
                              index=False)


In [45]:
# for savmap data
directory = Path("../savmap_dataset_v2/train/")
labels_dir = directory/'labels'
labels_dir.mkdir(exist_ok=True,parents=False) # create directory if it does not exist
labels = pd.read_csv(directory/'gt.csv')

# Image dimensions used to normalise the boxes. Set here so that the cell no longer relies on
# `width`/`height` left behind by another cell (assumes 640x640 images — TODO confirm).
height,width = 640, 640

#-- Convert all boxes at once into a fresh frame instead of adding columns to each groupby slice
yolo_labels = pd.DataFrame({
    'images': labels['images'],
    'labels': 0, # every box is written as class 0
    'x': (0.5*(labels['x_min'] + labels['x_max']))/width, # x center
    'y': (0.5*(labels['y_min'] + labels['y_max']))/height, # y center
    'width': (labels['x_max'] - labels['x_min'])/width,
    'height': (labels['y_max'] - labels['y_min'])/height,
})

#-- Saving labels in YOLO format: one "<class> <x> <y> <width> <height>" line per box
cols = ['labels','x','y','width','height']
for img_filename,df_group in tqdm(yolo_labels.groupby(by='images'),desc=directory.name):
    labels_filename = img_filename.split('.')[0] + '.txt'
    df_group[cols].to_csv(labels_dir/labels_filename,
                          sep=" ",
                          header=False,
                          index=False)

train: 100%|██████████| 2750/2750 [00:09<00:00, 303.35it/s]


In [46]:
# Control splitting: how many images have a label file with the same base name?
images_dir = Path("../savmap_dataset_v2/train/images")
labels_root = Path("../savmap_dataset_v2/train/labels")

# one boolean per image: True when <base name>.txt exists in the labels folder
has_label = [
    (labels_root/(path.name.split('.')[0] + '.txt')).exists()
    for path in images_dir.iterdir()
]

num_total = len(has_label)
num_found = sum(has_label)
num_missing = num_total - num_found

num_missing,num_found,num_total

(30607, 2750, 33357)

In [None]:
# Reload gt.csv from whatever folder `directory` currently points to and list the distinct `labels` values.
# NOTE(review): `directory` comes from an earlier cell — presumably the savmap train folder; confirm before relying on it.
labels = pd.read_csv(os.path.join(directory,'gt.csv'))
labels['labels'].unique()

# Savmap data
Saving bounding boxes in VOC format

In [None]:
import geopandas as gpd
from shapely.geometry import Point, Polygon
import torch 
from torchvision.utils import draw_bounding_boxes
from torchvision.ops import nms
from torchvision.transforms import PILToTensor
from PIL import Image

In [None]:
# Shapefile with the annotations; it is read with geopandas in the next cell.
annotations_path = Path("../savmap_dataset_v2/savmap_annotations_2014.shp")

In [None]:
# Load the annotation shapefile and preview it; the cells below use its IMAGEUUID and geometry columns.
data = gpd.read_file(annotations_path)
data.head()

In [None]:
# data.loc[data['IMAGEUUID']=='0a3ed15cfab4453795564140e8fde8ba']

In [7]:
# uuid = '0a3ed15cfab4453795564140e8fde8ba'
# polygons = data.loc[data['IMAGEUUID']==uuid,'geometry']
# polygons

In [None]:
# Build {row index: [uuid, x_min, y_min, x_max, y_max]} from the annotation polygons.
# The image itself is not needed for this, so it is no longer opened and decoded for every uuid
# (the original loaded each JPG into a tensor that was never used, and never closed the file).
bboxes = dict()
count = 0
for uuid in tqdm(np.unique(data.IMAGEUUID),desc='Getting bbox'):

    # identifier, filename w/o suffix
    uuid = str(uuid)

    # get boxes: the bounds of every polygon annotated on this image
    polygons = data.loc[data['IMAGEUUID']==uuid,'geometry']
    boxes = np.array([list(polygon.bounds) for polygon in polygons])
    boxes = torch.from_numpy(boxes).float()

    # apply non max suppression to discard overlapping polygons
    areas = abs((boxes[:,2] - boxes[:,0])*(boxes[:,3] - boxes[:,1]))
    indices = nms(boxes=boxes,
                  scores= 1/areas, # discarding larger bbox when they overlap
                  iou_threshold=0.1)
    bbox = boxes[indices].numpy()

    # save bbox: one entry per retained box, keyed by a running counter
    for box in bbox:
        bboxes[count] = [uuid,] + box.tolist()
        count += 1
# print('retained bbox indices:',indices)

In [None]:
# Turn the collected boxes into a table: one row per box, pixel coordinates as integers
columns=['filename','x_min','y_min','x_max','y_max']
gt_bboxes = pd.DataFrame.from_dict(data=bboxes,
                       orient='index',
                       columns=columns)

for col in columns:
    # The name column is called 'filename' here. The previous test against 'images' never matched,
    # so int() was applied to the uuid strings and the ".JPG" suffix was never added.
    if col != 'filename':
        gt_bboxes[col] = gt_bboxes[col].apply(int)
    else:
        gt_bboxes[col] = gt_bboxes[col].apply(lambda x: f"{x}.JPG")

gt_bboxes['labels'] = 0 # class
gt_bboxes.head()

In [None]:
# discarding invalid coordinates
# gt_bboxes.loc[gt_bboxes.min(axis=1,numeric_only=True)>=0]

In [None]:
# Example: drawing bounding boxes

# pick one annotated image at random and load it as a tensor
filename = gt_bboxes['filename'].sample(1).iloc[0]
# NOTE(review): the boxes were extracted with images under "../savmap_dataset_v2/images/";
# confirm that "../savmap_dataset/" is really the folder intended here.
path_to_img = f"../savmap_dataset/{filename}"
img_pil = Image.open(path_to_img)
img_tensor = PILToTensor()(img_pil)

# boxes of that image, four coordinates per row
box_columns = ['x_min','y_min','x_max','y_max']
is_selected = gt_bboxes['filename'] == filename
boxes = torch.from_numpy(gt_bboxes.loc[is_selected, box_columns].to_numpy())

# draw the boxes, then move the channel axis last for matplotlib
annotated = draw_bounding_boxes(img_tensor,
                                boxes=boxes,
                                colors="red",
                                width=5)
img_with_box = annotated.numpy().transpose((1,2,0))

fig, ax = plt.subplots(figsize=(15,7))
ax.imshow(img_with_box)
ax.set_title(filename)
plt.show()

# Inference with YOLO

In [38]:
# Imports
from ultralytics import YOLO
import ultralytics
import torch
from pathlib import Path
from animaloc.data import ImageToPatches
from PIL import Image
import torch.nn.functional as F
from sahi import AutoDetectionModel
from sahi.models.yolov8 import Yolov8DetectionModel
from sahi.utils.cv import read_image
from sahi.predict import get_sliced_prediction, predict

In [40]:
# load model
# Weights file handed to the sahi detection model further down; the direct ultralytics load is kept for reference.
path_to_weights = Path('../yolo-runs/exp4/weights/best.pt')
# model = YOLO('yolov8s.pt',task='detect').load(path_to_weights)

In [69]:
# # load image
# dir_images = Path("../savmap_dataset_v2/train/images").iterdir()
# img = Image.open(next(dir_images))

# patcher = ImageToPatches(img,size=(640,640),overlap=0)
# patches = patcher.make_patches()

# # model.predict()
# print(len(patcher))

In [70]:
# _ = patcher.show()

In [68]:
# patches.shape

In [67]:
# plt.imshow(patches[9].numpy().transpose(1,2,0))

In [41]:
# using sahi
# Alternative construction through the generic factory, kept for reference:
# detection_model = AutoDetectionModel.from_pretrained(
#     model_type='yolov8',
#     model_path=path_to_weights,
#     image_size=640,
#     confidence_threshold=0.3,
#     device="cpu", # or 'cuda:0'
# )

# Build sahi's YOLOv8 wrapper from the weights file, with a 0.3 confidence threshold, on CPU
detection_model = Yolov8DetectionModel(model_path=path_to_weights,
                                      confidence_threshold=0.3,
                                      device="cpu" # or 'cuda:0'
)

In [46]:
# Sliced inference on a sample image: 640x640 slices with a 0.2 overlap ratio in both directions,
# using 'NMS' as the post-processing type.
img = Image.open("../dummy_image.jpg")
result = get_sliced_prediction(img, 
                               detection_model,
                               slice_height=640,
                               slice_width=640,
                               overlap_height_ratio=0.2,
                               overlap_width_ratio=0.2,
                               postprocess_type='NMS',
                               )

Performing prediction on 16 number of slices.


In [116]:
# Export the prediction visuals to ./tmp/ with labels hidden and confidences shown.
result.export_visuals(export_dir="./tmp/",hide_labels=True,hide_conf=False)

In [47]:
# Predictions as a list of COCO-style dicts (bbox, score, category_id, ...), as shown in the output below.
result.to_coco_annotations()

[{'image_id': None,
  'bbox': [383.18536376953125, 422.7789306640625, 92.33203125, 76.5166015625],
  'score': 0.7291696071624756,
  'category_id': 0,
  'category_name': 'wildlife',
  'segmentation': [],
  'iscrowd': 0,
  'area': 7064},
 {'image_id': None,
  'bbox': [555.7026977539062,
   273.54473876953125,
   53.543212890625,
   40.183349609375],
  'score': 0.7242732644081116,
  'category_id': 0,
  'category_name': 'wildlife',
  'segmentation': [],
  'iscrowd': 0,
  'area': 2151}]

In [33]:
from urllib.parse import urlparse

In [35]:
# Parsing s3 url
url = 's3://wildaidata-test/p1/p2/000113a692ba61cd55ea3acb9c2f9c41709710a1_S2.JPG'
s3_img = Path(url)

img_path = s3_img
# `parents` runs from the deepest folder up to '.', so the third entry from the end is
# '<scheme>/<bucket>'
bucket = list(img_path.parents)[-3]
# Use path operations rather than splitting/replacing on '/': the result then does not depend on the
# OS path separator, and only the leading bucket prefix is stripped (str.replace removed every occurrence).
bucket_name = bucket.name
filename = img_path.relative_to(bucket).as_posix()

bucket_name, filename

('wildaidata-test', 'p1/p2/000113a692ba61cd55ea3acb9c2f9c41709710a1_S2.JPG')

In [37]:
# Same split done with the URL parser: the network location is the bucket, the path (without its
# leading slash) is the object key
parsed = urlparse(url, allow_fragments=False)
bucket_name, key = parsed.netloc, parsed.path.lstrip('/')
bucket_name,key

('wildaidata-test', 'p1/p2/000113a692ba61cd55ea3acb9c2f9c41709710a1_S2.JPG')