In [None]:
# Install requirements (uncomment the line below to run).
# `%pip` installs into the environment of the running kernel.
# %pip install geopandas pillow label-studio-converter

# Exploring Savmap dataset

In [None]:
import geopandas as gdp
from PIL import Image
from tqdm import tqdm
import os
from pathlib import Path
import numpy as np
import json
import pandas as pd
# FIXME(review): the statement below is an incomplete `from ... import ...` and is a
# SyntaxError as written. `Annotator` is used further down without any visible import,
# so this line presumably was meant to import it -- confirm the module path.
from data

In [None]:
# Load the SAVMAP polygon annotations
root = "/Users/sfadel/Desktop/savmap"
annotation_file = os.path.join(root,'savmap_annotations_2014.geojson')
polygons = gdp.read_file(annotation_file)

# Images are read straight from `root` here (the `images` sub-folder variant is kept as a comment)
path_to_labels = os.path.join(root,'labels')
path_to_images = root #os.path.join(root,'images')

In [None]:
# get bounding boxes
bbox_columns = ['x_min', 'y_min', 'x_max', 'y_max','width','height','x','y']
for cat in bbox_columns:
    polygons[cat] = None

# Positional column indices, so every value is written on the frame itself.
# `polygons[col].iat[i] = ...` is chained assignment and may write to a temporary copy.
column_position = {cat: polygons.columns.get_loc(cat) for cat in bbox_columns}

image_sizes = {}        # IMAGEUUID -> (width, height); each image is opened only once
missing_images = set()  # IMAGEUUIDs whose image file could not be opened

for i in range(len(polygons)):
    x_min, y_min, x_max, y_max = polygons['geometry'].iloc[i].bounds
    image_uuid = polygons['IMAGEUUID'].iloc[i]
    if image_uuid in missing_images:
        continue
    if image_uuid not in image_sizes:
        image_path = os.path.join(path_to_images,f"{image_uuid}.JPG")
        try:
            # The context manager closes the file handle as soon as the size is read
            with Image.open(image_path) as img:
                image_sizes[image_uuid] = img.size
        except OSError:
            # Missing or unreadable image: leave the row's new columns empty (best effort)
            missing_images.add(image_uuid)
            continue
    width, height = image_sizes[image_uuid]

    row_values = {
        'x_min': int(x_min),
        'x_max': int(x_max),
        'y_min': int(y_min),
        'y_max': int(y_max),
        'x': round(0.5*(x_max+x_min)),
        'y': round(0.5*(y_max+y_min)),
        # Stored per row: assigning the whole column here would leave every
        # annotation with the size of the last image opened.
        'width': width,
        'height': height,
    }
    for cat, value in row_values.items():
        polygons.iat[i, column_position[cat]] = value

if missing_images:
    print(f"{len(missing_images)} image(s) could not be opened; their annotations were left without bounding-box values.")

polygons['bbox_w'] = polygons['x_max'] - polygons['x_min']
polygons['bbox_h'] = polygons['y_max'] - polygons['y_min']
polygons['class'] = 0

In [None]:
# Preview the first two annotation rows, including the bounding-box columns added above
polygons.head(2)

In [None]:
# Keep only the columns needed for the COCO export and give them COCO-style names
label_columns = ['x_min','x_max','y_min','y_max','IMAGEUUID','width','height','class']
coco_names = {'IMAGEUUID':'filename',
              'x_max':'xmax',
              'x_min':'xmin',
              'y_min':'ymin',
              'y_max':'ymax'}
data = polygons.loc[:,label_columns].rename(columns=coco_names)

# The image id becomes the image file name
data['filename'] = [f"{x}.JPG" for x in data['filename']]

# Drop rows with missing values, then fix the column order
data = data.dropna()[['filename','class','width', 'height','xmin','ymin','xmax','ymax']]
data.head(1)


In [None]:
# Convert CSV to COCO
save_json_path = ... #'cocoformat.json'

# The single category shared by every annotation
category = {
    "supercategory": 'None',
    "id": 0,
    "category_name": 'wildlife',
}

images, annotations = [], []
categories = [category]

# Integer ids: one per image file, one per class value, one per annotation row
data = data.assign(
    fileid=lambda frame: frame['filename'].astype('category').cat.codes,
    categoryid=lambda frame: pd.Categorical(frame['class'],ordered= True).codes,
    # categoryid would be shifted by +1 here if ids had to start at 1
    annid=lambda frame: frame.index,
)

def image(row):
    """Build the COCO-style image entry for one row.

    `row` must expose `height`, `width`, `fileid` and `filename` attributes
    (for example a tuple produced by `DataFrame.itertuples()`).
    """
    return {
        "height": row.height,
        "width": row.width,
        "id": row.fileid,
        "file_name": row.filename,
    }

# def category(row):
#     category = {}
#     category["supercategory"] = 'None'
#     category["id"] = row.categoryid
#     category["category_name"] = row[2]
#     return category

def annotation(row):
    """Build the COCO-style annotation entry for one row.

    `row` must expose `xmin`, `ymin`, `xmax`, `ymax`, `fileid`, `categoryid`
    and `annid` attributes. The box is stored as [xmin, ymin, width, height]
    and `area` is width * height of that box.
    """
    box_w = row.xmax - row.xmin
    box_h = row.ymax - row.ymin
    return {
        "segmentation": [],
        "iscrowd": 0,
        "area": box_w * box_h,
        "image_id": row.fileid,
        "score": 1.0,
        "bbox": [row.xmin, row.ymin, box_w, box_h],
        "category_id": row.categoryid,
        "id": row.annid,
    }

# One annotation entry per label row
annotations.extend(annotation(row) for row in data.itertuples())

# One image entry per distinct file, ordered by image id
imagedf = data.drop_duplicates(subset=['fileid']).sort_values(by='fileid')
images.extend(map(image, imagedf.itertuples()))

# catdf = data.drop_duplicates(subset=['categoryid']).sort_values(by='categoryid')
# for row in catdf.itertuples():
#     categories.append(category(row))

data_coco = {
    "images": images,
    "categories": categories,
    "annotations": annotations,
}
# json.dump(data_coco, open(save_json_path, "w"), indent=4)

In [None]:
# Group the ground-truth annotations by image file name.
# Every annotation is matched to its image through `image_id`. Zipping the two lists
# pairs them by position, which attaches annotations to the wrong image and stops
# after `len(images)` annotations, although one image can own several annotations.
images_by_id = {image_data['id']: image_data for image_data in data_coco['images']}
groundtruth = {image_data['file_name']: [] for image_data in data_coco['images']}
for annot in data_coco['annotations']:
    image_data = images_by_id[annot['image_id']]
    # Merge into a new dict so the annotation entries inside `data_coco` stay untouched.
    # Image keys win on collision, exactly as with `annot.update(image_data)`.
    # NOTE(review): this means "id" ends up holding the image id -- confirm that is intended.
    merged = {**annot, **image_data}
    merged['category_name'] = category['category_name']
    groundtruth[merged['file_name']].append(merged)

In [None]:
# provide correct alias, "pt", "onnx"
handler = Annotator(mlflow_model_version="groundtruth",
                    mlflow_model_alias='pt')

# Arguments for building the upload JSON from the ground truth collected above
upload_options = dict(path_img_dir='/Users/sfadel/Desktop/savmap',
                      root='/Users/sfadel',
                      bulk_predictions=groundtruth)
directory_preds = handler.build_upload_json(**upload_options)

# Formatting savmap dataset
Formatting the savmap dataset so that it can be imported into Label Studio.

In [1]:
import geopandas as gdp
from PIL import Image
from tqdm import tqdm
import os
from pathlib import Path

In [2]:
# Load the SAVMAP polygon annotations
root = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data"
annotation_file = os.path.join(root,'savmap_annotations_2014.geojson')
polygons = gdp.read_file(annotation_file)

# Make sure the image and label folders exist
path_to_images = os.path.join(root,'images')
path_to_labels = os.path.join(root,'labels')
for directory in (path_to_images, path_to_labels):
    Path(directory).mkdir(parents=True,exist_ok=True)

In [3]:
# Move files
# ! mv ../data/savmap_dataset_v2/*.JPG ../data/savmap_dataset_v2/images

In [4]:
# get bounding boxes
bbox_columns = ['x_min', 'y_min', 'x_max', 'y_max','width','height','x','y']
for cat in bbox_columns:
    polygons[cat] = None

# Positional column indices, so every value is written on the frame itself.
# `polygons[col].iat[i] = ...` is chained assignment and may write to a temporary copy.
column_position = {cat: polygons.columns.get_loc(cat) for cat in bbox_columns}

image_sizes = {}  # IMAGEUUID -> (width, height); each image is opened only once

for i in range(len(polygons)):
    x_min, y_min, x_max, y_max = polygons['geometry'].iloc[i].bounds
    image_uuid = polygons['IMAGEUUID'].iloc[i]
    if image_uuid not in image_sizes:
        image_path = os.path.join(path_to_images,f"{image_uuid}.JPG")
        # The context manager closes the file handle as soon as the size is read
        with Image.open(image_path) as img:
            image_sizes[image_uuid] = img.size
    width, height = image_sizes[image_uuid]

    row_values = {
        'x_min': int(x_min),
        'x_max': int(x_max),
        'y_min': int(y_min),
        'y_max': int(y_max),
        'x': round(0.5*(x_max+x_min)),
        'y': round(0.5*(y_max+y_min)),
        # Stored per row: assigning the whole column here would leave every
        # annotation with the size of the last image opened.
        'width': width,
        'height': height,
    }
    for cat, value in row_values.items():
        polygons.iat[i, column_position[cat]] = value

polygons['bbox_w'] = polygons['x_max'] - polygons['x_min']
polygons['bbox_h'] = polygons['y_max'] - polygons['y_min']
polygons['class'] = 0

In [5]:
# Preview the first annotation row, including the bounding-box columns added above
polygons.head(1)

Unnamed: 0,IMAGEUUID,TAGUUID,geometry,x_min,y_min,x_max,y_max,width,height,x,y,bbox_w,bbox_h,class
0,f77f4af5a1344b9086b307d2b4ba61ff,a9b3a2325dbe4a208bc3ae37eeb8e1e1,"POLYGON ((1197.00000 568.00000, 1186.00000 568...",1179,568,1230,597,4608,3456,1204,582,51,29,0


In [6]:
# convert to yolo format
def save_df_as_yolo(df_annotation:gdp.GeoDataFrame,dest_path_labels:str):
    """Write one space-separated label file per image (`<IMAGEUUID>.txt`).

    Each line holds `class x y bbox_w bbox_h`, where x and bbox_w are divided by
    the row's `width` and y and bbox_h by the row's `height`.

    NOTE: a function with the same name is defined again further down in this
    notebook; when both cells run in one kernel the later definition wins.

    Args:
        df_annotation: frame with the columns 'IMAGEUUID', 'class', 'x', 'y',
            'bbox_w', 'bbox_h', 'width' and 'height'. It is not modified.
        dest_path_labels: directory that receives the label files; created if missing.

    Raises:
        AssertionError: if any of the columns used here contains NaN.
    """
    cols = ['class','x','y','bbox_w','bbox_h']
    for col in cols + ['width','height']:
        assert df_annotation[col].isna().sum()<1,'there are NaN values. Check out.'

    # Normalise on a private copy: writing the ratios back into the caller's frame
    # would make a second call divide already-normalised values again.
    yolo = df_annotation[['IMAGEUUID','class','width','height','x','y','bbox_w','bbox_h']].copy()
    for col in ['x','y','bbox_w','bbox_h','width','height']:
        yolo[col] = yolo[col].astype(float)

    # normalize values
    yolo['x'] = yolo['x']/yolo['width']
    yolo['y'] = yolo['y']/yolo['height']
    yolo['bbox_w'] = yolo['bbox_w']/yolo['width']
    yolo['bbox_h'] = yolo['bbox_h']/yolo['height']

    Path(dest_path_labels).mkdir(parents=True,exist_ok=True)

    for image_name,df in tqdm(yolo.groupby('IMAGEUUID'),desc='Saving yolo labels'):
        txt_file = f'{image_name}.txt'
        df[cols].to_csv(os.path.join(dest_path_labels,txt_file),sep=' ',index=False,header=False)

In [7]:
# Write one label file per image into `path_to_labels`
save_df_as_yolo(df_annotation=polygons,dest_path_labels=path_to_labels)

Saving yolo labels:   0%|          | 0/654 [00:00<?, ?it/s]

Saving yolo labels: 100%|██████████| 654/654 [00:00<00:00, 1191.47it/s]


In [None]:
# tutorial here: https://github.com/HumanSignal/label-studio-converter/tree/master
# if it does not work, use a terminal
# !label-studio-converter import yolo -i "D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data" -o "D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\labelstudio.json" --image-root-url "/data/local-files/?d=PhD/Data per camp/Extra training data/savmap_dataset_v2/raw_data/images" --image-ext ".JPG" --out-type "predictions"

# Formatting General Dataset from Delplanque 2021

In [9]:
import pandas as pd
import os
from pathlib import Path
from PIL import Image
from tqdm import tqdm


## Tiling

In [None]:
# Tiling test
# !python ../../HerdNet/tools/patcher.py "D:\PhD\Data per camp\Extra training data\general_dataset\test" 640 640 64  -dest "D:\PhD\Data per camp\Extra training data\general_dataset\tiled_data\test_tiled\images" -pattern "**/*.JPG" -csv "D:\PhD\Data per camp\Extra training data\general_dataset\groundtruth\csv\test_big_size_A_B_E_K_WH_WB.csv"

In [None]:
# Tiling val
# !python ../../HerdNet/tools/patcher.py "D:\PhD\Data per camp\Extra training data\general_dataset\val" 640 640 64  -dest "D:\PhD\Data per camp\Extra training data\general_dataset\val_tiled" -pattern "**/*.JPG" -csv "D:\PhD\Data per camp\Extra training data\general_dataset\groundtruth\csv\val_big_size_A_B_E_K_WH_WB.csv"

In [None]:
# Tiling Train
# !python ../../HerdNet/tools/patcher.py "D:\PhD\Data per camp\Extra training data\general_dataset\train" 640 640 64  -dest "D:\PhD\Data per camp\Extra training data\general_dataset\train_tiled" -pattern "**/*.JPG" -csv "D:\PhD\Data per camp\Extra training data\general_dataset\groundtruth\csv\train_big_size_A_B_E_K_WH_WB.csv"

## Converting to YOLO format

In [56]:
# convert to yolo format
def save_df_as_yolo(df_annotation:pd.DataFrame,dest_path_labels:str,is_detector:bool=False):
    """Write one space-separated label file per image (`<image stem>.txt`).

    Each line holds `class x y bbox_w bbox_h`, where x and bbox_w are divided by
    the row's `width` and y and bbox_h by the row's `height`.

    Args:
        df_annotation: frame with the columns 'images', 'class', 'x', 'y',
            'bbox_w', 'bbox_h', 'width' and 'height'. It is not modified.
        dest_path_labels: directory that receives the label files (str or Path);
            created if missing.
        is_detector: when True every row is written with class 0.

    Raises:
        AssertionError: if any of the columns used here contains NaN.
    """
    cols = ['class','x','y','bbox_w','bbox_h']
    for col in cols + ['width','height']:
        assert df_annotation[col].isna().sum()<1,'there are NaN values. Check out.'

    # Work on a private copy: the caller keeps its original coordinates and class
    # labels, so running this cell again does not normalise the values twice.
    yolo = df_annotation.copy()

    for col in ['x','y','bbox_w','bbox_h']:
        yolo[col] = yolo[col].astype(float)

    # normalize values
    yolo['x'] = yolo['x']/yolo['width']
    yolo['y'] = yolo['y']/yolo['height']
    yolo['bbox_w'] = yolo['bbox_w']/yolo['width']
    yolo['bbox_h'] = yolo['bbox_h']/yolo['height']

    if is_detector:
        yolo['class'] = 0

    # `to_csv` does not create missing folders
    Path(dest_path_labels).mkdir(parents=True,exist_ok=True)

    for image_name,df in tqdm(yolo.groupby('images'),desc='Saving yolo labels'):
        txt_file = f'{Path(image_name).stem}.txt'
        df[cols].to_csv(os.path.join(dest_path_labels,txt_file),sep=' ',index=False,header=False)

In [67]:
split='train' # one of: 'train', 'test', 'val'
detection_mode=False # save label for wildlife detection only

# Folder holding the tiled images and the ground truth of the chosen split
tiled_dir = rf"D:\PhD\Data per camp\Extra training data\general_dataset\tiled_data\{split}_tiled"
root = Path(tiled_dir)
path_to_csv, path_images, path_to_labels = (
    root.joinpath(part) for part in ("gt.csv", 'images', 'labels')
)

In [68]:
df_annotations = pd.read_csv(path_to_csv)
# df_annotations.head(2)

# Read the size of every distinct image once. The `with` block closes each file
# handle right away instead of leaving it open until garbage collection.
widths, heights = {}, {}
for name in df_annotations['images'].unique():
    with Image.open(root/f"images/{name}") as tile:
        widths[name], heights[name] = (float(side) for side in tile.size)

# One dictionary lookup per row replaces a full boolean-mask scan per image
df_annotations['width'] = df_annotations['images'].map(widths).astype(float)
df_annotations['height'] = df_annotations['images'].map(heights).astype(float)

# Box centre and box size, from the corner coordinates
df_annotations['x'] = 0.5*(df_annotations['x_min'] + df_annotations['x_max'])
df_annotations['y'] = 0.5*(df_annotations['y_min'] + df_annotations['y_max'])
df_annotations['bbox_h'] = df_annotations['y_max'] - df_annotations['y_min']
df_annotations['bbox_w'] = df_annotations['x_max'] - df_annotations['x_min']

# `save_df_as_yolo` expects the label column to be called 'class'
df_annotations = df_annotations.rename(columns={'labels':'class'})

In [69]:
# df_annotations.head(2)
# df_annotations['class'].plot(kind='hist')

In [70]:
# Write one label file per image into `path_to_labels`; `detection_mode` is passed as `is_detector`
save_df_as_yolo(df_annotations,path_to_labels,is_detector=detection_mode)

Saving yolo labels: 100%|██████████| 4511/4511 [00:02<00:00, 2155.82it/s]


# WAID 

## Transforming labels for detector training

In [17]:
from pathlib import Path
import pandas as pd
from tqdm import tqdm

In [29]:
path = r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\data\WAID\labels"
path = Path(path)

# Set the class id of every label row to 0 in each label file found below `path`
for p in tqdm(path.glob("*/**/*.txt")):
    # An empty label file has no rows to relabel and would make `read_csv`
    # raise `EmptyDataError`, so it is skipped.
    if p.stat().st_size == 0:
        continue
    df = pd.read_csv(p,sep=" ",header=None)
    df.columns = ["class",'x','y','w','h']
    df['class'] = 0
    # un-comment to run
    # df.to_csv(p, sep=" ", header=False, index=False)

14366it [00:53, 266.73it/s]


## Mapping labels to a reference mapping