# Image tiling for annotation

#### Meanings of arguments
- ```-ratioheight``` : proportion of tile w.r.t. height of image. Example: 0.5 means dividing the image into two bands along its height.
- ```-ratiowidth``` : proportion of tile w.r.t. width of image. Example: 1.0 means the width of the tile is the same as that of the image.
- ```-overlapfactor``` : overlap between tiles, given as a fraction (e.g. 0.1 means 10% overlap). It should be less than 1.
- ```-rmheight``` : fraction of the image height to remove (crop) at the bottom and top, e.g. 0.21
- ```-rmwidth``` : fraction of the image width to remove (crop) on each side of the image, e.g. 0.08
- ```-pattern``` : glob pattern used to select the images. For example, "**/*.JPG" will get all .JPG images in the directory and its subdirectories. On Windows it will get both .JPG and .jpg; on Unix it will only get .JPG images.


In [None]:
# New script for tiling data
# Runs HerdNet's patcher script through the IPython `!` shell escape.
# Arguments: the source image directory, three positional values (`0 0 0`), then the
# options described under "Meanings of arguments" above; `-dest` names the output folder.
# NOTE(review): the three zeros are presumably the patcher's fixed-size/overlap
# positionals, left at 0 because ratios are given instead — confirm in patcher.py.
# Example paths from an earlier run:
# images_to_tile = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images"
# destination_directory = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images-tiled"
!python ../../HerdNet/tools/patcher.py "D:\PhD\Data per camp\Dry season\Leopard rock\Camp 22 + 37-40\Rep 2" 0 0 0 -overlapfactor 0.1  -ratiowidth 0.33334 -ratioheight 0.5 -rmheight 0.21 -rmwidth 0.08 -dest "D:\PhD\Data per camp\Dry season\Leopard rock\Camp 22 + 37-40\Rep 2 - tiled" -pattern "**/*.JPG"

# Pre-annotating data for Label Studio

In [None]:
from datalabeling.annotator import Annotator
import os
from pathlib import Path

### Creating a JSON file to be uploaded to Label Studio

In [None]:
# Example: build the Label Studio pre-annotation JSON for one folder of tiled images.
# Set `mlflow_model_alias` to the alias of the model to use (original note: "pt", "onnx").
handler = Annotator(mlflow_model_alias='cycle1')

# NOTE(review): the tiling command earlier in this notebook writes to a folder named
# "Camp 22 + 37-40", whereas this path reads "Camp 22 37-40" — confirm which is correct.
path_img_dir = r"D:\PhD\Data per camp\Dry season\Leopard rock\Camp 22 37-40\Rep 2 - tiled"
root = "D:\\"

# The JSON file is saved next to the image folder and is named after that folder.
tiled_dir = Path(path_img_dir)
json_name = tiled_dir.name + "_preannotation_label-studio.json"
save_json_path = os.path.join(tiled_dir.parent, json_name)

directory_preds = handler.build_upload_json(
    path_img_dir=path_img_dir,
    root=root,
    save_json_path=save_json_path,
    pattern="**/*.JPG",
)

### Pre-annotating an existing project using the Label Studio API
Note: this approach does not seem to work well (e.g. filtering) with older projects that were created before a Label Studio software update.
Nevertheless, it is the **recommended way of pre-annotating data in Label Studio**.

In [None]:
# Insert the correct project id here; it can be found by looking at the project URL.
project_id = ...  # placeholder, must be replaced before running

# Set `mlflow_model_alias` to the alias of the model to use (original note: "pt", "onnx").
handler = Annotator(mlflow_model_alias='cycle1')
handler.upload_predictions(project_id=project_id)

# Inference with Sahi

In [None]:
from time import time

import torch
from PIL import Image
from sahi.models.yolov8 import Yolov8DetectionModel
from sahi.predict import get_sliced_prediction
from ultralytics import YOLO

In [None]:
class Detector:
    """Sliced-inference wrapper around a YOLO detection model.

    The weights are loaded with ``YOLO(..., task='detect')``, wrapped in a
    ``Yolov8DetectionModel`` and queried through ``get_sliced_prediction``.
    ``torch``, ``YOLO``, ``Yolov8DetectionModel``, ``get_sliced_prediction``
    and ``Image`` must already be imported when this class is used.

    Args:
        path_to_weights: Path of the weights file handed to ``YOLO``.
        confidence_threshold: Confidence threshold given to the detection model.
        tilesize: Slice height and width passed to ``get_sliced_prediction``.
        overlapratio: Overlap ratio passed for both the height and the width.
    """

    def __init__(self,
                 path_to_weights: str,
                 confidence_threshold: float = 0.3,
                 tilesize: int = 640,
                 overlapratio: float = 0.1):
        # Use the GPU whenever torch reports one, otherwise fall back to the CPU.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        # A ready-made YOLO object is passed (instead of `model_path`) so that the
        # task can be set explicitly to 'detect'.
        self.detection_model = Yolov8DetectionModel(
            model=YOLO(path_to_weights, task='detect'),
            confidence_threshold=confidence_threshold,
            image_size=640,
            device=device,
        )
        self.tilesize = tilesize
        self.overlapratio = overlapratio
        # Attribute name (including its spelling) is kept for backward compatibility.
        self.sahi_prostprocess = 'NMS'
        print('Device:', device)

    def predict(self, image: str):
        """Run sliced prediction on one image file.

        Args:
            image: Path of the image to open with ``Image.open``.

        Returns:
            Whatever ``to_coco_annotations()`` returns for the sliced prediction.
        """
        # Open the image in a context manager so the file handle is released even
        # if prediction fails; the annotations are extracted before it is closed.
        with Image.open(image) as opened_image:
            result = get_sliced_prediction(
                opened_image,
                self.detection_model,
                slice_height=self.tilesize,
                slice_width=self.tilesize,
                overlap_height_ratio=self.overlapratio,
                overlap_width_ratio=self.overlapratio,
                postprocess_type=self.sahi_prostprocess,
            )
            return result.to_coco_annotations()

In [None]:
# Single tiled image used as the input of the timing comparison in the following cell.
image_path = r"D:\PhD\Data per camp\Dry season\Kapiri\Camp 6-8\Rep 1 - tiled\DJI_20231003081043_0016_1.JPG"

In [None]:
# perf_counter is a monotonic, high-resolution clock meant for measuring short
# durations; time.time() can be coarse and may jump if the system clock is adjusted.
from time import perf_counter

# Time one sliced prediction for each weight format of the same model.
# The timer starts after the Detector is built, so model loading is not included.
times = []
for ext in ['.pt', '.onnx']:
    path = r"..\base_models_weights\yolov8.kaza" + ext
    model = Detector(path_to_weights=path, confidence_threshold=0.3)
    start = perf_counter()
    model.predict(image=image_path)
    # Store (extension, elapsed seconds).
    times.append((ext, perf_counter() - start))

In [None]:
# Display the collected (extension, elapsed seconds) pairs.
times

In [None]:
# img_path = Path(r"C:\Users\fadel\OneDrive\Bureau\e-savior\SAVMAP_samples\00a033fefe644429a1e0fcffe88f8b39.JPG")
# directory = img_path.parent/'preprocessed'
# directory.mkdir(parents=False,exist_ok=True)

In [None]:
# data = imread(str(img_path))
# tilesize_h = 1000
# tilesize_w = 1000
# height, width, channels = data.shape 
# count = 0
# for i,j in tqdm(product(list(range(0,height,tilesize_h)),list(range(0,width,tilesize_w)))):
#     tile = data[i:min(i+tilesize_h,height),j:min(j+tilesize_w,width),:]
#     count += 1
#     filename = img_path.name.split('.')[0] + f'#{i}#{j}' + img_path.suffix
#     savepath = directory/filename
#     imsave(savepath,tile)
#     #assert sum(tile.shape) == tilesize_h+tilesize_w+channels,f"{tile.shape}"


In [None]:
# plt.imshow(tile)
# plt.show()

In [None]:
# height,width


# YOLO data_config.yaml 

In [None]:
import yaml
import json
from arguments import Arguments

In [None]:
# Read the YOLO dataset configuration of the identification dataset.
config_path = r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml"
with open(config_path) as config_file:
    # yaml.full_load(stream) is shorthand for yaml.load(stream, Loader=yaml.FullLoader).
    yolo_config = yaml.full_load(config_file)
yolo_config

In [None]:
# Build the {index: class name} mapping from the label-mapping JSON file,
# leaving out every name listed in `args.discard_labels`.
args = Arguments()
with open(r"D:\PhD\Data per camp\IdentificationDataset\label_mapping.json") as mapping_file:
    mapping_entries = json.load(mapping_file)

names = []
for entry in mapping_entries:
    if entry['name'] in args.discard_labels:
        continue
    names.append(entry['name'])

# Kept names are numbered consecutively from 0, in file order.
label_map = dict(enumerate(names))
label_map

In [None]:
# Store the class names and the number of classes in the configuration.
yolo_config['names'] = label_map
yolo_config['nc'] = len(label_map)
yolo_config

In [None]:
# Write the updated configuration back to data_config.yaml.
# Serialize first: opening with 'w' truncates the file immediately, so dumping
# straight into the handle would leave an empty or partial config if yaml.dump raised.
config_text = yaml.dump(yolo_config, default_flow_style=False, sort_keys=False)
with open(r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml", 'w') as file:
    file.write(config_text)

# Dataset distribution

In [None]:
import yaml
import pandas as pd
import os
from pathlib import Path

In [None]:
# Read the YOLO dataset configuration of the WAID dataset.
waid_config_path = r"D:\PhD\Data per camp\Extra training data\WAID\data_config.yaml"
with open(waid_config_path) as config_file:
    # yaml.full_load(stream) is shorthand for yaml.load(stream, Loader=yaml.FullLoader).
    yolo_config = yaml.full_load(config_file)
yolo_config

In [None]:
# `names` entry of the loaded config; used below to translate the numeric class ids.
label_map = yolo_config['names']

In [None]:
# Dataset split whose label distribution is analysed below.
split = 'train'

# A split entry in a YOLO data config may be a single path or a list of paths;
# indexing a plain string with [0] would only yield its first character.
split_entry = yolo_config[split]
if not isinstance(split_entry, str):
    split_entry = split_entry[0]

path_dataset = os.path.join(yolo_config['path'], split_entry)

# Point at the labels folder by renaming only the LAST path component called
# "images". A blanket str.replace would also rewrite parent folders whose name
# merely contains "images" (e.g. "images-tiled").
path_parts = list(Path(path_dataset).parts)
for position in reversed(range(len(path_parts))):
    if path_parts[position] == 'images':
        path_parts[position] = 'labels'
        path_dataset = str(Path(*path_parts))
        break
else:
    # No component is exactly "images": keep the previous substring behaviour.
    path_dataset = path_dataset.replace('images', 'labels')

path_dataset

In [None]:
# Read every label file of the split into one DataFrame per image.
labels = list()

for txtfile in Path(path_dataset).glob("*.txt"):
    try:
        frame = pd.read_csv(txtfile, sep=" ", names=['class', 'x', 'y', 'w', 'h'])
    except pd.errors.EmptyDataError:
        # An empty label file has no annotations to count; skip it instead of
        # aborting the whole loop.
        continue
    frame['class'] = frame['class'].astype(int)
    # Remember which image (file stem) each row belongs to.
    frame['image'] = txtfile.stem
    labels.append(frame)


In [None]:
# Stack the per-image frames row-wise, then translate the class ids through label_map.
stacked = pd.concat(labels)
df = stacked.assign(**{'class': stacked['class'].map(label_map)})

In [None]:
# For each class, count the distinct images that contain at least one such label.
images_per_class = {
    class_name: len(df.loc[df['class'] == class_name, 'image'].unique())
    for class_name in df['class'].unique()
}

In [None]:
# Report the number of images per class for the current split.
print(f"Split: {split}")
print(images_per_class)

In [None]:
# Report the number of labels per class for the current split.
class_counts = df['class'].value_counts()
print(f"Split: {split}")
print(class_counts)

In [None]:
# Bar chart of labels per class; the y axis is logarithmic.
plot_title = f"{split} label distribution"
df['class'].value_counts().plot(kind='bar', figsize=(10, 5), logy=True, title=plot_title)

# Computing metrics on Validation set

In [None]:
from ultralytics import YOLO
from pathlib import Path

In [None]:
# Load a model
# The weights are the `best.pt` artifact of a run stored under `runs/mlflow`.
path = r"C:/Users/Machine Learning/Desktop/workspace-wildAI/datalabeling/runs/mlflow/382537255263464058/5cc559b1a98d487983b3defbabe95c5f/artifacts/weights/best.pt"
model = YOLO(path)

In [None]:
# Validation settings, collected in one place so they are easy to adjust.
val_settings = dict(
    data=r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml",
    imgsz=640,
    batch=64,
    conf=0.25,
    iou=0.5,
    device="0",
)
validation_results = model.val(**val_settings)