In [None]:
# imports
from itertools import product
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from tqdm import tqdm
from skimage.io import imread,imsave
# import os

# Image tiling for annotation

#### Meanings of arguments
- ```-ratioheight``` : tile height as a fraction of the image height. Example: 0.5 divides the image into two bands along its height.
- ```-ratiowidth``` : tile width as a fraction of the image width. Example: 1.0 means the tile is as wide as the image.
- ```-overlapfactor``` : fraction of overlap between tiles (e.g. 0.1). It must be less than 1.
- ```-rmheight``` : fraction of the image height to remove (crop) at the bottom and top.
- ```-rmwidth``` : fraction of the image width to remove (crop) on each side of the image.
- ```-pattern``` : glob pattern used to find the images. "**/*.JPG" gets all .JPG images in the directory and its subdirectories. On Windows it matches both .JPG and .jpg; on Unix it matches only .JPG.


In [None]:
# New script for tiling data
# Runs HerdNet's tools/patcher.py as a shell command; the flags are described in the markdown list above.
# NOTE(review): the meaning of the three positional `0 0 0` arguments is not visible in this notebook - confirm against patcher.py.
# images_to_tile = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images"
# destination_directory = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images-tiled"
!python ../../HerdNet/tools/patcher.py "D:\PhD\Data per camp\Dry season\Leopard rock\Camp 22 + 37-40\Rep 2" 0 0 0 -overlapfactor 0.1  -ratiowidth 0.33334 -ratioheight 0.5 -rmheight 0.21 -rmwidth 0.08 -dest "D:\PhD\Data per camp\Dry season\Leopard rock\Camp 22 + 37-40\Rep 2 - tiled" -pattern "**/*.JPG"

# Pre-annotating data for Label Studio

In [None]:
from preprocessing import Annotator
import os

In [None]:
# Example: run the detector over a tiled image directory and write the
# predictions next to it as a Label Studio import file.
# The alias must match a registered model alias (the original note mentions "pt" / "onnx").
handler = Annotator(mlflow_model_alias='cycle1')

# NOTE(review): the tiling cell earlier in this notebook writes to
# "...\Camp 22 + 37-40\Rep 2 - tiled" (with a "+"); confirm this directory name is the intended one.
path_img_dir = r"D:\PhD\Data per camp\Dry season\Leopard rock\Camp 22 37-40\Rep 2 - tiled"
root = "D:\\"

# The JSON file is placed beside the image directory and named after it.
_img_dir = Path(path_img_dir)
_json_name = f"{_img_dir.name}_preannotation_label-studio.json"
save_json_path = os.path.join(_img_dir.parent, _json_name)

_build_kwargs = dict(path_img_dir=path_img_dir,
                     root=root,
                     save_json_path=save_json_path,
                     pattern="**/*.JPG")
directory_preds = handler.build_upload_json(**_build_kwargs)

In [None]:
# from preprocessing import Annotator
# from tqdm import tqdm
# import os
# from dotenv import load_dotenv 
# from label_studio_ml.utils import get_local_path
# from label_studio_sdk import Client
# from PIL import Image
# from tqdm import tqdm


# def upload_predictions(project_id:int,
#                        annotator:Annotator,
#                        dotenv_path:str):

#     # Load environment variables
#     load_dotenv(dotenv_path=dotenv_path)

#     # Connect to the Label Studio API and check the connection
#     LABEL_STUDIO_URL = os.getenv('LABEL_STUDIO_URL')
#     API_KEY = os.getenv("LABELSTUDIO-API-KEY")      
#     ls = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)

#     # Select project
#     project = ls.get_project(id=project_id)

#     # Upload predictions for each task
#     tasks = project.get_tasks()
#     for task in tqdm(tasks,desc="Uploading predictions"):
#         task_id = task['id']
#         img_url = task['data']['image']
#         img_path = get_local_path(img_url)
#         img = Image.open(img_path)
#         prediction = annotator.predict(img)
#         img_width, img_height = img.size
#         formatted_pred = [annotator.format_prediction(pred,
#                                                     img_height=img_height,
#                                                     img_width=img_width) for pred in prediction]
#         conf_scores = [pred['score'] for pred in prediction]
#         max_score = 0.0
#         if len(conf_scores)>0:
#             max_score = max(conf_scores)
#         project.create_prediction(task_id=task_id,
#                             score=max_score,
#                             result=formatted_pred,
#                             model_version=annotator.modelversion)

In [None]:
# upload_predictions(project_id=37,
#                    annotator=Annotator(mlflow_model_alias='cycle1'),
#                    dotenv_path="../.env")

In [None]:
from sahi.models.yolov8 import Yolov8DetectionModel
from ultralytics import YOLO
from sahi.predict import get_sliced_prediction
import torch
from PIL import Image
from time import time
from pathlib import Path
import mlflow
import json
import pprint
from tqdm import tqdm
import os
from dotenv import load_dotenv 
from label_studio_ml.utils import get_local_path
from label_studio_sdk import Client
# from preprocessing import Annotator
from copy import deepcopy

class Annotator(object):
    """Pre-annotate images with a detector and exchange the results with Label Studio.

    The detector comes from one of two places:

    * the MLflow model registry (``path_to_weights`` is None), loaded as a pyfunc
      model whose ``predict`` output is returned unchanged;
    * a local YOLOv8 weights file, wrapped in a SAHI ``Yolov8DetectionModel`` and
      run with sliced inference.
    """

    def __init__(self,
                dotenv_path:str='../.env',
                path_to_weights:str=None,
                mlflow_model_alias:str="cycle1",
                mlflow_model_name:str="detector",
                mlflow_model_version:str=None,
                confidence_threshold:float=0.1):
        """Connect to Label Studio and load the detection model.

        Args:
            dotenv_path: .env file providing ``LABEL_STUDIO_URL`` and
                ``LABELSTUDIO-API-KEY``.
            path_to_weights: local YOLOv8 weights; when None the model is pulled
                from the MLflow registry instead.
            mlflow_model_alias: registry alias used to resolve the model version.
            mlflow_model_name: registered model name in MLflow.
            mlflow_model_version: if given, overrides only the ``model_version``
                string attached to predictions; it does not change which model
                is loaded.
            confidence_threshold: detection threshold for the local-weights
                model; it is not used on the MLflow path.
        """
        ## Load env variables
        load_dotenv(dotenv_path=dotenv_path)
        LABEL_STUDIO_URL = os.getenv('LABEL_STUDIO_URL')
        API_KEY = os.getenv("LABELSTUDIO-API-KEY")
        self.labelstudio_client = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)

        ## Sliced-inference settings (only used with local weights)
        self.tilesize=640
        self.overlapratio=0.1
        self.sahi_prostprocess='NMS'
        self.path_to_weights = path_to_weights
        if self.path_to_weights is None:
            ## Load from mlflow (tracking server address is fixed to localhost)
            TRACKING_URI="http://localhost:5000"
            mlflow.set_tracking_uri(TRACKING_URI)
            client = mlflow.MlflowClient()
            name = mlflow_model_name
            alias = mlflow_model_alias
            version = client.get_model_version_by_alias(name=name,alias=alias).version
            self.modelversion = f'{name}:{version}'
            self.modelURI = f'models:/{name}/{version}'
            self.model = mlflow.pyfunc.load_model(self.modelURI)
        else:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            print('Device:', device)
            self.model = Yolov8DetectionModel(
                                                        model=YOLO(path_to_weights,task='detect'),
                                                        confidence_threshold=confidence_threshold,
                                                        image_size=self.tilesize,
                                                        device=device,
                                                        )
            self.modelversion = Path(path_to_weights).stem

        # LS label config
        self.from_name = "label"
        self.to_name = "image"
        self.label_type = "rectanglelabels"
        if mlflow_model_version is not None:
            self.modelversion = mlflow_model_version

    def predict(self, image):
        """Run the detector on one image.

        Args:
            image: the image to analyse; the methods of this class pass an
                opened PIL image.

        Returns:
            The MLflow model's own ``predict`` output when the model came from
            the registry, otherwise the SAHI sliced prediction converted with
            ``to_coco_annotations()``.
        """
        if self.path_to_weights is None:
            return self.model.predict(image)

        result = get_sliced_prediction(image,
                                        self.model,
                                        slice_height=self.tilesize,
                                        slice_width=self.tilesize,
                                        overlap_height_ratio=self.overlapratio,
                                        overlap_width_ratio=self.overlapratio,
                                        postprocess_type=self.sahi_prostprocess,
                                        )
        return result.to_coco_annotations()

    def format_prediction(self,pred:dict,img_height:int,img_width:int):
        """Convert one detection into a Label Studio result item.

        Args:
            pred: detection with ``bbox`` (x, y, width, height in pixels),
                ``category_name`` and ``score``.
            img_height: image height in pixels.
            img_width: image width in pixels.

        Returns:
            A dict whose ``value`` box is expressed in percent of the image
            size and whose ``score`` is always a plain ``float``.
        """
        x, y, width, height = pred['bbox']
        label = pred['category_name']
        raw_score = pred['score']
        # Any real-valued score (int, numpy scalar, ...) is kept and coerced to a
        # built-in float; missing or non-numeric scores fall back to 0.0.
        if raw_score is None or isinstance(raw_score,(bool,str,bytes)):
            score = 0.0
        else:
            try:
                score = float(raw_score)
            except (TypeError, ValueError):
                score = 0.0
        template = {
                    "from_name": self.from_name,
                    "to_name": self.to_name,
                    "type": self.label_type,
                    "original_width":img_width,
                    "original_height":img_height,
                    "image_rotation":0,
                    'value': {
                        self.label_type: [label,],
                        'x': x / img_width * 100,
                        'y': y / img_height * 100,
                        'width': width / img_width * 100,
                        'height': height / img_height * 100,
                        'rotation':0
                    },
                    'score': score
        }
        return template

    @staticmethod
    def _max_score(formatted_preds:list):
        """Return the highest sanitized score of the formatted items, 0.0 when there are none."""
        return max((item['score'] for item in formatted_preds), default=0.0)

    def upload_predictions(self,project_id:int):
        """Predict on every task image of a Label Studio project and upload the results.

        Args:
            project_id: id of the Label Studio project whose tasks are processed.
        """
        # Select project
        project = self.labelstudio_client.get_project(id=project_id)

        # Upload predictions for each task
        tasks = project.get_tasks()
        for task in tqdm(tasks,desc="Uploading predictions"):
            task_id = task['id']
            img_url = task['data']['image']
            img_path = get_local_path(img_url)
            # Context manager so the file handle is released after each task.
            with Image.open(img_path) as img:
                prediction = self.predict(img)
                img_width, img_height = img.size
            formatted_pred = [self.format_prediction(pred,
                                                    img_height=img_height,
                                                    img_width=img_width) for pred in prediction]
            project.create_prediction(task_id=task_id,
                                score=self._max_score(formatted_pred),
                                result=formatted_pred,
                                model_version=self.modelversion)

    def build_upload_json(self,project_id:int,path_img_dir:str=None,root:str=None,
                          pattern="*.JPG",
                          bulk_predictions:dict=None,
                          save_json_path:str=None):
        """Build Label Studio import records (image + predictions) for a project's tasks.

        Args:
            project_id: id of the Label Studio project whose tasks are read.
            path_img_dir: currently unused; kept for backward compatibility.
            root: currently unused; kept for backward compatibility.
            pattern: currently unused; kept for backward compatibility.
            bulk_predictions: optional precomputed detections, indexed by image
                file name. Each detection must also carry the image ``height``
                and ``width``. When None, the model is run on every image.
            save_json_path: if given, the records are also written there as JSON.

        Returns:
            A list with one record per task, each holding the task image url and
            a single prediction entry (possibly with an empty result).

        Raises:
            KeyError: if ``bulk_predictions`` is given but lacks a task's image name.
        """
        directory_preds = list()
        project = self.labelstudio_client.get_project(id=project_id)

        tasks = project.get_tasks()
        for task in tqdm(tasks,desc="Uploading predictions"):
            img_url = task['data']['image']
            image_path = Path(get_local_path(img_url))
            record = {
                        "data": {"image" : img_url},
                        "predictions":[],
                    }
            # get predictions
            if bulk_predictions is None:
                start = time()
                # Context manager so the file handle is released after each task.
                with Image.open(image_path) as image:
                    predictions = self.predict(image)
                    img_width, img_height = image.size
                print(f'Prediction time:{time() - start:.3f} seconds.')
                # format predictions
                formatted_pred = [self.format_prediction(pred,
                                                        img_height=img_height,
                                                        img_width=img_width) for pred in predictions]
            else:
                predictions = bulk_predictions[image_path.name]
                formatted_pred = [self.format_prediction(pred,
                                                        img_height=pred['height'],
                                                        img_width=pred['width']) for pred in predictions]
            # store predictions; an image without detections still gets an entry with score 0.0
            record['predictions'].append({'result':formatted_pred,
                                          'model_version':self.modelversion,
                                          'score':self._max_score(formatted_pred),
                                          }
                                          )
            # update buffer
            directory_preds.append(record)

        if save_json_path is not None:
            with open(Path(save_json_path),'w') as file:
                json.dump(directory_preds,file,indent=2)

        return directory_preds

In [None]:
# Build the annotator from the MLflow registry alias (the original note mentions "pt" / "onnx"),
# then run it over every task of Label Studio project 37 and upload the predictions.
handler = Annotator(mlflow_model_alias='cycle1')
# Alternative kept for reference: build the import JSON instead of uploading.
# directory_preds = handler.build_upload_json(project_id=37)
handler.upload_predictions(project_id=37)

In [None]:
# path_img_dir=r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images"
# save_json_path = os.path.join(Path(path_img_dir).parent,
#                               f"{Path(path_img_dir).name}_preannotation_label-studio.json")
# if save_json_path is not None:
#             with open(Path(save_json_path),'w') as file:
#                 json.dump(directory_preds,file,indent=2)

In [None]:
# len(directory_preds)

# Inference with Sahi

In [None]:
from sahi.models.yolov8 import Yolov8DetectionModel
from ultralytics import YOLO
from sahi.predict import get_sliced_prediction
import torch
from PIL import Image
from time import time

In [None]:
class Detector(object):
    """YOLOv8 detector run through SAHI sliced inference."""

    def __init__(self,
                path_to_weights:str,
                confidence_threshold:float=0.3,
                tilesize:int=640,
                overlapratio:float=0.1,
                postprocess_type:str='NMS'):
        """Load the weights and store the slicing settings.

        Args:
            path_to_weights: weights file handed to ``YOLO`` (detect task).
            confidence_threshold: detection confidence threshold.
            tilesize: side of the square slices; also used as the model's
                ``image_size`` so the two values cannot drift apart.
            overlapratio: overlap ratio between slices, for both height and width.
            postprocess_type: SAHI postprocess type used to merge slice predictions.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        self.tilesize=tilesize
        self.overlapratio=overlapratio
        self.sahi_prostprocess=postprocess_type
        self.detection_model = Yolov8DetectionModel(
                                                    model=YOLO(path_to_weights,task='detect'),
                                                    confidence_threshold=confidence_threshold,
                                                    image_size=self.tilesize,
                                                    device=device,
                                                    )
        print('Device:', device)

    def predict(self, image:str):
        """Run sliced prediction on one image file.

        Args:
            image: path of the image to open.

        Returns:
            The SAHI result converted with ``to_coco_annotations()``.
        """
        # Context manager so the image file handle is released once inference is done.
        with Image.open(image) as opened:
            result = get_sliced_prediction(opened,
                                            self.detection_model,
                                            slice_height=self.tilesize,
                                            slice_width=self.tilesize,
                                            overlap_height_ratio=self.overlapratio,
                                            overlap_width_ratio=self.overlapratio,
                                            postprocess_type=self.sahi_prostprocess,
                                            )
            return result.to_coco_annotations()

In [None]:
# Single tiled image used as input by the timing loop that follows.
image_path = r"D:\PhD\Data per camp\Dry season\Kapiri\Camp 6-8\Rep 1 - tiled\DJI_20231003081043_0016_1.JPG"

In [None]:
def _time_single_prediction(weights_path, image):
    """Return the wall-clock seconds of one Detector.predict call; model loading is not timed."""
    detector = Detector(path_to_weights=weights_path,confidence_threshold=0.3)
    tic = time()
    detector.predict(image=image)
    return time() - tic

# One (extension, seconds) pair per exported weight format.
_weights_stem = r"..\base_models_weights\yolov8.kaza"
times = [(ext, _time_single_prediction(_weights_stem + ext, image_path))
         for ext in ('.pt', '.onnx')]

In [None]:
# Display the (extension, seconds) pairs collected by the timing loop.
times

In [None]:
# img_path = Path(r"C:\Users\fadel\OneDrive\Bureau\e-savior\SAVMAP_samples\00a033fefe644429a1e0fcffe88f8b39.JPG")
# directory = img_path.parent/'preprocessed'
# directory.mkdir(parents=False,exist_ok=True)

In [None]:
# data = imread(str(img_path))
# tilesize_h = 1000
# tilesize_w = 1000
# height, width, channels = data.shape 
# count = 0
# for i,j in tqdm(product(list(range(0,height,tilesize_h)),list(range(0,width,tilesize_w)))):
#     tile = data[i:min(i+tilesize_h,height),j:min(j+tilesize_w,width),:]
#     count += 1
#     filename = img_path.name.split('.')[0] + f'#{i}#{j}' + img_path.suffix
#     savepath = directory/filename
#     imsave(savepath,tile)
#     #assert sum(tile.shape) == tilesize_h+tilesize_w+channels,f"{tile.shape}"


In [None]:
# plt.imshow(tile)
# plt.show()

In [None]:
# height,width


# YOLO data_config.yaml 

In [None]:
import yaml
import json
from arguments import Arguments

In [None]:
# Read the YOLO dataset config of the identification dataset.
# NOTE(review): yaml.load with FullLoader is kept unchanged; yaml.safe_load would be
# the safer choice if this file could ever come from an untrusted source.
_identification_cfg_path = r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml"
with open(_identification_cfg_path, 'r') as stream:
    yolo_config = yaml.load(stream, Loader=yaml.FullLoader)
yolo_config

In [None]:
# Build the {class index: class name} mapping from the exported label list.
args = Arguments()
_mapping_path = r"D:\PhD\Data per camp\IdentificationDataset\label_mapping.json"
with open(_mapping_path, 'r') as stream:
    _label_records = json.load(stream)

# Keep names in file order, leaving out those listed in args.discard_labels.
names = []
for _record in _label_records:
    if _record['name'] in args.discard_labels:
        continue
    names.append(_record['name'])

# Kept names are re-indexed contiguously from 0.
label_map = dict(enumerate(names))
label_map

In [None]:
# Overwrite the class names and the class count in the loaded config (in place).
yolo_config['names'] = label_map
yolo_config['nc'] = len(label_map)
yolo_config

In [None]:
# Write the patched config back over the file it was read from, keeping key order
# and using block (non-flow) YAML style.
_identification_cfg_out = r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml"
with open(_identification_cfg_out, 'w') as stream:
    yaml.dump(yolo_config, stream, default_flow_style=False, sort_keys=False)

# Dataset distribution

In [None]:
import yaml
import pandas as pd
import os
from pathlib import Path

In [None]:
# Read the YOLO dataset config of the WAID dataset.
# NOTE(review): yaml.load with FullLoader is kept unchanged; yaml.safe_load would be
# the safer choice if this file could ever come from an untrusted source.
_waid_cfg_path = r"D:\PhD\Data per camp\Extra training data\WAID\data_config.yaml"
with open(_waid_cfg_path, 'r') as stream:
    yolo_config = yaml.load(stream, Loader=yaml.FullLoader)
yolo_config

In [None]:
# `names` in a YOLO data config may be a list (position = class id) or a dict
# {class id: name}. The class column is later translated with Series.map, which
# needs a mapping, so a list is converted to {index: name}.
_class_names = yolo_config['names']
label_map = dict(enumerate(_class_names)) if isinstance(_class_names,(list,tuple)) else _class_names

In [None]:
split = 'train'

# A split entry may be a single path string or a list of paths; with a plain
# string, indexing [0] would return its first character. Only the first listed
# directory is inspected.
_split_entry = yolo_config[split]
if isinstance(_split_entry,(list,tuple)):
    _split_entry = _split_entry[0]

path_dataset = os.path.join(yolo_config['path'],_split_entry)
# Derive the labels directory by swapping only the LAST 'images' occurrence, so a
# parent directory whose name happens to contain 'images' is left untouched.
path_dataset = 'labels'.join(path_dataset.rsplit('images',1))

path_dataset

In [None]:
# Read every YOLO label file of the split into one DataFrame per image.
labels = list()

for txtfile in Path(path_dataset).glob("*.txt"):
    try:
        file_labels = pd.read_csv(txtfile,sep=" ",names = ['class','x','y','w','h'] )
    except pd.errors.EmptyDataError:
        # An empty label file (image without annotations) has nothing to count;
        # skip it instead of aborting the whole scan.
        continue
    file_labels['class'] = file_labels['class'].astype(int)
    # The file stem identifies the image the annotations belong to.
    file_labels['image'] = txtfile.stem
    labels.append(file_labels)


In [None]:
# Stack the per-image tables and translate class ids into class names.
df = (
    pd.concat(labels, axis=0)
    .assign(**{'class': lambda frame: frame['class'].map(label_map)})
)

In [None]:
# For each class, the number of distinct images holding at least one annotation of it.
images_per_class = {
    class_name: len(df.loc[df['class'] == class_name, 'image'].unique())
    for class_name in df['class'].unique()
}

In [None]:
# Report, for the current split, the number of distinct images per class.
print("Split:", split)
print(images_per_class)

In [None]:
# Report, for the current split, the number of annotation rows per class.
print('Split:',split)
print(df['class'].value_counts())

In [None]:
# Bar chart of annotation counts per class, with a log-scaled y axis.
df['class'].value_counts().plot(kind='bar',figsize=(10,5),logy=True,title=f"{split} label distribution")

# Validation metrics

In [None]:
from ultralytics import YOLO
from pathlib import Path

In [None]:
# Load a model from the best.pt weights stored among an MLflow run's artifacts.
# NOTE: this rebinds `path` and `model`, which earlier cells also assign.
path = r"C:/Users/Machine Learning/Desktop/workspace-wildAI/datalabeling/runs/mlflow/382537255263464058/5cc559b1a98d487983b3defbabe95c5f/artifacts/weights/best.pt"
model = YOLO(path)

In [None]:
# Customize validation settings: validate on the identification dataset config with
# 640 px images, batch size 64, confidence 0.25 and IoU 0.5, on device "0".
validation_results = model.val(data=r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml", imgsz=640, batch=64, conf=0.25, iou=0.5, device="0")

# Sahi tutorials

In [None]:
from sahi.slicing import slice_coco
from sahi.utils.file import load_json
from skimage.io import imread,imsave
import matplotlib.pyplot as plt
import os
import pandas as pd
import math
import shutil
from pathlib import Path
from arguments import Arguments
from utils import save_df_as_yolo, sample_data, get_slices, convert_json_annotations_to_coco,COCO_DIR_PATH,JSON_DIR_PATH, save_tiles, ALL_CSV
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# Load one exported COCO annotation file and display it whole.
# NOTE(review): the full dict can be very large; consider displaying only its keys.
coco_dict = load_json(r"..\exported_annotations\coco-format\result.json")
coco_dict

In [None]:
def load_coco_dataset(input_dir=COCO_DIR_PATH):
    """Map the upload image directory of each COCO file in ``input_dir`` to that file.

    Every ``*.json`` directly inside ``input_dir`` is loaded. All images listed in
    one file must share a single parent directory (checked with an assert). If two
    files share a directory, the one visited last wins.

    Returns:
        dict of {upload image directory: path of the COCO json file}.
    """

    def _single_upload_dir(annotation:dict):
        # Distinct parent directories of the images referenced by this annotation file.
        parents = {os.path.dirname(image_meta['file_name']) for image_meta in annotation['images']}
        assert len(parents)==1,'There should be one upload directory per annotation project'
        return parents.pop()

    dir_to_coco = dict()
    for coco_path in Path(input_dir).glob('*.json'):
        upload_dir = _single_upload_dir(load_json(coco_path))
        dir_to_coco[upload_dir] = coco_path

    return dir_to_coco

load_coco_dataset()


In [None]:
def build_yolo_dataset(args:Arguments,ls_json_dir:str=JSON_DIR_PATH,clear_out_dir:bool=False,load_coco_existing:bool=False,ls_coco_dir:str=COCO_DIR_PATH):
    """Slice the annotated images into tiles and sample the tiles to keep.

    Args:
        args: settings object; this function reads ``dest_path_images``,
            ``dest_path_labels``, ``height``, ``width``, ``overlap_ratio``,
            ``min_visibility``, ``empty_ratio`` and ``discard_labels``.
        ls_json_dir: directory of Label Studio JSON exports, converted to COCO
            when ``load_coco_existing`` is False.
        clear_out_dir: empty the output directories before running.
        load_coco_existing: reuse the COCO files already in ``ls_coco_dir``
            instead of converting the JSON exports.
        ls_coco_dir: directory of existing COCO files.

    Returns:
        The sampled tiles DataFrame of the FIRST image directory processed, or
        None when there is nothing to process.
        NOTE(review): the return sits inside the loop, so later directories are
        never sliced and the saving steps are disabled - this looks like an
        exploration shortcut; confirm before relying on it.
    """
    #clear directories
    if clear_out_dir:
        dirs_to_reset = [args.dest_path_images,args.dest_path_labels]
        # The COCO directory is only wiped when its content is about to be
        # regenerated; wiping it while asked to load existing COCO files would
        # leave nothing to load.
        if not load_coco_existing:
            dirs_to_reset.append(COCO_DIR_PATH)
        for p in dirs_to_reset:
            # rmtree raises on a missing directory, so only remove what exists.
            if Path(p).exists():
                shutil.rmtree(p)
            Path(p).mkdir(parents=True,exist_ok=True)
    if load_coco_existing:
        map_imgdir_cocopath = load_coco_dataset(ls_coco_dir)
    else:
        # convert  json to coco
        map_imgdir_cocopath = convert_json_annotations_to_coco(input_dir=ls_json_dir)

    # slice coco annotations and save tiles
    for img_dir,cocopath in map_imgdir_cocopath.items():
        # slice annotations
        coco_dict_slices = get_slices(coco_annotation_file_path=cocopath,
                            img_dir=img_dir,
                            slice_height=args.height,
                            slice_width=args.width,
                            overlap_height_ratio=args.overlap_ratio,
                            overlap_width_ratio=args.overlap_ratio,
                            min_area_ratio=args.min_visibility
                            )
        # sample tiles
        df_tiles = sample_data(coco_dict_slices=coco_dict_slices,
                                empty_ratio=args.empty_ratio,
                                out_csv_path=None,
                                img_dir=img_dir,
                                labels_to_discard=args.discard_labels
                                )
        return df_tiles
        # Disabled saving steps, kept for reference:
        # save tiles
        # save_tiles(df_tiles=df_tiles,
        #            out_img_dir=args.dest_path_images,
        #            clear_out_img_dir=False)
        # # save labels in yolo format
        # save_df_as_yolo(df_annotation=df_tiles[~df_tiles['x'].isna()].copy(),
        #                 slice_height=args.height,
        #                 slice_width=args.width,
        #                 dest_path_labels=args.dest_path_labels)

    # No image directory was found.
    return None

        

In [None]:
# Build the tiles table from the existing COCO files, with empty_ratio overridden to 1
# and without clearing any output directory.
args = Arguments()
args.empty_ratio = 1
df_tiles = build_yolo_dataset(args=args,clear_out_dir=False,load_coco_existing=True)

In [None]:
# Ad-hoc check: number of tile rows minus 2 x 1537.
# NOTE(review): the origin of 1537 is not shown in this notebook - document it or compute it.
len(df_tiles) - 1537*2

In [None]:
# Peek at the first two rows of the tiles table.
df_tiles.head(2)

In [None]:
# First ten values of the 'images' column.
df_tiles['images'].iloc[:10].to_list()

In [None]:
# Flag rows repeating an earlier (x0, x1, y0, y1, images) combination and display
# the table without them (df_tiles itself is not modified).
duplicated = df_tiles.duplicated(['x0','x1','y0','y1','images'])
df_tiles[~duplicated]

In [None]:
# Number of rows whose 'images' value already appeared in an earlier row.
df_tiles.duplicated(['images']).sum()

In [None]:
# Count the rows of df_tiles belonging to each distinct value of the 'images' column.
# NOTE: the loop variable rebinds the notebook-level name `df`; after this cell `df`
# is the last group of df_tiles, not the label table built in the "Dataset distribution" section.
num_annot = list()
for image_name,df in df_tiles.groupby('images'):
    # print(len(df),image_name)
    num_annot.append(len(df))
    # print(image_name)
    

In [None]:
# Number of groups, then the largest and smallest group size.
len(num_annot),max(num_annot),min(num_annot)

In [None]:
# Ad-hoc arithmetic (= 3074).
# NOTE(review): the origin of both operands is not shown in this notebook - document or remove.
2205+869

In [None]:
# Total number of rows across all groups.
sum(num_annot)

In [None]:
# Histogram (13 bins) of the number of rows per 'images' value.
plt.hist(num_annot,bins=13)
plt.show()

In [None]:
# Display whatever `df` currently holds. When the cells are run in order, that is
# the last group left over by the groupby loop on df_tiles.
df