# Image tiling for annotation

#### Meanings of arguments
- ```-ratioheight``` : proportion of tile  w.r.t height of image. Example 0.5 means dividing the image in two bands w.r.t height.
- ```-ratiowidth``` : proportion of tile w.r.t. width of image. Example 1.0 means the width of the tile is the same as the image.
- ```-overlapfactor``` : overlap between tiles, expressed as a fraction (e.g. 0.1). It should be less than 1.
- ```-rmheight``` : percentage of height to remove or crop at bottom and top
- ```-rmwidth``` : percentage of width to remove or crop on each side of the image
- ```-pattern``` : "**/*.JPG" will get all .JPG images in directory and subdirectories. On windows it will get both .JPG and .jpg. On unix it will only get .JPG images


In [None]:
# New script for tiling data
# images_to_tile = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images"
# destination_directory = r"D:\PhD\Data per camp\Extra training data\savmap_dataset_v2\raw_data\images-tiled"
!python ../../HerdNet/tools/patcher.py "D:\PhD\Data per camp\Wet season\Kapiri\Camp 3\DJI_202402051048_003_KapiriCamp3" 0 0 0 -overlapfactor 0.1  -ratiowidth 0.5 -ratioheight 0.5 -rmheight 0 -rmwidth 0 -dest "D:\PhD\Data per camp\Wet season\Kapiri\Camp 3\DJI_202402051048_003_KapiriCamp3 - tiled" -pattern "**/*.JPG"

# Pre-annotating data for Labelstudio

In [None]:
from dotenv import load_dotenv
load_dotenv('../.env')

from datalabeling.annotator import Annotator
import os
from pathlib import Path
import torch
from tqdm import tqdm

### Creating a JSON file to be uploaded to Label Studio

In [None]:
# Example
# provide correct alias, "pt", "onnx"
alias = "last" # the aliases are found in mlflow tracker UI, use "last-1" to use the previous model
name = "obb-detector" # detector, "obb-detector"
handler = Annotator(mlflow_model_alias=alias,
                    mlflow_model_name=name,
                    is_yolo_obb= name.strip() == "obb-detector",
                    # dotenv_path="../.env"
                    )
path_img_dir=r"D:\PhD\Africa Parks\Liuwa aerial survey_ALL\CENSUS 2019\DAY 2 CENSUS 2019_CONVERTED\AP 2019 day 2 - tiled"
root="D:\\"
save_json_path = os.path.join(Path(path_img_dir).parent, f"{Path(path_img_dir).name}_preannotation_label-studio.json")

# build and saves json
directory_preds = handler.build_upload_json(path_img_dir=path_img_dir,
                                            root=root,
                                            save_json_path=save_json_path,
                                            pattern="**/*.JPG")

### Pre-annotating an existing project using Label studio API
It seems that this (i.e. filtering) will not work well with older projects created prior to the Label Studio software update.
This is the **recommended way of pre-annotating data in Label Studio**.

In [None]:
# provide correct alias, "pt", "onnx"
aliases = ["version11"]
project_id = 47 # insert correct project_id by loooking at the url
for alias in aliases:
    name = "obb-detector" # detector, "obb-detector"
    handler = Annotator(mlflow_model_alias=alias,
                        mlflow_model_name=name,
                        confidence_threshold=0.25,
                        is_yolo_obb=name.strip() == "obb-detector",
                        dotenv_path="../.env")
    handler.upload_predictions(project_id=project_id)

**Before running the script below, make sure that you have exported the annotations so you can revert back!!!**

In [None]:
#  Cleaning annotations - NO WAY BACK
# Removes every result labelled "wildlife" from the first annotation of each task
# in the selected project. Export the annotations first: the update is applied in place.
name = "obb-detector"
handler = Annotator(mlflow_model_alias="version6",
                    mlflow_model_name=name,
                    confidence_threshold=0.25,
                    is_yolo_obb=name.strip() == "obb-detector",
                    dotenv_path="../.env")

# Select project
project_id = 88
project = handler.labelstudio_client.get_project(id=project_id)

# Delete annotations saved with label "wildlife" assigned by the predictor
tasks = project.get_tasks()
for task in tqdm(tasks, desc="correcting annotations"):
    annotations = task.get("annotations") or []
    if not annotations:
        # Task was never annotated: nothing to clean, and indexing [0] would abort the whole run
        continue

    first_annotation = annotations[0]
    results = first_annotation['result']
    # NOTE(review): annotations holding a single result are left untouched, exactly as
    # before (condition was `len(...) > 1`) - confirm this is intended.
    if len(results) <= 1:
        continue

    results_to_keep = [annot for annot in results
                       if annot['value']['rectanglelabels'][0] != 'wildlife']
    if len(results_to_keep) == len(results):
        # No "wildlife" result in this annotation: skip the needless API call
        continue

    project.update_annotation(first_annotation["id"], result=results_to_keep)

In [None]:
len(task['annotations']), len(task['annotations'][0]['result']), task['id'], task["annotations"][0]["id"]

In [None]:
task['annotations'][0]['result'][0] #['value']['rectanglelabels']

In [None]:
results_to_keep = []
for annot in task['annotations'][0]['result']:
    if annot['value']['rectanglelabels'][0] != 'wildlife':
        results_to_keep.append(annot)
        print(annot['value'],annot['id'],end="\n")

In [None]:
results_to_keep

In [None]:
project.update_annotation(annotation_id=...,)

To speed up inference on Intel hardware, make the following changes in ultralytics/nn/autobackend.py:
```
- device_name = "AUTO:NPU,GPU,CPU" # CPU, GPU, NPU, AUTO,"AUTO:GPU,NPU"
- inference_mode = "LATENCY" # OpenVINO inference modes are 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT'
- LOGGER.info(f"Using OpenVINO {inference_mode} mode for inference...")
- ov_compiled_model = core.compile_model(
                ov_model,
                device_name=device_name,  # AUTO selects best available device, do not modify
                config={"PERFORMANCE_HINT": inference_mode,
                        "CACHE_DIR": os.environ["OPENVINO_CACHE_MODEL"]}, # make sure to set environment variable
            )
```

In [None]:
# using path_to_weights
# go to ultralytics.nn.autobackend to modify ov_compiled device to "AUTO:NPU,GPU,CPU"

use_sliding_window=True

handler = Annotator(path_to_weights=r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best_openvino_model",
                    is_yolo_obb=True,
                    tilesize=1280,
                    overlapratio=0.1,
                    use_sliding_window=use_sliding_window,
                    confidence_threshold=0.5,
                    device="NPU", # "cpu", "cuda"
                    tag_to_append=f"-sahi:{use_sliding_window}",
                    dotenv_path="../.env")

project_id = 3 # insert correct project_id by loooking at the url
top_n=10
handler.upload_predictions(project_id=project_id,top_n=top_n)

In [None]:
from label_studio_ml.utils import get_local_path
from urllib.parse import unquote, quote
import os
path = unquote("/data/local-files/?d=savmap_dataset_v2%5Cimages_splits%5C003a34ee6b7841e6851b8fe511ebe102_0.JPG")
get_local_path(path,download_resources=False)#,os.path.exists(get_local_path(path))

# Inference with Sahi

In [None]:
from ultralytics import YOLO
from PIL import Image
import time
import numpy as np
from datalabeling.annotator import Detector
from dotenv import load_dotenv

In [None]:
# load env variable, loads model cache location!!
load_dotenv('../.env')

In [None]:
IMAGE_PATH = r"D:\savmap_dataset_v2\images_splits\00a033fefe644429a1e0fcffe88f8b39_1.JPG"

## Optimizing with Openvino

To speed up inference on Intel hardware, make the following changes in ultralytics/nn/autobackend.py:
```
- device_name = "AUTO:NPU,GPU,CPU" # CPU, GPU, NPU, AUTO,"AUTO:GPU,NPU"
- inference_mode = "LATENCY" # OpenVINO inference modes are 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT'
- LOGGER.info(f"Using OpenVINO {inference_mode} mode for inference...")
- ov_compiled_model = core.compile_model(
                ov_model,
                device_name=device_name,  # AUTO selects best available device, do not modify
                config={"PERFORMANCE_HINT": inference_mode,
                        "CACHE_DIR": os.environ["OPENVINO_CACHE_MODEL"]}, # make sure to set environment variable
            )
```

In [None]:
# Define detector
# to speed up inference on Intel hardware, apply the autobackend.py changes listed above
model = Detector(path_to_weights=r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best_openvino_model",
                confidence_threshold=0.1,
                overlap_ratio=0.1,
                tilesize=1280,
                device='CPU',
                use_sliding_window=False,
                is_yolo_obb=True)

In [None]:
image = Image.open(IMAGE_PATH)

# Time one prediction (the printing of the result is included in the measured span)
start_time = time.perf_counter()
print(model.predict(image,return_coco=True,nms_iou=0.5))
end_time = time.perf_counter()
print(f"Device took {end_time-start_time:.2f} seconds.")

In [None]:
# inference with openvino
import openvino as ov
import openvino.properties.hint as hints
import torch
import torchvision.transforms as F
from ultralytics.utils import DEFAULT_CFG
from ultralytics.cfg import get_cfg
from ultralytics.data.converter import coco80_to_coco91_class

# load validator
# The Ultralytics validator is only used for its preprocess/postprocess steps;
# the checkpoint is loaded once to read class names and the number of classes.
args = get_cfg(cfg=DEFAULT_CFG)
det_model = YOLO(r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best.pt")
det_validator = det_model.task_map[det_model.task]["validator"](args=args)
det_validator.is_coco = True
det_validator.class_map = coco80_to_coco91_class()
det_validator.names = det_model.model.names
det_validator.metrics.names = det_validator.names
det_validator.nc = det_model.model.model[-1].nc
det_validator.stride = 32

core = ov.Core()
det_model_path = r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best_openvino_model\best.xml"
det_ov_model = core.read_model(det_model_path)

device = "AUTO:NPU,GPU" # CPU, NPU, GPU "AUTO:NPU,GPU,CPU" 

print("Available core devices: ",core.available_devices)

# reshaping for batch prediction
# The input is fixed to a single 3x1280x1280 image before compilation
input_layer = det_ov_model.input(0)
output_layer = det_ov_model.output(0)
new_shape = ov.PartialShape([1, 3, 1280, 1280])
det_ov_model.reshape({input_layer.any_name: new_shape})

ov_config = {hints.performance_mode: hints.PerformanceMode.THROUGHPUT,
             "CACHE_DIR": '../models/model_cache'}

# Only applied when `device` is exactly "GPU" (not for the "AUTO:..." selections)
if ("GPU" in core.available_devices) and device=="GPU":
    ov_config["GPU_DISABLE_WINOGRAD_CONVOLUTION"] = "YES"
det_compiled_model = core.compile_model(det_ov_model, device, ov_config)

def infer(image):
    """Run the compiled OpenVINO model on ``image`` and post-process its first output.

    The image goes through ``det_validator.preprocess`` (wrapped in a batch dict
    with placeholder targets), then through ``det_compiled_model``; the first
    model output is converted to a torch tensor and handed to
    ``det_validator.postprocess``.

    Args:
        image: input passed as the ``"img"`` entry of the batch
            (presumably a ``(1, 3, 1280, 1280)`` tensor - TODO confirm).

    Returns:
        Whatever ``det_validator.postprocess`` returns for the predictions.
    """
    # Placeholder targets: only the "img" entry of the preprocessed batch is used
    dummy_batch = {
        "img": image,
        "batch_idx": torch.Tensor([0]),
        "cls": torch.Tensor([0]),
        "bboxes": torch.Tensor([0.,0.,0.,0.]),
    }
    model_input = det_validator.preprocess(dummy_batch)["img"]

    raw_outputs = det_compiled_model(model_input)
    first_output = det_compiled_model.output(0)
    predictions = torch.from_numpy(raw_outputs[first_output])

    return det_validator.postprocess(predictions)

In [None]:
# image = Image.open(IMAGE_PATH)
# image = F.PILToTensor()(image)[None,:,:1280,:1280]
# infer(image)

In [None]:
# inference with pt
# model = YOLO(r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best.pt",task='obb')

In [None]:
# rescaling input images
# model(image/255.)

In [None]:
# inference with openvino
# model_vino = YOLO(r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best_openvino_model",task='obb')
# model_vino(image/255.)

In [None]:
# sahi_model_obb = Detector(path_to_weights=r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best_openvino_model",
#                     confidence_threshold=0.6,
#                     overlap_ratio=0.1,
#                     tilesize=640,
#                     is_yolo_obb=True)

In [None]:
# image_path = r"D:\savmap_dataset_v2\images\0d1ba3c424ad4414ac37dbd0c93460ea.JPG"
# image = Image.open(image_path)
# print(image.size)

In [None]:
# result = sahi_model_obb.predict(image,False)

In [None]:
# result
# result.export_visuals('../.tmp')

## Sahi inference calibration

In [None]:
from itertools import product

In [None]:
# hyperparams
overlap_ratios = [0.1,0.2,0.3]
tilesizes = [640,2*640,3*640]
imgsz = [640,2*640,3*640]

# Build one detector per (overlap ratio, tile size, image size) combination
for ratio, tilesize, image_size in product(overlap_ratios,tilesizes,imgsz):
    print(ratio,tilesize,image_size)
    # Define detector with the current grid point
    # (previously the constructor used fixed values, so every iteration was identical)
    model = Detector(path_to_weights=r"C:\Users\FADELCO\OneDrive\Bureau\datalabeling\models\best_openvino_model",
                    confidence_threshold=0.1,
                    overlap_ratio=ratio,
                    tilesize=tilesize,
                    imgsz=image_size,
                    device='CPU',
                    use_sliding_window=True,
                    is_yolo_obb=True)

    #TODO: evaluate `model` for this combination and record the result


# YOLO data_config.yaml 

In [None]:
import yaml
import json
from datalabeling.arguments import Arguments
import os
import pandas as pd

In [None]:
# load yaml
with open(r"D:\PhD\Data per camp\DetectionDataset\hard_samples\train_ratio_20-seed_41.yaml",'r') as file:
    yolo_config = yaml.load(file,Loader=yaml.FullLoader)
yolo_config

In [None]:
pd.read_csv(os.path.join(yolo_config["path"],yolo_config['train']),header=None,names=['paths'])['paths'].to_list()[:]

In [None]:
# load label mapping
args = Arguments()
with open(r"D:\PhD\Data per camp\IdentificationDataset\label_mapping.json",'r') as file:
    label_map = json.load(file)
names = [p['name'] for p in label_map if p['name'] not in args.discard_labels ]
label_map = dict(zip(range(len(names)),names))
label_map

In [None]:
yolo_config.update({'names':label_map,'nc':len(label_map)})
yolo_config

In [None]:
with open(r"D:\PhD\Data per camp\IdentificationDataset\data_config.yaml",'w') as file:
    yaml.dump(yolo_config,file,default_flow_style=False, sort_keys=False)

# Dataset distribution

## Visualize distribution per annotation project

In [None]:
from datalabeling.dataset import convert_json_annotations_to_coco, load_coco_annotations
from pathlib import Path
import json
import pandas as pd
from collections import Counter
from dotenv import load_dotenv
from label_studio_sdk import Client
# from itertools import chain
import traceback
import os

In [None]:
dotenv_path=r"..\.env"
load_dotenv(dotenv_path=dotenv_path)
# Connect to the Label Studio API and check the connection
LABEL_STUDIO_URL = os.getenv('LABEL_STUDIO_URL')
API_KEY = os.getenv("LABEL_STUDIO_API_KEY")
labelstudio_client = Client(url=LABEL_STUDIO_URL, api_key=API_KEY)

In [None]:

def get_project_stats(project_id:int,annotator_id = 0):
    """Print and return label statistics for one Label Studio project.

    Args:
        project_id (int): id of the project fetched through ``labelstudio_client``.
        annotator_id (int): index into each task's ``annotations`` list
            selecting which annotation to read. Defaults to 0.

    Returns:
        tuple[dict, dict]: ``(stats, num_images)`` where ``stats`` maps each label
        to its number of instances and ``num_images`` maps each label to the
        number of tasks (images) in which it appears at least once.
    """

    project = labelstudio_client.get_project(id=project_id)

    instance_counts = Counter()  # label -> number of annotated instances
    image_counts = Counter()     # label -> number of tasks containing the label

    for task in project.get_tasks():
        try:
            result = task['annotations'][annotator_id]['result']
        except (KeyError, IndexError, TypeError):
            # Task without the requested annotation: report it and move on
            traceback.print_exc()
            continue

        img_labels = [label
                      for annot in result
                      for label in annot['value']['rectanglelabels']]
        instance_counts.update(img_labels)
        # A label counts once per image, however many instances it has
        image_counts.update(set(img_labels))

    stats = {f"{k}": count for k, count in instance_counts.items()}
    num_images = dict(image_counts)
    print("Number of instances for each label is:\n",stats,end="\n\n")
    print("Number of images for each label is:\n",num_images)

    return stats, num_images

# get stats
for project_id in [93,]:
    get_project_stats(project_id)

In [None]:
ls_dir = r"D:\PhD\Data per camp\Exported annotations and labels\Wet season - Rep 1\all\labelstudio"
dest_dir = Path(ls_dir).with_name("coco-format")
save_excel_path = Path(ls_dir).with_name("stats.xlsx")

# Uncomment to run if needed
# convert_json_annotations_to_coco(input_dir=ls_dir,
#                                  dest_dir_coco=str(dest_dir),
#                                  ls_client=labelstudio_client,
#                                  parse_ls_config=True)

In [None]:
coco_annotations_dict = load_coco_annotations(dest_dir)
coco_annotations_dict

In [None]:
def get_labels_count(coco_annotation:dict):
    """Count annotations per category name in a COCO-style annotation dict.

    Args:
        coco_annotation (dict): must provide ``'annotations'`` (each with a
            ``'category_id'``) and ``'categories'`` (each with ``'id'`` and ``'name'``).

    Returns:
        dict: category name -> number of annotations with that category.
    """
    # Tally by numeric category id first
    per_id = Counter(annot['category_id'] for annot in coco_annotation['annotations'])

    # Lookup table from category id to its readable name
    id_to_name = {}
    for category in coco_annotation['categories']:
        id_to_name[category['id']] = category['name']

    # Re-key the tally by category name
    counts_by_name = {}
    for category_id, count in per_id.items():
        counts_by_name[id_to_name[category_id]] = count

    return counts_by_name

label_stats = dict()

for img_dir,coco_path in coco_annotations_dict.items():

    with open(coco_path,'r') as f:
        coco_annotation = json.load(fp=f)
    
    label_stats[img_dir] = get_labels_count(coco_annotation)

label_stats = pd.DataFrame.from_dict(label_stats,orient='index').fillna(0)

In [None]:
label_stats

In [None]:
# uncomment to save
label_stats.to_excel(save_excel_path)

## Visualize splits' distribution

In [None]:
import yaml
import pandas as pd
import os
from pathlib import Path

In [None]:
# load yaml
with open(r"D:\PhD\Data per camp\Extra training data\WAID\data_config.yaml",'r') as file:
    yolo_config = yaml.load(file,Loader=yaml.FullLoader)
yolo_config

In [None]:
label_map = yolo_config['names']

In [None]:
split = 'train'

path_dataset = os.path.join(yolo_config['path'],yolo_config[split][0])
path_dataset = path_dataset.replace('images','labels')

path_dataset

In [None]:
labels = list()

for txtfile in Path(path_dataset).glob("*.txt"):

    df = pd.read_csv(txtfile,sep=" ",names = ['class','x','y','w','h'] )
    df['class'] = df['class'].astype(int)    
    df['image'] = txtfile.stem
    labels.append(df)


In [None]:
df = pd.concat(labels,axis=0)
df['class'] = df['class'].map(label_map)

In [None]:
images_per_class = dict()
for cls in df['class'].unique():
    num_imge = df.loc[df['class'] == cls,'image'].unique().shape[0]
    images_per_class[cls] = num_imge

In [None]:
print("Split:", split)
print(images_per_class)

In [None]:
print('Split:',split)
print(df['class'].value_counts())

In [None]:
df['class'].value_counts().plot(kind='bar',figsize=(10,5),logy=True,title=f"{split} label distribution")

# Computing metrics on Validation set

In [None]:
from ultralytics import YOLO
# from pathlib import Path
import torch

In [None]:
# Load a model
path = r"C:/Users/Machine Learning/Desktop/workspace-wildAI/datalabeling/runs/mlflow/140168774036374062/d0dbe2b4cbe143258121a734edd9dca8/artifacts/weights/best.pt"
# path = r"C:\Users\fadel\OneDrive\Bureau\WILD-AI\datalabeling\base_models_weights\yolov5su.pt"
model = YOLO(path)  

In [None]:
pred = model.predict(r"C:\Users\fadel\OneDrive\Bureau\WILD-AI\datalabeling\data\train_wildai\images\01f1653a94f14044bf11d78c5b4221d1.JPG")

In [None]:
[result.obb for result in pred]

In [None]:
pred[0].obb.xyxy

In [None]:
pred[0].obb.cls

In [None]:
pred[0].obb.conf

In [None]:
# Customize validation settings
validation_results = model.val(data=r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\data\dataset_hn.yaml",
                                imgsz=1280,
                                batch=32,
                                conf=0.25,
                                iou=0.45,
                                device="cuda"
                            )

In [None]:
# version 6 is likely to have already seen the validation data

In [None]:
# Compute predictions
from dotenv import load_dotenv
load_dotenv('../.env')

from datalabeling.annotator import Annotator

for alias in ["version9", "version6"]:
    print("-"*10,alias,end="\n\n")
    name = "obb-detector" # detector, "obb-detector"
    handler = Annotator(mlflow_model_alias=alias,
                            mlflow_model_name=name,
                            confidence_threshold=0.25,
                            is_yolo_obb=name.strip() == "obb-detector",
                            dotenv_path="../.env")

    yolo_model = handler.model.unwrap_python_model().detection_model.detection_model.model
    validation_results = yolo_model.val(data=r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\data\dataset_hn.yaml",
                                    imgsz=1280,
                                    batch=32,
                                    conf=0.25,
                                    iou=0.45,
                                    device="cuda"
                                )
    
    print(validation_results)

# Optimizing inference params

In [None]:
from datalabeling.annotator import Detector
from datalabeling.arguments import Arguments
from datalabeling.dataset.sampling import (get_preds_targets, compute_detector_performance, get_uncertainty)    
import yaml, os
from hyperopt import tpe, hp, fmin

In [None]:
# params 
args = Arguments()
args.path_to_weights = r"C:/Users/Machine Learning/Desktop/workspace-wildAI/datalabeling/runs/mlflow/140168774036374062/57daf3bcd99b4dd4b040cb4f8670960c/artifacts/weights/best.pt"
# args.confidence_threshold = 0.2
# args.overlap_ratio = 0.1
args.use_sliding_window = True
args.device = "cuda"
args.is_yolo_obb = True
args.pred_results_dir = r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\.tmp"
args.data_config_yaml = r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\data\dataset_hn.yaml"
args.hn_uncertainty_method = "entropy"


In [None]:
# load groundtruth
with open(args.data_config_yaml,'r') as file:
    yolo_config = yaml.load(file,Loader=yaml.FullLoader)

split='val'
images_path = [os.path.join(yolo_config['path'],yolo_config[split][i]) for i in range(len(yolo_config[split]))]
images_path

In [None]:
def objective(params:dict):
    """Hyperopt objective: score one set of inference parameters.

    Builds a ``Detector`` from ``params``, computes predictions and targets on
    ``images_path`` and derives per-image performance.

    Args:
        params (dict): must contain ``'confidence_threshold'``, ``'overlap_ratio'``,
            ``'tilesize'`` and ``'imgsz'``.

    Returns:
        The loss to minimise: minus the mean ``map50`` minus the mean ``map75``
        of the per-image results.
    """
    confidence_threshold = params['confidence_threshold']
    overlap_ratio = params['overlap_ratio']
    tilesize = params['tilesize']
    imgsz = params['imgsz']

    # Define detector
    detector = Detector(
        path_to_weights=args.path_to_weights,
        confidence_threshold=confidence_threshold,
        overlap_ratio=overlap_ratio,
        tilesize=tilesize,
        imgsz=imgsz,
        use_sliding_window=args.use_sliding_window,
        device=args.device,
        is_yolo_obb=args.is_yolo_obb,
    )

    # Predictions are always recomputed (load_results=False) and tagged with the parameter values
    df_results, df_labels, col_names = get_preds_targets(
        images_dirs=images_path,
        pred_results_dir=args.pred_results_dir,
        detector=detector,
        load_results=False,
        save_tag=f"{imgsz}-{tilesize}-{overlap_ratio}-{confidence_threshold}",
    )

    per_image = compute_detector_performance(df_results,df_labels,col_names)
    # per_image = get_uncertainty(df_results_per_img=per_image,mode=args.hn_uncertainty_method)

    # minimizing the loss -> maximizing map50 and map75
    mean_map50 = per_image["map50"].mean()
    mean_map75 = per_image["map75"].mean()
    return -1.0*mean_map50 - mean_map75 #+ per_image["uncertainty"].mean()

In [None]:
# Each hyperopt label matches its dictionary key so that the `best` result is self-describing
search_space = {
                'confidence_threshold': hp.uniform('confidence_threshold', 0.1, 0.7),
                'overlap_ratio': hp.uniform('overlap_ratio', 0, 0.25),
                'tilesize': hp.choice(label='tilesize',options=[640, 2*640]),
                'imgsz': hp.choice(label='imgsz',options=[640, 2*640, 3*640, 4*640]),
            }

# NOTE: for hp.choice entries, fmin reports the index of the selected option,
# not the option value itself.
best = fmin(
    fn=objective, # Objective Function to optimize
    space=search_space, # Hyperparameter's Search Space
    algo=tpe.suggest, # Optimization algorithm (representative TPE)
    max_evals=2 # Number of optimization attempts
)

In [None]:
print(best)

# Dataset label format conversion

In [None]:
import pandas as pd
import numpy as np

In [None]:
def check_label_format(loaded_df:pd.DataFrame)->str:
    """Infer the label format from the number of columns.

    Args:
        loaded_df (pd.DataFrame): label values as loaded from a label file

    Raises:
        NotImplementedError: when the column count is neither 5 nor 9

    Returns:
        str: "yolo" for 5 columns, "yolo-obb" for 9 columns
    """
    # Column count -> format name
    formats_by_width = {5: "yolo", 9: "yolo-obb"}

    num_features = len(loaded_df.columns)
    label_format = formats_by_width.get(num_features)

    if label_format is None:
        raise NotImplementedError(f"The number of features ({num_features}) in the label file is wrong. Check yolo or yolo-obb format.")

    return label_format

In [None]:
label_path = r"D:\PhD\Data per camp\DetectionDataset\Rep 1\train\labels\DJI_20231002150401_0009_0_48_0_1271_640_1911.txt"
df = pd.read_csv(label_path,sep=' ',header=None)
df

In [None]:
isinstance(df.iloc[:,0].dtype, np.dtypes.IntDType)

In [None]:
check_label_format(df)

In [None]:
len(df.columns)

In [None]:
df.columns = ['id','x1','y1','x2','y2','x3','y3','x4','y4']

df

# Debug

In [None]:
from ultralytics import YOLO
import yaml
from datalabeling.arguments import Arguments
import os, logging, traceback
from pathlib import Path
import pandas as pd
import math

In [None]:
def sample_pos_neg(images_paths:list,ratio:float,seed:int=41):
    """Keep all images that have a label file and a sampled share of those that do not.

    An image is "empty" when no ``.txt`` file exists at its path with every
    ``'images'`` substring replaced by ``'labels'``.

    Args:
        images_paths (list): image paths to choose from.
        ratio (float): number of empty images to keep, as a multiple of the
            number of non-empty images (capped at the number of empty images).
        seed (int): random state used for the sampling. Defaults to 41.

    Returns:
        list: sampled empty image paths followed by all non-empty image paths.
    """

    def _missing_label(image_path) -> int:
        # 1 when the matching label file is absent, 0 otherwise
        label_file = Path(str(image_path).replace('images','labels')).with_suffix('.txt')
        return 0 if label_file.exists() else 1

    # build dataframe
    frame = pd.DataFrame({"image_paths": images_paths,
                          "is_empty": [_missing_label(p) for p in images_paths]})

    # split into empty (negative) and non-empty (positive) images
    empty_mask = frame["is_empty"] == 1
    negatives = frame.loc[empty_mask]
    positives = frame.loc[frame["is_empty"] == 0]

    num_empty = empty_mask.sum()
    num_non_empty = len(frame) - num_empty
    if num_empty == 0:
        print("contains only positive samples")

    num_sampled_empty = min(math.floor(num_non_empty*ratio), num_empty)
    picked_negatives = negatives.sample(n=num_sampled_empty, random_state=seed)

    # sampled negatives first, then every positive
    combined = pd.concat([picked_negatives, positives])

    print(f"Sampling: pos={num_non_empty} & neg={num_sampled_empty}",end="\n")

    return combined['image_paths'].to_list()


def get_data_cfg_paths_for_cl(ratio:float,data_config_yaml:str,cl_save_dir:str,seed:int=41,split:str='train'):
    """Sample one split of a YOLO dataset and write a data config pointing at the sample.

    For every image directory listed under ``split`` in ``data_config_yaml``,
    ``sample_pos_neg`` selects the images to keep. The selected paths are written
    to ``<cl_save_dir>/<split>_ratio_<ratio>-seed_<seed>.txt`` and a sibling
    ``.yml`` config is written in which ``split`` points at that text file
    (relative to the dataset root) while the other split is copied unchanged.

    Args:
        ratio (float): forwarded to ``sample_pos_neg``.
        data_config_yaml (str): path to the source YOLO data config.
        cl_save_dir (str): directory receiving the ``.txt`` and ``.yml`` files
            (created if missing).
        seed (int): forwarded to ``sample_pos_neg``. Defaults to 41.
        split (str): ``'train'`` or ``'val'``. Defaults to ``'train'``.

    Raises:
        NotImplementedError: when ``split`` is neither ``'train'`` nor ``'val'``.

    Returns:
        Path: location of the written ``.yml`` config.
    """
    # Fail before any file is written
    if split not in ('train', 'val'):
        raise NotImplementedError(f"split must be 'train' or 'val', got {split!r}")
    other_split = 'val' if split == 'train' else 'train'

    # NOTE(review): FullLoader is kept from the original; only use on trusted config files
    with open(data_config_yaml,'r') as file:
        yolo_config = yaml.load(file,Loader=yaml.FullLoader)

    root = yolo_config["path"]
    split_dirs = yolo_config[split]
    if isinstance(split_dirs, str):
        # A single directory given as a plain string would otherwise be iterated char by char
        split_dirs = [split_dirs]
    dirs_images = [os.path.join(root,p) for p in split_dirs]

    # sample positive and negative images
    sampled_imgs_paths = []
    for dir_images in dirs_images:
        print(f"Sampling positive and negative samples from {dir_images}")
        paths = sample_pos_neg(images_paths=list(Path(dir_images).iterdir()),
                       ratio=ratio,
                       seed=seed
                       )
        sampled_imgs_paths.extend(paths)

    # save selected images in txt file
    os.makedirs(cl_save_dir, exist_ok=True)
    save_path_samples = os.path.join(cl_save_dir,f"{split}_ratio_{ratio}-seed_{seed}.txt")
    pd.Series(sampled_imgs_paths).to_csv(save_path_samples,
                                        index=False,header=False)
    print(f"Saving {len(sampled_imgs_paths)} sampled images.")

    # save config: the sampled split points at the txt file, the other split is kept as is
    # (the 'val' case previously copied yolo_config['val'] into 'train')
    cfg_dict = {'path':root,
                'names': yolo_config['names'],
                split: os.path.relpath(save_path_samples,start=root),
                other_split: yolo_config[other_split],
                'nc': yolo_config['nc'],
            }
    save_path_cfg = Path(save_path_samples).with_suffix('.yml')
    with open(save_path_cfg,'w') as file:
        yaml.dump(cfg_dict,file)

    print(f"Saving samples at: {save_path_samples} and data_cfg at {save_path_cfg}",end="\n\n")

    return save_path_cfg


In [None]:
data_config_yaml=r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\data\dataset_1.yaml"
cl_save_dir = r"D:\PhD\Data per camp\DetectionDataset\continuous_learning"
cl_cfg_path = get_data_cfg_paths_for_cl(ratio=1.,data_config_yaml=data_config_yaml,cl_save_dir=cl_save_dir,seed=41,split='train')
cl_cfg_path


In [None]:
for lr, ratio, num_epochs,freeze in zip((1e-3,5e-3,1e-4,1e-5),(1,2,5,10),(20,5,5,5),(None,10,15,20)):

    print(lr, ratio, num_epochs, freeze)

In [None]:
# Delete the `labels.cache` file next to every train/val image directory of the dataset
data_config_yaml=r"C:\Users\Machine Learning\Desktop\workspace-wildAI\datalabeling\data\dataset_1.yaml"
try:
    with open(data_config_yaml,'r') as file:
        yolo_config = yaml.load(file,Loader=yaml.FullLoader)
    root = yolo_config["path"]
    for p in yolo_config["train"] + yolo_config["val"]:
        # os.pardir keeps the "<dir>/../labels.cache" layout without a hard-coded separator
        path = os.path.join(root,p,os.pardir,"labels.cache")
        if os.path.exists(path):
            print(f"Removing: {path}")
            os.remove(path)
except Exception:
    # Best-effort cleanup: report the failure without stopping the notebook
    traceback.print_exc()

In [None]:
pd.Series([False,True]) + pd.Series([False,True]) + pd.Series([False,True])