In [1]:
import logging
import os

from hirundo.dataset_optimization import OptimizationDataset
from hirundo.enum import LabellingType
from hirundo.storage import StorageIntegration, StorageLink, StorageTypes

# Module-level logger named after the running module.
logger = logging.getLogger(__name__)

# Suffix read from the UNIQUE_ID environment variable (empty string when the
# variable is unset). Every "." and "/" is swapped for "-" in a single pass
# before the value is embedded in the resource names built further down.
unique_id = os.environ.get("UNIQUE_ID", "").translate(
    str.maketrans({".": "-", "/": "-"})
)

# Pre-test cleanup, in three ordered phases: cancel runs, delete datasets,
# delete storage integrations.
# NOTE(review): each phase acts on everything its list() call returns, not
# only on resources created by this script -- confirm that is intended for
# the account this runs against.

# Phase 1: collect every truthy "run_id" among the listed datasets, then
# cancel each of those runs. The IDs are gathered in full before the first
# cancel call is issued.
run_ids = list(
    filter(None, (entry["run_id"] for entry in OptimizationDataset.list()))
)
for run_id in run_ids:
    OptimizationDataset.cancel_by_id(run_id)

# Phase 2: list the datasets again and delete each one by its "id".
dataset_ids = [entry["id"] for entry in OptimizationDataset.list()]
for dataset_id in dataset_ids:
    OptimizationDataset.delete_by_id(dataset_id)

# Phase 3: delete each listed storage integration by its "id".
storage_integration_ids = [entry["id"] for entry in StorageIntegration.list()]
for storage_integration_id in storage_integration_ids:
    StorageIntegration.delete_by_id(storage_integration_id)

# Storage integration backing the dataset: LOCAL type, path "MASC", with the
# unique suffix appended to its name.
masc_storage_integration = StorageIntegration(
    name=f"STT-MASC-dataset{unique_id}",
    type=StorageTypes.LOCAL,
    path="MASC",
)

# Speech-to-text dataset (language "ar") stored through the integration
# above, with its metadata read from "meta-old.csv".
test_dataset = OptimizationDataset(
    name=f"TEST-STT-MASC-dataset{unique_id}",
    labelling_type=LabellingType.SpeechToText,
    language="ar",
    dataset_storage=StorageLink(storage_integration=masc_storage_integration),
    dataset_metadata_path="meta-old.csv",
)

# Start the optimization run, then fetch and print whatever check_run()
# returns.
# NOTE(review): check_run() appears to wait for the run to finish (the
# recorded cell output shows a progress bar reaching 100%) -- confirm
# against the SDK documentation.
test_dataset.run_optimization()
results = test_dataset.check_run()
print(results)

  from .autonotebook import tqdm as notebook_tqdm
Cancelling run with ID: e2ee490c-99de-444b-8f34-83caf2d715c4
Deleted dataset with ID: 135
Deleted storage integration with ID: 140
Created storage integration with ID: 141
Created dataset with ID: 136
Started the run with ID: efd28fc7-d295-4a8b-9d95-b4d3bbceadc8
Optimization run completed successfully: 100%|██████████| 100.0/100.0 [05:16<00:00,  3.17s/it]      

suspects=             image_path     label  split  suspect_score  suspect_level  \
3632  /images/36320.png       cat  train   1.000000e+00            1.0   
1479  /images/14790.png      ship  train   9.710029e-01            1.0   
894    /images/8940.png      ship  train   7.296918e-01            1.0   
2034  /images/20340.png      deer  train   7.133279e-01            1.0   
709    /images/7090.png       dog  train   4.025281e-01            1.0   
...                 ...       ...    ...            ...            ...   
2216  /images/22160.png     truck  train   1.899035e-13            0.0   
1814  /images/18140.png      ship  train   9.983570e-14            0.0   
1679  /images/16790.png      bird  train   7.334427e-14            0.0   
288    /images/2880.png     truck  train   5.965531e-14            0.0   
2204  /images/22040.png  airplane  train   2.194576e-14            0.0   

     suggested_label  suggested_label_conf  rank  
3632             cat              0.999084     1  



