In [None]:
import os
import json
import time
import pandas as pd
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
from yuntu.collection.methods import collection
from yuntu.soundscape.pipelines.probe_annotate import ProbeAnnotate

In [None]:
# Root of the SNMB demo tree; every config/result path below hangs off it, so
# relocating the demo only requires changing this one line.
SNMB_ROOT = "/shared_volume/audio-develop-backup-09-06-2021/demo/snmb"

# JSON file whose contents are passed as keyword arguments to `collection(...)`.
COL_CONFIG = f"{SNMB_ROOT}/configs/col_config.json"
# JSON probe configurations for the detection and classification pipelines.
PROBE_CONFIG_DETECTION = f"{SNMB_ROOT}/configs/probe_config_detection.json"
PROBE_CONFIG_CLASSIFICATION = f"{SNMB_ROOT}/configs/probe_config_classification.json"
# Results are written under BASE_DIR/<conglomerate id>/<pipeline name>/.
BASE_DIR = f"{SNMB_ROOT}/results"

# Matched against recording.metadata["conglomerado"]["nombre"] in the queries.
CONGLOMERATE_ID = '117960'
# Recordings whose media_info["duration"] is not strictly greater than this
# value are excluded (units presumably seconds -- TODO confirm).
MIN_RECORD_DURATION = 0.005

In [None]:
def probe_conglomerate(conglomerate_id, pipe_name, base_dir,
                       probe_config_path, col_config_path, col_query,
                       npartitions, client=None):
    """Run a ProbeAnnotate pipeline for one conglomerate, or reload its results.

    The recordings matching ``col_query`` are counted first. If there are none,
    nothing is computed. If a persisted ``annotation_result.parquet`` already
    exists for this pipeline, the stored result is read back instead of being
    recomputed. Otherwise the pipeline is computed and a small run summary is
    written to ``<base_dir>/<conglomerate_id>/<pipe_name>/info.json``.

    Parameters
    ----------
    conglomerate_id : str
        Identifier used as the name of the working directory under ``base_dir``
        and reported in the run summary.
    pipe_name : str
        Pipeline name; also the sub-directory holding its outputs.
    base_dir : str
        Directory under which per-conglomerate working directories live.
    probe_config_path : str
        Path to a JSON file with the probe configuration.
    col_config_path : str
        Path to a JSON file whose contents are passed as keyword arguments to
        ``collection``.
    col_query : callable
        Query selecting the recordings to process.
    npartitions : int
        Fed to the pipeline as ``npartitions`` when computing.
    client : optional
        Client forwarded to the pipeline's ``compute`` call.

    Returns
    -------
    tuple
        ``(annotation_result, info)``. ``annotation_result`` is ``None`` when no
        recording matched. ``info`` holds ``conglomerate_id``, ``total_files``
        and ``elapsed_time``; for reloaded results it is whatever ``info.json``
        contains, or a summary with ``elapsed_time`` set to ``None`` if that
        file is missing.
    """
    with open(probe_config_path) as file:
        probe_config = json.load(file)

    with open(col_config_path) as file:
        col_config = json.load(file)

    work_dir = os.path.join(base_dir, conglomerate_id)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(work_dir, exist_ok=True)

    col = collection(**col_config)
    try:
        nrecordings = col.db_manager.select(col_query, model="recording").count()
    finally:
        # Release the database connection even if the count query fails.
        col.db_manager.db.disconnect()

    print(f"Working with conglomerate {conglomerate_id}. Total files: {nrecordings}")

    pipe_dir = os.path.join(work_dir, pipe_name)
    info_path = os.path.join(pipe_dir, "info.json")
    tpath = os.path.join(pipe_dir, "persist", "annotation_result.parquet")

    start = time.time()
    if nrecordings > 0:
        pipeline = ProbeAnnotate(pipe_name, probe_config, col_config, col_query, work_dir=work_dir)

        if os.path.exists(tpath):
            print("Data already processed. Reading results...")
            annotation_result = pipeline["annotation_result"].read().compute()

            if os.path.exists(info_path):
                with open(info_path) as json_file:
                    info = json.load(json_file)
            else:
                # The persisted result can exist without its summary (e.g. a run
                # interrupted before info.json was written). Still return the
                # data; the original elapsed time is simply unknown.
                print(f"Run summary {info_path} not found. Timing information is unavailable.")
                info = {"conglomerate_id": conglomerate_id,
                        "total_files": nrecordings,
                        "elapsed_time": None}

            return annotation_result, info

        annotation_result = pipeline["annotation_result"].compute(client=client,
                                                                  feed={"npartitions": npartitions})
    else:
        print(f"No matched ultrasonic recordings in conglomerate {conglomerate_id}.")
        annotation_result = None

    elapsed = time.time() - start

    info = {"conglomerate_id": conglomerate_id,
            "total_files": nrecordings,
            "elapsed_time": elapsed}

    if annotation_result is not None:
        # Do not rely on the pipeline having created its own directory.
        os.makedirs(pipe_dir, exist_ok=True)
        with open(info_path, 'w') as outfile:
            json.dump(info, outfile)

    return annotation_result, info

In [None]:
# Shut down any client/cluster left over from an earlier cell or an earlier run
# of this cell: rebinding the names alone would leave the old workers running.
if "client" in globals():
    client.close()
if "cluster" in globals():
    cluster.close()

cluster = LocalCUDACluster()
client = Client(cluster)
# client.ncores() maps each worker to its core count, so this is the number of workers.
npartitions = len(client.ncores())

# The query is built with eval() so that CONGLOMERATE_ID and MIN_RECORD_DURATION
# are embedded in the lambda's source as literals instead of being looked up in
# the notebook namespace. NOTE(review): presumably required by how the collection
# backend consumes the lambda -- confirm before replacing with a plain closure.
# Only notebook constants may be interpolated here; never untrusted input.
det_col_query = eval(F'''(lambda recording: recording.metadata["conglomerado"]["nombre"] == "{CONGLOMERATE_ID}"
                     and recording.spectrum == "ultrasonic"
                     and recording.media_info["duration"] > {MIN_RECORD_DURATION})''')

# Detection pass over every sufficiently long ultrasonic recording of the conglomerate.
detection_result, detection_info = probe_conglomerate(
    conglomerate_id=CONGLOMERATE_ID,
    pipe_name="BATMX_probe",
    base_dir=BASE_DIR,
    probe_config_path=PROBE_CONFIG_DETECTION,
    col_config_path=COL_CONFIG,
    col_query=det_col_query,
    npartitions=npartitions,
    client=client,
)

In [None]:
# A cluster from the detection step may still be alive. Close it before starting
# a fresh one; otherwise two CUDA clusters coexist and the first one is orphaned
# as soon as `cluster` / `client` are rebound below.
if "client" in globals():
    client.close()
if "cluster" in globals():
    cluster.close()

cluster = LocalCUDACluster()
client = Client(cluster)
# client.ncores() maps each worker to its core count, so this is the number of workers.
npartitions = len(client.ncores())

# The query is built with eval() so that CONGLOMERATE_ID and MIN_RECORD_DURATION
# are embedded in the lambda's source as literals instead of being looked up in
# the notebook namespace. NOTE(review): presumably required by how the collection
# backend consumes the lambda -- confirm before replacing with a plain closure.
# Only notebook constants may be interpolated here; never untrusted input.
class_col_query = eval(F'''(lambda recording: recording.metadata["conglomerado"]["nombre"] == "{CONGLOMERATE_ID}"
                       and recording.spectrum == "ultrasonic"
                       and len(recording.annotations)>0
                       and recording.media_info["duration"] > {MIN_RECORD_DURATION})''')

# Classification pass, restricted to recordings that already carry annotations.
class_result, class_info = probe_conglomerate(
    conglomerate_id=CONGLOMERATE_ID,
    pipe_name="BATMX_class_probe",
    base_dir=BASE_DIR,
    probe_config_path=PROBE_CONFIG_CLASSIFICATION,
    col_config_path=COL_CONFIG,
    col_query=class_col_query,
    npartitions=npartitions,
    client=client,
)