In [None]:
import pandas as pd
import numpy as np
import time
import os

import random
import library as lib
import data_library as data_lib
from library import GlobalVars
from library import ExperimentConfig

import milvus_experiment as milvus_tool
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
import experiments_configurations as ex_cfg
from tqdm import tqdm

# Switch off the debug flag exposed by the milvus_experiment module.
milvus_tool.debug_mode = False

# Seed through the project's helper with a fixed value so reruns start from
# the same seed.
lib.init_random_seed(42)

# Name of the attribute in experiments_configurations that holds the
# parameters for this run; it is also embedded in both CSV file names below.
index_type = "milvus_pq_10"

# CSV of experiment configurations; it is read back with pd.read_csv when the
# experiments are executed.
all_milvus_experiments_file_path = (
    GlobalVars.experiments_path
    + f"all_milvus_experiments_index_{index_type}.csv"
)
# NOTE(review): presumably expands the parameter set into one row per
# combination and writes it to the path above -- confirm in library.py.
lib.combine_multiple_parameters_v3(
    all_milvus_experiments_file_path,
    getattr(ex_cfg, index_type),
)

# CSV that receives the results of the executed experiments.
milvus_experiments_output_file = (
    GlobalVars.experiments_path
    + f"executed_milvus_experiments_index_{index_type}.csv"
)


In [None]:

def build_configuration_all10sec():
    """Create and fill the collection for one fixed, hard-coded configuration.

    The settings below are wrapped in an ``ExperimentConfig`` and handed to
    ``milvus_tool.create_and_fill_collection_for_specified_configuration``
    together with a literal ``True`` as second positional argument.

    Returns:
        Whatever ``create_and_fill_collection_for_specified_configuration``
        returns for this configuration.
    """
    index_settings = {'index_type': 'IVF_FLAT'}
    experiment_config = ExperimentConfig(
        {
            # Keep the "lenght" spelling: it is the key the project uses
            # (compare cfg._SEGMENT_LENGHT elsewhere in this notebook).
            "segment_lenght": 10,
            "segment_overlap": 'all',
            "feature": "pe-mfcc_40",
            "vector_operation": "mean",
            "metric_type": "COSINE",
            "index_params": index_settings,
            "normalize": 1,
            "vote_type": "uniform",
            "neighbors": 15,
        }
    )
    # NOTE(review): the meaning of the True flag is defined by the helper in
    # milvus_experiment -- confirm there before changing it.
    return milvus_tool.create_and_fill_collection_for_specified_configuration(
        experiment_config, True
    )

# Invoke the helper defined in this cell so the create-and-fill step for the
# fixed configuration runs whenever the cell is executed.
build_configuration_all10sec()

In [None]:
def execute_milvus_experiments(all_milvus_experiments_file_path, results_file_name,
                               top_n=None, fields_names=None):
    """Run every configuration listed in a CSV and save the collected results.

    Each row of the input CSV is wrapped in an ``ExperimentConfig``, executed
    with ``milvus_tool.execute_configuration`` and combined with its results
    by ``lib.extract_experiment_results``. The rows are then written to a
    single CSV file.

    Side effects: sets ``milvus_tool.debug_mode`` to ``False`` and calls
    ``GlobalVars.set_segment_lenght_and_overlap`` once per row.

    Args:
        all_milvus_experiments_file_path: Path of the CSV holding one
            experiment configuration per row. It must contain a
            ``metric_type`` column with string values.
        results_file_name: Path of the CSV to write (without the index column).
        top_n: If not ``None``, only the first ``top_n`` rows are executed,
            which is handy for a quick smoke run. ``None`` runs everything.
        fields_names: Optional list of column names to keep, in that order, in
            the output file (e.g. ``["metric_type", "neighbors", "accuracy"]``).
            Every name must exist in the collected results. ``None`` keeps
            all columns.

    Returns:
        ``True`` once the results file has been written.
    """
    milvus_tool.debug_mode = False
    experiments = pd.read_csv(all_milvus_experiments_file_path)
    if top_n is not None:
        experiments = experiments.head(top_n)

    all_results = []
    with tqdm(experiments.iterrows(), total=len(experiments),
              desc="Evaluating configs") as progress:
        for index, row in progress:
            # Rows yielded by iterrows() should not be modified directly
            # (pandas does not guarantee it works), so edit a private copy.
            row = row.copy()
            # Translate the metric names used in the CSV into the names
            # expected downstream.
            row['metric_type'] = (
                row['metric_type'].replace("cosine", "COSINE").replace('correlation', 'L2')
            )

            cfg = ExperimentConfig(row)
            GlobalVars.set_segment_lenght_and_overlap(cfg._SEGMENT_LENGHT, cfg._SEGMENT_OVERLAP)

            # Snapshot the configuration before running it so the stored row
            # is independent of whatever the run does with `cfg`.
            new_row = row.copy()
            metrics_json = milvus_tool.execute_configuration(cfg)
            new_row = lib.extract_experiment_results(
                new_row=new_row,
                tool_results=metrics_json,
                process_key=f"row_{index}",
                error="",
            )
            all_results.append(new_row)

    results = pd.DataFrame(all_results)
    if fields_names is not None:
        results = results[fields_names]
    results.to_csv(results_file_name, index=False)
    return True

# Show which configuration file is about to be processed, then run every
# configuration in it and write the results to the output CSV.
print("all_milvus_experiments_file_path=", all_milvus_experiments_file_path)
execute_milvus_experiments(
    all_milvus_experiments_file_path=all_milvus_experiments_file_path,
    results_file_name=milvus_experiments_output_file,
)