In [2]:
import pandas as pd
import numpy as np
import time
import os

import random
import library as lib
import data_library as data_lib
from library import GlobalVars
from library import ExperimentConfig

import milvus_experiment as milvus_tool
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
import experiments_configurations as ex_cfg
from tqdm import tqdm

# Start with the Milvus helper's debug flag switched off.
milvus_tool.debug_mode = False

# Seed through the project's helper so repeated runs use the same seed value.
lib.init_random_seed(42)

# Name of the index configuration under test; it is also the attribute that is
# looked up on `ex_cfg` below and it is embedded in both CSV file names.
index_type = "milvus_pq_10"

experiments_prefix = GlobalVars.experiments_path
all_milvus_experiments_file_path = experiments_prefix + f"all_milvus_experiments_index_{index_type}.csv"
milvus_experiments_output_file = experiments_prefix + f"executed_milvus_experiments_index_{index_type}.csv"

# NOTE(review): presumably this expands the parameter grid stored on `ex_cfg`
# into the CSV that the experiment runner reads later - confirm in `library`.
index_parameter_grid = getattr(ex_cfg, index_type)
lib.combine_multiple_parameters_v3(all_milvus_experiments_file_path, index_parameter_grid)


In [None]:

def build_configuration_all10sec():
    """Create and fill the Milvus collection for one fixed configuration.

    The configuration is hard-coded: segment length 10 with overlap 5
    (seconds, going by the function name), the ``pe-mfcc_40`` feature
    aggregated with ``mean``, the ``COSINE`` metric, uniform voting over
    15 neighbours and an ``IVF_FLAT`` index.

    Returns:
        Whatever ``milvus_tool.create_and_fill_collection_for_specified_configuration``
        returns for this configuration.
    """
    experiment_cfg = ExperimentConfig(
        {
            # "lenght" is the spelling used consistently by the project's
            # config API in this notebook - do not "correct" the key.
            "segment_lenght": 10,
            "segment_overlap": 5,
            "feature": "pe-mfcc_40",
            "vector_operation": "mean",
            "metric_type": "COSINE",
            "vote_type": "uniform",
            "neighbors": 15,
            "index_params": {"index_type": "IVF_FLAT"},
            "balanced_type": 1,
            "normalize": 1,
        }
    )
    # NOTE(review): the positional True is presumably a "recreate the
    # collection if it already exists" flag - confirm in milvus_experiment.
    return milvus_tool.create_and_fill_collection_for_specified_configuration(experiment_cfg, True)

# Flip the helper's debug flag here when troubleshooting (left disabled):
# milvus_tool.debug_mode = True
collection_summary = build_configuration_all10sec()
collection_summary

index_specific_params= {'index_type': 'IVF_FLAT'} <class 'dict'>
test1
Self connection= [('default', <pymilvus.client.grpc_handler.GrpcHandler object at 0x0000021B38855150>)]
Conexiunea cu Milvus este activă, for databases: ['default']
Connect client to: http://localhost:19530
Client active with collections ['pe_mfcc_40_vectors_mean_len10_overlap5']
Initialised OK!
collection_name= pe_mfcc_40_vectors_mean_len10_overlap5
Create vector index with params: type:IVF_FLAT,metric_type:COSINE;valoarea params:None
Vector dim= 40 0
INDEX INFO: {'index_type': 'IVF_FLAT', 'metric_type': 'COSINE'}
Check collection: pe_mfcc_40_vectors_mean_len10_overlap5; recreated: True
train:c:\_MasterAI\_Disertatie\bees-analyze-vectordb\app\..\dataset\extended_segments_10_sec_overlap_5\__vectors_pe-mfcc_40_mean.cache
(41723, 12)
                                         vector_data  queen_status  \
0  [0.1760307, 0.79976666, 0.6502152, 0.611387, 0...             0   
1  [0.17432559, 0.8016668, 0.62642074, 0.630014

(500676, 'pe_mfcc_40_vectors_mean_len10_overlap5')

In [None]:
def execute_milvus_experiments(all_milvus_experiments_file_path, results_file_name, top_n=None, fields_names=None):
    """Run every experiment configuration listed in a CSV and save the results.

    Each CSV row is wrapped in an ``ExperimentConfig``, executed through
    ``milvus_tool.execute_configuration`` and merged with the returned metrics
    by ``lib.extract_experiment_results``. The collected rows are written to
    ``results_file_name`` only after the whole loop has finished, so an
    exception raised by any single experiment means nothing is written.

    Args:
        all_milvus_experiments_file_path: Path of the CSV holding one
            configuration per row. It must contain a string ``metric_type``
            column.
        results_file_name: Path of the CSV to write (saved without the index).
        top_n: When given, only the first ``top_n`` configurations are run.
            The default ``None`` runs all of them.
        fields_names: Optional list of column names to keep, in that order,
            in the output file. The default ``None`` keeps every column.

    Returns:
        ``True`` once the results file has been written.
    """
    milvus_tool.debug_mode = False
    experiments_df = pd.read_csv(all_milvus_experiments_file_path)
    if top_n is not None:
        experiments_df = experiments_df.head(top_n)

    all_results = []
    with tqdm(total=len(experiments_df), desc="Evaluating configs") as pbar:
        for index, row in experiments_df.iterrows():
            # Work on a copy: the Series yielded by iterrows() should not be
            # modified in place.
            config_row = row.copy()
            # Translate the metric names used in the CSV to the names passed on
            # to Milvus. NOTE(review): 'correlation' is mapped to 'L2',
            # presumably because no correlation metric is available - confirm.
            config_row['metric_type'] = (
                config_row['metric_type'].replace("cosine", "COSINE").replace('correlation', 'L2')
            )

            cfg = ExperimentConfig(config_row)
            GlobalVars.set_segment_lenght_and_overlap(cfg._SEGMENT_LENGHT, cfg._SEGMENT_OVERLAP)

            # Separate copy for the output row, taken before the run, so the
            # result extraction never touches the Series held by `cfg`.
            result_row = config_row.copy()
            metrics_json = milvus_tool.execute_configuration(cfg)
            result_row = lib.extract_experiment_results(
                new_row=result_row,
                tool_results=metrics_json,
                process_key=f"row_{index}",
                error="",
            )
            all_results.append(result_row)
            pbar.update(1)

    results_df = pd.DataFrame(all_results)
    if fields_names is not None:
        results_df = results_df[fields_names]
    results_df.to_csv(results_file_name, index=False)
    return True

# Show which experiment grid is about to be executed.
print("all_milvus_experiments_file_path=", all_milvus_experiments_file_path)

# Run the whole grid; the call's return value is this cell's output.
execute_milvus_experiments(
    all_milvus_experiments_file_path=all_milvus_experiments_file_path,
    results_file_name=milvus_experiments_output_file,
)

In [None]:
'''

Reads a configuration from file and tries to execute it
'''

# results_file_path='experiments' + os.sep + '20250319_all_experiments_not_normalised.csv'
# cfg_records = get_configurations_from_results(results_file_path, 100)
# cfg_records = cfg_records[cfg_records['segment_lenght']==5]
# cfg_dict = cfg_records.iloc[0].copy()
# cfg_dict["normalize"] = False
# cfg_dict["features"] = ['mfcc_40']
# cfg_dict["field_dim"] = 40
# cfg_dict["vector_operation"] = 'mean_iqr15'
# cfg_dict['metric_type'] = "COSINE"
# cfg_dict['index_type'] = "IVF_SQ8"
# cfg_dict['index_params'] = "{'nlist': 64, 'm': 10, 'nbits': 8}"
# cfg_dict['segment_lenght'] = 2
# cfg_dict['segment_overlap'] = 0
# cfg_dict['balanced_type'] = 1

# print(cfg_dict)

# results, results_json, training_set_size, testing_set_size = full_experiment(cfg_dict=cfg_dict, recreate_if_exists=True)

In [None]:
# all_configs_file = 'experiments/all_experiments.csv'
# exec_file = 'milvus_search_experiment.py'
# df = pd.read_csv(all_configs_file)
# first_row = df.iloc[1].copy()
# print(first_row)
# new_row =  search_experiment.execute_experiment(first_row)
# print(new_row)