In [1]:
import os
import yaml
from itertools import product
import math
from pathlib import Path
import sys

In [2]:
PROJECT_ROOT = Path.cwd().parent  # points to nl-pe/
SRC_PATH = PROJECT_ROOT / "src"
sys.path.insert(0, str(SRC_PATH))  # make nl_pe importable
os.chdir(PROJECT_ROOT)  

In [56]:
def map_vars_to_config(
        data='',
        embedder = '',
        gpu_batch_size=None,
        #gp
        noise = None,
        sigma_sig = None,
        #Active Learning
        acquisition_f = None,
        #gr-eps
        epsilon = None,
        #ucb
        ucb_beta_const = None,
        ):

    base_data_dir = os.path.join('data', 'ir', data)
    embedder_dir = os.path.join(base_data_dir, embedder)

    #set query label for dataset:
    if data == 'beir\\nfcorpus' or data == 'beir\trec-covid':
        query_rel_label = 2
    else:
        raise ValueError(f"Unknown query label value for: {data}")

    var_config_mapping = {
        'data': {
            data: {
                'data': {
                    'd_text_csv': os.path.join(base_data_dir, 'docs.csv'),
                    'q_text_csv': os.path.join(base_data_dir, 'test_queries.csv'),
                    'index_path': os.path.join(embedder_dir, 'faiss', 'index'),
                    'doc_ids_path': os.path.join(embedder_dir, 'faiss', 'index_doc_ids.pkl'),
                    'qrels_path': os.path.join(base_data_dir, 'qrels', 'test.txt'),
                }
            }
        },
        'query_rel_label': {
            query_rel_label: {
                'gp': {'query_rel_label': query_rel_label},
                }
        },
        'gpu_batch_size': {
            gpu_batch_size: {
                'embedding': {'inference_batch_size': gpu_batch_size},
                'data': {'embedding_batch_size': gpu_batch_size},
                }
        },
        'device': {
            'gpu': {
                'inference_device': 'gpu',
                'tensor_ops_device': 'gpu',
                },
            'cpu': {
                'inference_device': 'cpu',
                'tensor_ops_device': 'cpu',}
        },
        #GP
        'noise': {
            noise: {
                'gp': {'observation_noise': noise},
                }
        },
        'sigma_sig': {
            sigma_sig: {
                'gp': {'signal_noise': sigma_sig},
                }
        },
        #Active Learning
        'acquisition_f': {
            acquisition_f: {
                'active_learning': {'acquisition_f': acquisition_f},
                }
        },
        #UCB
        'ucb_beta_const': {
            ucb_beta_const: {
                'active_learning': {'ucb_beta_const': ucb_beta_const},
                }
        },
        #GR-EPS
        'epsilon': {
            epsilon: {
                'active_learning': {'epsilon': epsilon},
                }
        },
    }
    return var_config_mapping

In [57]:
def deep_update(original, update):
    for key, value in update.items():
        if isinstance(value, dict):
            original[key] = deep_update(original.get(key, {}), value)
        else:
            original[key] = value
    return original


def save_config(config, base_dir, experiment_name):
    full_path = os.path.join(base_dir, experiment_name)
    os.makedirs(full_path, exist_ok=True)
    with open(os.path.join(full_path, "config.yaml"), 'w') as f:
        yaml.dump(config, f)

    #EMNLP EVAL CONFIG SAVING
    # measures = []
    # for i in range(1, 176):
    #     measures.append(f"ndcg_cut_{i}")
    # for i in range(1, 176):
    #     measures.append(f"map_cut_{i}")
    # for i in range(1, 176):
    #     measures.append(f"P_{i}")
    # for i in range(1, 176):
    #     measures.append(f"recall_{i}")

    # eval_params = {
    #     'measures': measures,
    #     'qrels_path' : os.path.join(os.path.dirname(config['data']['run_path']),'qrels.txt'),
    #     'logging': {'level': 'DEBUG'},
    # }

    # with open(os.path.join(full_path, "eval_config.yaml"), 'w') as f:
    #     yaml.dump(eval_params, f)

In [58]:
EXP_DIR = 'trials/test_param_writer'
if not os.path.exists(EXP_DIR):
    os.makedirs(EXP_DIR)

BASE_CONFIG_PATH = 'configs/base_config_dec23.yaml'

In [59]:
#set the parmeters
#options/explanations of params:
'''
param_grid = {
}
'''

# Define the parameters
param_grid = {
    'data': ['beir\\nfcorpus'],
    'embedder': ['miniLM'],
    ###device params
    'gpu_batch_size': [1024],
    'device': ['gpu'],
    ###variances
    'noise': [0.001],
    'sigma_sig': [0.01],
    ###acquisition_f
    'acquisition_f': ['ucb_const_beta'],
    ###gr-eps
    'epsilon': [0.3],
    ###ucb
    'ucb_beta_const': [1,2],
}



In [60]:
# Load the base config file
with open(BASE_CONFIG_PATH, 'r') as f:
    base_config = yaml.safe_load(f)

#for file naming, don't use these param keys:
PARAM_NAMES_TO_OMMIT = {'dataset', 'embedder'} #

# Generate and save config files for each combination
for param_values in product(*param_grid.values()):
    param_values_dict = dict(zip(param_grid.keys(), param_values))
    experiment_name = os.path.join(*[
        str(value)
        for param, value in param_values_dict.items()
        if param not in PARAM_NAMES_TO_OMMIT
    ])
    updated_config = yaml.safe_load(yaml.dump(base_config))  # deep copy

    # Apply updates to the config based on the current parameter values
    var_config_mapping = map_vars_to_config(
        #dimensionality can be set this way... for gemini
        data=param_values_dict.get('data'),
        embedder=param_values_dict.get('embedder'),
        gpu_batch_size=param_values_dict.get('gpu_batch_size'),
        noise=param_values_dict.get('noise'),
        sigma_sig=param_values_dict.get('sigma_sig'),
        acquisition_f=param_values_dict.get('acquisition_f'),
        ucb_beta_const=param_values_dict.get('ucb_beta_const'),
        epsilon=param_values_dict.get('epsilon'),
    )
    for param, value in param_values_dict.items():
        if param in var_config_mapping and value in var_config_mapping[param]:
            deep_update(updated_config, var_config_mapping[param][value])
        else:
            print(f"No mapping found for parameter '{param}' with value '{value}'")

    save_config(updated_config, EXP_DIR, experiment_name)


No mapping found for parameter 'embedder' with value 'miniLM'
No mapping found for parameter 'embedder' with value 'miniLM'
