In [1]:
import sys
sys.path.insert(0, "/home/dzigen/Desktop/ITMO/smiles2024/RAG-project-SMILES-2024-")

import pandas as pd 
import ast
import numpy as np
import json
from tqdm import tqdm
from IPython.display import clear_output
import os

from src.Retriever import ThresholdRetriever, ThresholdRetrieverConfig
from src.Reader import LLM_model, LLM_Hardw_Conf, LLM_Hyper_Conf
from src.utils import RetrieverMetrics, ReaderMetrics

In [2]:
# Output files of this trial: the run log and the raw generated answers.
SAVE_LOGFILE = './logs/trial_1.json'
SAVE_ANSWERS = './logs/answers_1.json'
# Number of leading rows kept from every benchmark table.
BENCHES_SIZE = 1000

# Retriever part
# Benchmark name -> versions of its dense DB and of its benchmark table.
BENCHMARKS_INFO = {'mtssquad': dict(db='v2', table='v1')}
# Keyword arguments forwarded to ThresholdRetrieverConfig.
# NOTE(review): the exact meaning of fetch_k / threshold / max_k is defined by
# ThresholdRetriever, which is not visible here - confirm before tuning.
RETRIEVER_CUSTOM_ARGS = dict(
    model_path="/home/dzigen/Desktop/nlp_models/intfloat/multilingual-e5-small",
    densedb_kwargs=dict(metadata={"hnsw:space": "ip"}),
    model_kwargs=dict(device='cuda'),
    encode_kwargs=dict(normalize_embeddings=True, prompt='query: '),
    params=dict(fetch_k=50, threshold=0.2, max_k=3),
)

# Reader part
# Keyword arguments forwarded to LLM_Hyper_Conf (generation settings).
READER_CUSTOME_ARGS = dict(
    temperature=0.2,
    top_k=40,
    top_p=0.95,
    min_p=0.05,
    typical_p=1,
    max_tokens=-1,
)


##### Configure Retriever-part

In [3]:
# Resolve, for every benchmark, the location of its table and of its dense DB.
banchmark_paths = {}
for name, version in BENCHMARKS_INFO.items():
    banchmark_paths[name] = {
        'table': f"../../../data/{name}/tables/{version['table']}/benchmark.csv",
        'dense_db': f"../../../data/{name}/dbs/{version['db']}/densedb"
    }

# Build one ThresholdRetrieverConfig per benchmark.
benchmark_config = {}
for name, paths in banchmark_paths.items():
    RETRIEVER_CUSTOM_ARGS['densedb_path'] = paths['dense_db']
    # Rebind 'densedb_kwargs' to a fresh dict instead of mutating the shared one:
    # otherwise every config built here would reference the same nested dict and
    # all of them would end up with the name of the last benchmark.
    RETRIEVER_CUSTOM_ARGS['densedb_kwargs'] = {
        **RETRIEVER_CUSTOM_ARGS['densedb_kwargs'],
        'name': name,
    }

    benchmark_config[name] = ThresholdRetrieverConfig(**RETRIEVER_CUSTOM_ARGS)

In [4]:
# Load the benchmark datasets (first BENCHES_SIZE rows of every table).
benchmarks_df = {}
for name, bench_path in banchmark_paths.items():
    # .copy() detaches the slice from the full frame, so the column assignments
    # below are applied to an independent frame (no SettingWithCopyWarning).
    bench_table = pd.read_csv(bench_path['table'], sep=';').iloc[:BENCHES_SIZE, :].copy()
    # These columns are stored as Python-literal strings; parse them back.
    for literal_column in ('chunk_ids', 'contexts'):
        bench_table[literal_column] = bench_table[literal_column].map(ast.literal_eval)
    benchmarks_df[name] = bench_table


In [5]:
# Initialise the retrievers: one ThresholdRetriever per benchmark config.
retrievers = dict(
    (bench_name, ThresholdRetriever(bench_config))
    for bench_name, bench_config in benchmark_config.items()
)

No sentence-transformers model found with name /home/dzigen/Desktop/nlp_models/intfloat/multilingual-e5-small. Creating a new one with mean pooling.


##### Configure Reader-part

In [6]:
# Reader configuration objects, both passed to LLM_model further down.
# LLM_Hardw_Conf is created with no arguments (its own defaults);
# LLM_Hyper_Conf receives the generation settings from READER_CUSTOME_ARGS.
config1 = LLM_Hardw_Conf()
config2 = LLM_Hyper_Conf(**READER_CUSTOME_ARGS)

In [7]:
# Metric helper providing bleu1 / bleu2 / exact_match / meteor for the evaluation loop.
# NOTE(review): base_dir is a machine-specific absolute path (the same project
# root that the imports cell inserts into sys.path) - adjust it when running elsewhere.
reader_metrics = ReaderMetrics(base_dir="/home/dzigen/Desktop/ITMO/smiles2024/RAG-project-SMILES-2024-")

Loading Meteor...
Loading ExactMatch


In [8]:
# Build the reader from the two config objects created above; the evaluation
# loop calls reader.generate(question, contexts) on it.
reader = LLM_model(config1, config2)

##### Evaluating pipeline

In [None]:
# Run every benchmark question through retriever -> reader, score the generated
# answer against the gold answer and keep both the scores and the answers.
benchmarks_score = {}
gen_answers = {}
for name, bench_table in benchmarks_df.items():
    scores = {
        'bleu2': [],
        'bleu1': [],
        'exact_match': [],
        'meteor': []
    }
    gen_answers[name] = []

    bench_size = bench_table.shape[0]
    questions = bench_table['question']
    gold_answers = bench_table['answer']
    for j in range(bench_size):
        # j is a row position, so read positionally (.iloc) rather than by index label.
        question = questions.iloc[j]
        relevant_docs = retrievers[name].invoke(question)

        # Element [1] of every retrieved item is what the reader receives as context.
        # NOTE(review): presumably the chunk text - confirm against ThresholdRetriever.invoke.
        contexts = [doc[1] for doc in relevant_docs]
        raw_gen_answer = reader.generate(question, contexts)
        gen_answer = raw_gen_answer['choices'][0]['message']['content']
        gold_answer = gold_answers.iloc[j]

        # Keep the raw answer so it can be saved next to the scores.
        gen_answers[name].append(gen_answer)

        # Per-sample metrics: every call gets a single prediction/reference pair.
        scores['bleu1'].append(reader_metrics.bleu1([gen_answer], [gold_answer]))
        scores['bleu2'].append(reader_metrics.bleu2([gen_answer], [gold_answer]))
        scores['exact_match'].append(reader_metrics.exact_match([gen_answer], [gold_answer]))
        scores['meteor'].append(reader_metrics.meteor([gen_answer], [gold_answer]))

        # Live progress; j + 1 is the number of samples processed so far, so the
        # counter reaches bench_size on the last sample.
        clear_output(wait=True)
        print(f'''[{j + 1} / {bench_size}] Median scores ({name}):
              bleu1: {np.median(scores['bleu1'])}
              bleu2: {np.median(scores['bleu2'])}
              exact_match: {np.median(scores['exact_match'])}
              meteor: {np.median(scores['meteor'])}
              
              Last sample:
              - question: {question}
              - gold_answer: {gold_answer}
              - gen_answer: {gen_answer}''')

    benchmarks_score[name] = scores
        

##### Saving logs

In [None]:
# Save the results: run configuration + scores, and the generated answers.
# Never overwrite the output of a previous trial.
existing_outputs = [path for path in (SAVE_LOGFILE, SAVE_ANSWERS) if os.path.exists(path)]
if existing_outputs:
    print("Файл существует!")
    raise ValueError(f"Refusing to overwrite existing output file(s): {existing_outputs}")

log_data = {'info': BENCHMARKS_INFO, 
            'benchmark_sizes': BENCHES_SIZE,
            'reader': READER_CUSTOME_ARGS,
            'retriever': RETRIEVER_CUSTOM_ARGS,
            'scores': benchmarks_score}

# Serialise everything BEFORE touching the file system: open(..., 'w') creates the
# file immediately, so a serialisation error after opening would leave an empty
# file behind and the guard above would then block the retry.
# ensure_ascii=False keeps non-ASCII text readable; the files are written as UTF-8.
log_payload = json.dumps(log_data, indent=1, ensure_ascii=False)
answers_payload = json.dumps(gen_answers, indent=1, ensure_ascii=False)

# Create the target directories if needed so the results of a long run are not
# lost to a missing ./logs folder.
for out_path in (SAVE_LOGFILE, SAVE_ANSWERS):
    os.makedirs(os.path.dirname(out_path) or '.', exist_ok=True)

with open(SAVE_LOGFILE, 'w', encoding='utf-8') as fd:
    fd.write(log_payload)

with open(SAVE_ANSWERS, 'w', encoding='utf-8') as fd:
    fd.write(answers_payload)