In [None]:
import pandas as pd
from langchain_community.vectorstores import FAISS
from tqdm import tqdm
from joblib import Parallel, delayed
import time
import pickle
from langchain_gigachat import GigaChat, GigaChatEmbeddings

from src.retriever.retriever import Retriever
from src.rag.vanila_rag import RAG
from src.rag.adaptive_rag import AdaptiveRAG
from src.embedder.embedder_wrappers import Embedder_wrapper_e5_giga
from testing_rag import evaluate_RAG


%load_ext autoreload
%autoreload 2

In [None]:
# Chat model client. It is built with no explicit arguments here;
# NOTE(review): presumably credentials/settings are picked up from the
# environment or were stripped before sharing — confirm before running.
llm = GigaChat()

# Embedding client (also built without explicit arguments), wrapped in the
# project adapter that the retriever receives below.
embedder = GigaChatEmbeddings()
wrapped_embedder = Embedder_wrapper_e5_giga(embedder)

In [None]:
# Maximum number of attempts get_rag_answer makes for a single query.
n_retries = 10
def get_rag_answer(rag, query):
    """Ask ``rag`` for an answer to ``query``, retrying when the call raises.

    Up to ``n_retries`` attempts are made (module-level setting), with a
    5 second pause between consecutive attempts.

    Args:
        rag: Any object exposing ``get_answer(query)``.
        query: The query forwarded unchanged to ``rag.get_answer``.

    Returns:
        Whatever ``rag.get_answer(query)`` returns on the first successful
        attempt, or ``None`` if every attempt raised an ``Exception``.
    """
    for attempt in range(1, n_retries + 1):
        try:
            return rag.get_answer(query)
        except Exception as e:
            # Report the failure immediately rather than after the pause,
            # so the log lines up with when the error actually happened.
            print('exception:', e)
            # Only wait when another attempt will follow; sleeping after
            # the final failure would just delay the worker for nothing.
            if attempt < n_retries:
                time.sleep(5)
    # All attempts failed: keep the best-effort contract and signal it
    # with an explicit None so callers can filter failed queries.
    return None
def get_rag_answers(rag, save_path, queries=queries, n_jobs=4, prefer='threads'):
    """Answer every query with ``rag`` in parallel and pickle the answers.

    Args:
        rag: Pipeline object handed to ``get_rag_answer`` for each query.
        save_path: File path the list of answers is pickled to.
        queries: Iterable of queries. The default is the global ``queries``
            as it existed when this function was defined.
            NOTE(review): no visible cell defines ``queries`` — it must
            exist before this cell runs.
        n_jobs: Number of joblib workers.
        prefer: joblib backend preference.

    Returns:
        None. The answers are only written to ``save_path``.
    """
    pool = Parallel(n_jobs=n_jobs, prefer=prefer)
    # tqdm wraps the task generator, so the bar advances as tasks are
    # handed to the pool.
    tasks = (delayed(get_rag_answer)(rag, question) for question in tqdm(queries))
    answers = pool(tasks)
    with open(save_path, 'wb') as out_file:
        pickle.dump(answers, out_file)


### Vanilla RAG

In [None]:
# Retriever shared by every pipeline below; it returns k=5 results per query.
# NOTE(review): `index_path` is not defined in any visible cell — it must be
# set before this cell runs. 'ss' is presumably "similarity search"; confirm
# against Retriever.
retriever = Retriever(index_path, wrapped_embedder, strategy='ss', k=5)

In [None]:
# Baseline pipeline: built from the LLM and the retriever only, with no
# query enricher passed.
rag = RAG(llm, retriever)

In [None]:
# Answer every query with the baseline pipeline and pickle the list of
# answers to vanilla.pkl.
get_rag_answers(rag, 'vanilla.pkl')

### Adaptive RAG

In [None]:
# Rebinds `rag` to the adaptive pipeline; it is given the same LLM and
# retriever as the baseline.
rag = AdaptiveRAG(llm, retriever)

In [None]:
# Answer every query with the adaptive pipeline and pickle the answers.
get_rag_answers(rag, 'adaptive.pkl')
try:
    evaluate_RAG('adaptive.pkl')
except Exception as e:
    # Evaluation stays best-effort (the notebook keeps going), but the
    # failure is reported instead of being silently discarded. Catching
    # Exception rather than using a bare `except:` lets KeyboardInterrupt
    # through, so a long evaluation can still be interrupted.
    print('exception:', e)

## Query enrichment

In [None]:
from src.query_enrichment.query_enrichment import QueryEnrichment

In [None]:
def run_eval(name):
    """Generate and evaluate answers for one query-enrichment strategy.

    Builds a ``RAG`` pipeline whose enricher is configured with the single
    strategy ``name``, pickles its answers to ``<name>.pkl`` and then runs
    ``evaluate_RAG`` on that file.

    Args:
        name: Strategy name; also used as the stem of the output file.
    """
    answers_path = f'{name}.pkl'
    pipeline = RAG(llm, retriever, QueryEnrichment(llm, [name]))
    get_rag_answers(pipeline, answers_path)
    evaluate_RAG(answers_path)


### Hyde

In [None]:
# Generate and evaluate answers with the 'hyde' enrichment strategy
# (answers are pickled to hyde.pkl).
run_eval('hyde')

In [None]:
# Generate and evaluate answers with the 'rephrase' enrichment strategy
# (answers are pickled to rephrase.pkl).
run_eval('rephrase')

In [None]:
# Generate and evaluate answers with the 'step_back' enrichment strategy
# (answers are pickled to step_back.pkl).
run_eval('step_back')

In [None]:
# Re-runs only the evaluation of the saved step_back answers.
# NOTE(review): run_eval('step_back') already calls evaluate_RAG on this
# file, so this cell repeats that step — keep only if a re-evaluation is
# intended.
evaluate_RAG('step_back.pkl')

In [None]:
# Generate and evaluate answers with the 'answer' enrichment strategy
# (answers are pickled to answer.pkl).
run_eval('answer')

In [None]:
# Baseline scores: load the CSV and drop rows with any missing value.
# NOTE(review): no visible cell calls evaluate_RAG('vanilla.pkl');
# vanilla.csv is presumably produced by that call — confirm it exists.
res = pd.read_csv('vanilla.csv').dropna()
# Second row of describe(), i.e. the per-column mean for numeric columns.
res.describe().iloc[1]

In [None]:
# 'hyde' scores: load the CSV and drop rows with any missing value.
res = pd.read_csv('hyde.csv').dropna()
# Second row of describe(), i.e. the per-column mean for numeric columns.
res.describe().iloc[1]

In [None]:
# 'rephrase' scores: load the CSV and drop rows with any missing value.
res = pd.read_csv('rephrase.csv').dropna()
# Second row of describe(), i.e. the per-column mean for numeric columns.
res.describe().iloc[1]

In [None]:
# 'answer' scores: load the CSV and drop rows with any missing value.
res = pd.read_csv('answer.csv').dropna()
# Second row of describe(), i.e. the per-column mean for numeric columns.
res.describe().iloc[1]

In [None]:
# 'step_back' scores: load the CSV and drop rows with any missing value.
res = pd.read_csv('step_back.csv').dropna()
# Second row of describe(), i.e. the per-column mean for numeric columns.
res.describe().iloc[1]