In [1]:
from langchain_openai import ChatOpenAI

from langchain_chroma import Chroma
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)

from langchain.prompts import ChatPromptTemplate

from langchain.load import dumps, loads

from langchain_core.output_parsers import StrOutputParser
from operator import itemgetter

import pandas as pd

from tqdm import tqdm

In [2]:
from dotenv import load_dotenv
# Load environment variables
load_dotenv('../../.env')

True

# Создание датасета

In [14]:
def get_unique_union(documents: list[list]):
    """ Unique union of retrieved docs """
    # Flatten list of lists, and convert each Document to string
    flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
    # Get unique documents
    unique_docs = list(set(flattened_docs))
    # Return
    return [loads(doc) for doc in unique_docs]

In [44]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, max_tokens = 1000)

embedding_function = SentenceTransformerEmbeddings(model_name="DeepPavlov/rubert-base-cased-sentence", model_kwargs={'device': 'cpu'})
db = Chroma(embedding_function=embedding_function, persist_directory="../../artifacts/chroma_db")

retriever = db.as_retriever()

template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

retrieval_chain = generate_queries | retriever.map() | get_unique_union

template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": retrieval_chain, 
    "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

No sentence-transformers model found with name DeepPavlov/rubert-base-cased-sentence. Creating a new one with mean pooling.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [45]:
questions = []
with open('../../data/questions.txt', 'r') as f:
    questions = list(f.read().split('\n'))

In [59]:
df_answers = pd.DataFrame([], columns=['question', 'answer'])

In [60]:
for question in tqdm(questions):

    if question in list(df_answers['question']):
        continue
    
    res = final_rag_chain.invoke({"question":question})

    df_answers.loc[len(df_answers)] = [question, res]

df_answers.to_csv('../../data/question_answers.csv', index=False)

100%|██████████| 251/251 [36:49<00:00,  8.80s/it] 


In [61]:
df_answers

Unnamed: 0,question,answer
0,"Как мне эффективно копить деньги, если у меня ...",Чтобы эффективно копить деньги при нестабильно...
1,"С чего начать планирование личного бюджета, ес...",Начать планирование личного бюджета можно с не...
2,"Как грамотно распределять доходы и расходы, чт...",Чтобы грамотно распределять доходы и расходы и...
3,Какие существуют способы автоматизировать сбер...,В предоставленном контексте не упоминаются кон...
4,Как определить оптимальный размер «подушки без...,Оптимальный размер «подушки безопасности» обыч...
...,...,...
246,"Что такое «облигации субфедерального уровня», ...",Облигации субфедерального уровня — это долговы...
247,Как лучше накапливать на дорогостоящую медицин...,Накапливать на дорогостоящую медицинскую опера...
248,Почему важно учитывать сезонный фактор в расхо...,"Учет сезонного фактора в расходах, связанных с..."
249,"Что делать, если растут расценки на услуги ЖКХ...","Если расценки на услуги ЖКХ растут, а ваш дохо..."


# metrics

In [3]:
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [4]:
class Metrics(BaseModel):
    correctness: float = Field(description="accuracy of facts and absence of errors")
    fullness: float = Field(description="coverage of all aspects of the question")
    relevance: float = Field(description="alignment with the context and essence of the question")
    clarity: float = Field(description="clarity and accessibility of language")
    conciseness: float = Field(description="brevity without losing essential information")
    detail: float = Field(description="thoroughness and depth of explanation")
    usefulness: float = Field(description="real-world applicability of the answer")
    coherence: float = Field(description="logical consistency within the answer")
    safety: float = Field(description="absence of harmful or dangerous content")

parser = JsonOutputParser(pydantic_object=Metrics)

template = """You are a qualified finantial inspector. You'll be given with a pair question-answer. Your task is to rank the quality of the answer from 0 to 1.
The metrics are:
	{metrics}

Question: 
{question}

Answer: 
{answer}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["question", "answer"],
    partial_variables={"metrics": parser.get_format_instructions()},
)

llm = ChatOpenAI(model="gpt-4o", temperature=0)

chain = prompt | llm | parser

In [5]:
qa = pd.read_csv('../../data/question_answers.csv').values

In [127]:
final_metrics = []

for qu, an in tqdm(qa):
    
    res = chain.invoke({
        "question": qu,
        "answer": an
    })

    final_metrics.append(res)

100%|██████████| 251/251 [07:09<00:00,  1.71s/it]


In [128]:
df_metrics = pd.DataFrame(final_metrics)
df_metrics

Unnamed: 0,correctness,fullness,relevance,clarity,conciseness,detail,usefulness,coherence,safety
0,0.9,0.8,0.9,0.9,0.8,0.8,0.9,0.9,0.9
1,1.0,0.9,1.0,1.0,0.9,0.9,1.0,1.0,1.0
2,1.0,1.0,1.0,1.0,0.9,1.0,1.0,1.0,1.0
3,0.9,0.8,0.9,0.9,0.8,0.7,0.9,0.9,1.0
4,1.0,0.9,1.0,1.0,0.9,0.9,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...
246,1.0,1.0,1.0,1.0,0.9,1.0,1.0,1.0,1.0
247,1.0,0.9,1.0,1.0,0.9,0.8,1.0,1.0,1.0
248,1.0,1.0,1.0,1.0,0.9,1.0,1.0,1.0,1.0
249,1.0,1.0,1.0,1.0,0.9,1.0,1.0,1.0,1.0


In [130]:
df_metrics.mean(axis=0)

correctness    0.967729
fullness       0.902789
relevance      0.982072
clarity        0.960159
conciseness    0.862151
detail         0.895219
usefulness     0.966932
coherence      0.978088
safety         0.998805
dtype: float64