<a href="https://colab.research.google.com/github/Elimeleth/aprende_a_crear_asistentes/blob/main/langchain_ragas_gro0q.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the notebook's dependencies into the kernel environment (-q: quiet, -U: upgrade).
# NOTE(review): tantivy, lancedb, langchain-groq and langchain-google-genai are not
# version-pinned here, unlike the other packages — consider pinning them so a
# fresh run resolves the same versions.
%pip install -qU langchain==0.2.7 tantivy lancedb langchain-groq langchain-google-genai sentence_transformers==3.0.1 langchain-community==0.2.7 langchain_experimental==0.0.62 torch==2.3.0 ragas==0.1.10

In [None]:
from google.colab import userdata

# Patch asyncio so event loops can be nested inside the loop the notebook
# kernel is already running — presumably required by the async work done by
# the libraries used later in this notebook (TODO confirm which one needs it).
import nest_asyncio
nest_asyncio.apply()

In [None]:
# from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI
import time

# Main chat model: used to answer questions, generate the test set and run the evaluation.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    api_key=userdata.get('GOOGLE_API_KEY'),
)

# Critic model handed to the ragas test-set generator; temperature pinned to 0.
critic_llm = ChatGoogleGenerativeAI(
    temperature=0,
    model='gemini-1.5-flash',
    api_key=userdata.get('GOOGLE_API_KEY'),
)

In [None]:
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings

# Embedding model shared by the LanceDB vector store, the semantic chunker,
# the ragas test-set generator and the ragas evaluation.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

In [None]:
def trace_time(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function's return value is passed through unchanged. The
    timing report is printed to stdout after the call returns; if ``func``
    raises, the exception propagates and no report is printed.

    Args:
        func: Callable to instrument.

    Returns:
        A wrapper that accepts the same arguments as ``func`` and keeps its
        ``__name__``, ``__doc__`` and other metadata.
    """
    import functools

    @functools.wraps(func)  # preserve the wrapped function's name/docstring
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        inicio = time.perf_counter()
        resultado = func(*args, **kwargs)
        fin = time.perf_counter()

        print("#"*20, "Tiempo de ejecucion", "#"*20)
        print(f"La función {func.__name__} tardó {fin - inicio:.4f} segundos en ejecutarse.")
        print("#"*50)

        return resultado
    return wrapper

In [None]:
from langchain.prompts import ChatPromptTemplate

@trace_time
def talk_to_assistant(input, prompt="Eres un asistente util"):
  """Send ``input`` to the chat model, framed by ``prompt``.

  Args:
    input: The user's message.
    prompt: Prompt template text. If it contains an ``{input}`` placeholder
      the message is substituted there; otherwise the message is appended
      after the prompt so that it actually reaches the model.

  Returns:
    The model's response as returned by ``invoke``.
  """
  # A template without "{input}" would silently discard the user's message:
  # the model would only ever see the instruction text.
  template_text = prompt if "{input}" in prompt else prompt + "\n\n{input}"
  return ChatPromptTemplate.from_template(template_text).pipe(llm).invoke({"input": input})


In [None]:
# Smoke test: send a greeting through the prompt -> model pipeline.
talk_to_assistant("hola")

#################### Tiempo de ejecucion ####################
La función talk_to_assistant tardó 0.5699 segundos en ejecutarse.
##################################################


AIMessage(content='¡Gracias! Me esfuerzo por ser lo más útil posible. ¿En qué puedo ayudarte hoy?', response_metadata={'token_usage': {'completion_tokens': 26, 'prompt_tokens': 42, 'total_tokens': 68, 'completion_time': 0.104, 'prompt_time': 0.009984519, 'queue_time': 0.19843529000000001, 'total_time': 0.113984519}, 'model_name': 'llama-3.1-70b-versatile', 'system_fingerprint': 'fp_b3ae7e594e', 'finish_reason': 'stop', 'logprobs': None}, id='run-28519759-deef-4e5a-9e50-d79bc01fda10-0', usage_metadata={'input_tokens': 42, 'output_tokens': 26, 'total_tokens': 68})

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker

# Load Data
import random

@trace_time
def loader_text_splitter(chunk_size=4000, chunk_overlap=200):
  """Load ``./data.txt`` and split it with ``CharacterTextSplitter``.

  Both parameters are optional so the function can be called without
  arguments; the defaults mirror LangChain's own text-splitter defaults.

  Args:
    chunk_size: Chunk size forwarded to ``CharacterTextSplitter``.
    chunk_overlap: Overlap between consecutive chunks, forwarded to
      ``CharacterTextSplitter``.

  Returns:
    The documents produced by ``split_documents``.
  """
  loader = TextLoader('./data.txt')
  documents = loader.load()
  text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
  return text_splitter.split_documents(documents)



@trace_time
def loader_semantic_splitter(type):
  """Load ``./data.txt`` and split it with ``SemanticChunker``.

  Args:
    type: Value passed as ``breakpoint_threshold_type`` to the chunker.

  Returns:
    The documents produced by ``split_documents``.
  """
  raw_documents = TextLoader('./data.txt').load()
  chunker = SemanticChunker(embeddings, breakpoint_threshold_type=type)
  return chunker.split_documents(raw_documents)

In [None]:
from langchain_community.vectorstores import LanceDB

@trace_time
def upsert_docs(docs):
  """Create a LanceDB vector store on table "test" and add ``docs`` to it.

  Args:
    docs: Documents to embed (with the shared ``embeddings`` model) and store.

  Returns:
    The LanceDB vector store holding the documents.
  """
  store = LanceDB(embedding=embeddings, table_name="test")
  store.add_documents(docs)
  return store


In [None]:
# Split data.txt with the character splitter and index the chunks in LanceDB.
vector_store = upsert_docs(loader_text_splitter())



#################### Tiempo de ejecucion ####################
La función loader_text_splitter tardó 0.0088 segundos en ejecutarse.
##################################################
#################### Tiempo de ejecucion ####################
La función upsert_docs tardó 2.3003 segundos en ejecutarse.
##################################################


In [None]:
from langchain import PromptTemplate

# Question-answering prompt: the retrieved context is injected as {context}
# and the user's question as {question} (wrapped in triple backticks).
prompt_template = """Use the following pieces of context to answer the question enclosed within  3 backticks at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Please provide an answer which is factually correct and based on the information retrieved from the vector store.
Please also mention any quotes supporting the answer if any present in the context supplied within two double quotes "" .


{context}


QUESTION:```{question}```
ANSWER:
"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# Keyword arguments selecting the prompt above for a QA chain.
chain_type_kwargs = dict(prompt=PROMPT)

In [None]:
from langchain.chains import RetrievalQA


@trace_time
def return_chain(llm, vector_store):
  """Build a "stuff" RetrievalQA chain over ``vector_store``.

  Args:
    llm: Chat model that answers the question.
    vector_store: Store from which the 5 nearest documents are retrieved.

  Returns:
    A RetrievalQA chain using ``PROMPT`` that also returns its source
    documents.
  """
  top_k_retriever = vector_store.as_retriever(search_kwargs={"k": 5})
  qa_options = dict(
    llm=llm,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    retriever=top_k_retriever,
    return_source_documents=True,
  )
  return RetrievalQA.from_chain_type(**qa_options)

In [None]:
import time
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context

# ragas test-set generator: `llm` writes the questions, `critic_llm` is passed
# as the critic, and the shared embedding model is reused.
generator = TestsetGenerator.from_langchain(generator_llm=llm, critic_llm=critic_llm, embeddings=embeddings)

In [None]:
# Generate a synthetic test set from the character-split documents:
# test_size=5 requested, with 50% simple, 25% reasoning and 25% multi-context questions.
testset = generator.generate_with_langchain_docs(
    loader_text_splitter(),
    test_size=5,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

In [None]:
# Convert the generated test set to a pandas DataFrame and show its row count.
df = testset.to_pandas()
len(df)

3

In [None]:
# NOTE(review): repeats the row-count check from the previous cell.
len(df)

3

In [None]:
from datasets import Dataset

def create_eval_dataset(dataset, docs, eval_size,retrieval_window_size):
    """Run the RAG chain over the test questions and collect the ragas inputs.

    The documents are indexed into a vector store, a RetrievalQA chain is
    built on top of it, and each of the first ``eval_size`` questions is
    answered by the chain.

    Args:
        dataset: Table with "question" and "ground_truth" columns, indexed
            positionally with ``.iloc`` (the callers pass a pandas DataFrame).
        docs: Chunked documents to index.
        eval_size: Number of leading rows to evaluate. Values larger than
            ``len(dataset)`` are capped to the number of available rows.
        retrieval_window_size: How many of the returned source documents are
            recorded as contexts for each question.

    Returns:
        A dict of parallel lists under the keys "question", "answer",
        "contexts" and "ground_truth".
    """
    vs = upsert_docs(docs)
    chain = return_chain(llm, vs)

    rag_response_data = {
        "question": [],
        "answer": [],
        "contexts": [],
        "ground_truth": [],
    }

    # Never index past the end of the table.
    n_rows = min(eval_size, len(dataset))

    for i in range(n_rows):
        print("eval", i)
        entry = dataset.iloc[i]
        question = entry['question']

        # invoke() replaces the deprecated chain(...) call style; "query" is
        # RetrievalQA's input key.
        response = chain.invoke({"query": question})

        # NOTE(review): only the first `retrieval_window_size` source documents
        # are recorded, while the chain may have answered from more of them —
        # confirm this truncation is intended for the context-based metrics.
        context = [doc.page_content for doc in response['source_documents'][:retrieval_window_size]]

        rag_response_data["question"].append(question)
        rag_response_data["ground_truth"].append(entry['ground_truth'])
        rag_response_data["contexts"].append(context)
        rag_response_data["answer"].append(response['result'])

    return rag_response_data


In [None]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_recall,
    context_precision,
)

def evaluate_response_time_and_accuracy(docs, ds_dict, llm, embed_model):
    """Score a set of RAG responses with ragas and time the evaluation.

    Args:
        docs: Not used by this function.
        ds_dict: Mapping passed to ``Dataset.from_dict`` to build the
            evaluation dataset.
        llm: Model handed to ragas ``evaluate``.
        embed_model: Embeddings handed to ragas ``evaluate``.

    Returns:
        A 5-tuple ``(elapsed, faithfulness, answer_relevancy,
        context_precision, context_recall)``. ``elapsed`` is the wall-clock
        duration in seconds of the whole ``evaluate`` call, not a per-sample
        average.
    """
    eval_dataset = Dataset.from_dict(ds_dict)
    selected_metrics = [faithfulness, answer_relevancy, context_precision, context_recall]

    # Time the ragas run; raise_exceptions=False is passed so evaluate does
    # not raise on failures.
    started = time.time()
    scores = evaluate(
        metrics=selected_metrics,
        dataset=eval_dataset,
        llm=llm,
        embeddings=embed_model,
        raise_exceptions=False,
    )
    elapsed = time.time() - started

    return (
        elapsed,
        scores['faithfulness'],
        scores['answer_relevancy'],
        scores['context_precision'],
        scores['context_recall'],
    )

In [None]:
# Chunk size used by the overlap sweep and printed in the evaluation reports.
CHUNK_SIZE = 45
# Evaluate every row of the generated test set.
EVAL_SIZE = len(df)
# Number of retrieved documents recorded as contexts for each question.
RETRIEVAL_WINDOW_SIZE = 2

In [None]:
def _print_eval_report(label, scores):
  """Print one evaluation report line prefixed with ``label``."""
  (avg_time, avg_faithfulness, avg_answer_relevancy,
   avg_context_precision, avg_context_recall) = scores
  print(f"{label} - "
        f"Average Response time: {avg_time:.2f}s, "
        f"Average Faithfulness: {avg_faithfulness:.2f}, "
        f"Average Answer Relevancy: {avg_answer_relevancy:.2f}, "
        f"Average Context Precision: {avg_context_precision:.2f}, "
        f"Average Context Recall: {avg_context_recall:.2f}")

def evaluate_chunk_size_chunk_overlap(docs, ds_dict, chunk_size=None, chunk_overlap=None):
  """Evaluate a character-split configuration and print its report.

  Args:
    docs: Chunked documents, forwarded to the evaluation helper.
    ds_dict: Collected questions, answers, contexts and ground truths.
    chunk_size: Chunk size to show in the report. When omitted, the
      module-level ``CHUNK_SIZE`` is used if defined, otherwise "n/a".
    chunk_overlap: Overlap to show in the report. When omitted, the
      module-level ``chunk_overlap`` is used if defined, otherwise "n/a".
  """
  # Look the labels up defensively: reading an undefined global here would
  # raise NameError after the evaluation has already been run.
  if chunk_size is None:
    chunk_size = globals().get("CHUNK_SIZE", "n/a")
  if chunk_overlap is None:
    chunk_overlap = globals().get("chunk_overlap", "n/a")

  scores = evaluate_response_time_and_accuracy(docs, ds_dict, llm, embeddings)
  _print_eval_report(f"Chunk size {chunk_size}, Overlap {chunk_overlap}", scores)

def evaluate_semantic_chunker(docs, ds_dict, threshold_type=None):
  """Evaluate a semantic-chunker configuration and print its report.

  Args:
    docs: Chunked documents, forwarded to the evaluation helper.
    ds_dict: Collected questions, answers, contexts and ground truths.
    threshold_type: Optional breakpoint threshold type to show in the report.
  """
  scores = evaluate_response_time_and_accuracy(docs, ds_dict, llm, embeddings)
  # Chunk size and overlap do not apply to semantic chunking, so the report
  # is labelled by chunker (and threshold type when the caller provides it).
  if threshold_type is None:
    label = "Semantic chunker"
  else:
    label = f"Semantic chunker ({threshold_type})"
  _print_eval_report(label, scores)

In [None]:
# Single evaluation run ahead of the overlap sweep.
# `chunk_overlap` is defined at module level because the printed report reads
# it, and CHUNK_SIZE is used for the split so the chunk size shown in the
# report matches the documents that were actually evaluated.
chunk_overlap = 15
docs = loader_text_splitter(CHUNK_SIZE, chunk_overlap)
ds_dict = create_eval_dataset(df, docs, EVAL_SIZE, RETRIEVAL_WINDOW_SIZE)

In [None]:
# Score the responses collected in the previous cell and print the report.
evaluate_chunk_size_chunk_overlap(docs, ds_dict)

In [None]:
# Sweep the overlap from 0 up to (but excluding) CHUNK_SIZE in steps of 15,
# re-indexing and re-evaluating for each value. The loop variable
# `chunk_overlap` is module-level and is the overlap shown in the printed report.
for chunk_overlap in range(0,CHUNK_SIZE,15):
  docs = loader_text_splitter(CHUNK_SIZE, chunk_overlap)
  ds_dict= create_eval_dataset(df,docs,EVAL_SIZE,RETRIEVAL_WINDOW_SIZE)
  evaluate_chunk_size_chunk_overlap(docs, ds_dict)
  # Brief pause between runs — presumably to ease API rate limits (TODO confirm).
  time.sleep(2)

In [None]:
# Single semantic-chunker run with the "gradient" breakpoint threshold.
# evaluate_semantic_chunker needs the chunked documents and the collected
# responses, so both are built first.
docs = loader_semantic_splitter("gradient")
ds_dict = create_eval_dataset(df, docs, EVAL_SIZE, RETRIEVAL_WINDOW_SIZE)
evaluate_semantic_chunker(docs, ds_dict)

In [None]:
# Evaluate the semantic chunker once per breakpoint threshold type.
for t in ["percentile","standard_deviation","interquartile","gradient"]:
  # Announce the threshold type so each printed report can be attributed to it.
  print(f"Breakpoint threshold type: {t}")
  docs = loader_semantic_splitter(t)
  ds_dict = create_eval_dataset(df, docs, EVAL_SIZE, RETRIEVAL_WINDOW_SIZE)
  evaluate_semantic_chunker(docs, ds_dict)
  # Brief pause between runs — presumably to ease API rate limits (TODO confirm).
  time.sleep(2)