In [5]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from dotenv import load_dotenv

load_dotenv()

loader = TextLoader("catbank.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

Created a chunk of size 411, which is longer than the specified 300
Created a chunk of size 301, which is longer than the specified 300


In [6]:
# RAGAS expects a file_name dict as key
for document in chunks:
    document.metadata['file_name'] = document.metadata['source']

In [7]:
chunks

[Document(metadata={'source': 'catbank.txt', 'file_name': 'catbank.txt'}, page_content='In the heart of the bustling city of Whiskerville, there stood a peculiar establishment unlike any other - the Feline Financial Bank, a bank exclusively for cats. This extraordinary institution was founded by Sir Purrington, a visionary Siamese cat who believed that every feline deserved a place to secure their precious belongings - be it their beloved toys, treasured catnip, or the much-coveted tuna treats.'),
 Document(metadata={'source': 'catbank.txt', 'file_name': 'catbank.txt'}, page_content='The architecture of the bank was a marvel in itself. Crafted with sleek lines and comfortable lounging spots, it blended elegance with cat-friendly design. The entrance featured a grand revolving door, sized perfectly for all breeds, from the majestic Maine Coons to the dainty Munchkins.'),
 Document(metadata={'source': 'catbank.txt', 'file_name': 'catbank.txt'}, page_content='Inside, the bank was a haven 

In [8]:
from langchain_openai import ChatOpenAI
# from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
# from ragas.testset.generator import TestsetGenerator
# from ragas.testset.evolutions import simple, reasoning, multi_context
import nest_asyncio
nest_asyncio.apply()

llm = ChatOpenAI(openai_api_base="http://localhost:5000/v1", openai_api_key="lm-studio")
emb = HuggingFaceEmbeddings(model_name="sentence-transformers/allenai-specter", model_kwargs={"device": "cpu"})

# generator = TestsetGenerator.from_langchain(
#     generator_llm=llm,
#     critic_llm=llm,
#     embeddings=emb,
# )

# testset = generator.generate_with_langchain_docs(chunks, test_size=1, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25}, raise_exceptions=False)



In [9]:
from langchain_community.vectorstores import Chroma


vectorstore = Chroma.from_documents(chunks, emb)
retriever = vectorstore.as_retriever()

In [10]:
from langchain_core.prompts import PromptTemplate

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""

prompt = PromptTemplate(
    template=template,
    input_variables=["context","question"]
  )

In [12]:
from langchain_core.runnables import RunnablePassthrough
from langchain.output_parser import StrOutputParser

rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

ModuleNotFoundError: No module named 'langchain.runnables'

In [None]:
from datasets import Dataset

# Simulate a testset with arXiv-related questions and ground truths
testset_data = {
    "question": [
        "What is the main contribution of the paper on transformer models?",
        "How does the paper address the vanishing gradient problem?",
        "What dataset was used in the GAN paper?",
        "Can you summarize the results of the paper on quantum computing?",
        "What is the key finding in the latest NLP paper?",
        "How does the proposed method in the reinforcement learning paper work?",
        "What optimization techniques were used in the neural network paper?",
        "What are the future research directions mentioned in the computer vision paper?",
        "What are the applications of the proposed model in the graph networks paper?",
        "How does the paper on unsupervised learning differ from traditional methods?"
    ],
    "ground_truth": [
        "The paper proposes a new architecture for transformers that improves efficiency in training large models.",
        "The paper addresses the vanishing gradient problem by introducing residual connections.",
        "The paper used the CIFAR-10 dataset for training the GAN model.",
        "The paper shows promising results in quantum error correction using a new approach.",
        "The key finding is the improvement of language model performance by using novel training techniques.",
        "The proposed method is a deep Q-learning algorithm with additional exploration mechanisms.",
        "Adam optimizer and batch normalization were primarily used in the paper.",
        "Future research directions include extending the method to handle multimodal data.",
        "The proposed model has applications in social network analysis and biological systems.",
        "The paper introduces self-supervised learning, which differs by not requiring labeled data."
    ]
}

# Create the testset
testset = Dataset.from_dict(testset_data)

# Prepare the questions and ground truths
questions = testset.to_pandas()["question"].to_list()
ground_truth = testset.to_pandas()["ground_truth"].to_list()

# Initialize an empty data dictionary for the new dataset
data = {"question": [], "answer": [], "contexts": [], "ground_truth": ground_truth}

# Simulate RAG process: querying and retrieving documents
for query in questions:
    # Mock RAG chain and retriever responses (replace with actual RAG chain and retriever)
    mock_answer = "This is a mock answer for query: " + query  # Simulate answer from RAG
    mock_context = ["This is a context for the query: " + query]  # Simulate context from retriever
    
    # Append the results to the data dictionary
    data["question"].append(query)
    data["answer"].append(mock_answer)
    data["contexts"].append(mock_context)

# Create a dataset from the dictionary
dataset = Dataset.from_dict(data)

# Output the generated dataset (for display or further evaluation)
print(dataset)

In [None]:
from ragas import evaluate
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_relevancy,
    context_recall,
    context_precision,
)

result = evaluate(
    dataset = dataset,
    metrics=[
        context_relevancy,
        context_precision,
        context_recall,
        faithfulness,
        answer_relevancy,
    ],
)

In [None]:
result.to_pandas()

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

df = result.to_pandas()

heatmap_data = df[['context_relevancy', 'context_precision', 'context_recall', 'faithfulness', 'answer_relevancy']]

cmap = LinearSegmentedColormap.from_list('green_red', ['red', 'green'])

plt.figure(figsize=(10, 8))
sns.heatmap(heatmap_data, annot=True, fmt=".2f", linewidths=.5, cmap=cmap)

plt.yticks(ticks=range(len(df['question'])), labels=df['question'], rotation=0)

plt.show()


### Add LangFuse

In [None]:
from langfuse import Langfuse

langfuse = Langfuse(
  secret_key="sk-lf-8be80c67-4187-4e43-9d01-544195dc9f03",
  public_key="pk-lf-d7653f64-8086-4365-b05c-865ead3478a3",
  host="http://localhost:3000"
)

In [None]:
trace = langfuse.trace(
    name = "eval",
    user_id = "eval_user",
    metadata = {
        "email": "prod@company.com",
    },
    tags = ["evaluation"]
)

In [None]:
df = result.to_pandas()

In [None]:
for _, row in df.iterrows():
    for metric_name in ["faithfulness", "answer_relevancy", "context_recall"]:
        langfuse.score(
            name=metric_name,
            value=row[metric_name],
            trace_id=trace.id
        )