In [1]:
!pip install -q elasticsearch ragas datasets langchain-openai


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import os
import json
from getpass import getpass
from elasticsearch import Elasticsearch
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision
from datasets import Dataset
from langchain_openai import ChatOpenAI

In [3]:
# Elasticsearch client used by every cell below. The host and the API key are
# prompted for with getpass (input is not echoed) rather than hard-coded here.
es = Elasticsearch(
    getpass("Host: "),
    api_key=getpass("API Key: "),
)

Host:  ········
API Key:  ········


In [4]:
# Name of the Elasticsearch index that the following cells recreate, populate and search.
index_name = "ragas-books"

In [5]:
def embed_query(text: str, prefix: str = "query"):
    """Embed ``text`` with the ``.multilingual-e5-small`` model hosted in Elasticsearch.

    The input sent to the model is ``"<prefix>: <text>"``.

    Args:
        text: Raw text to embed.
        prefix: Input-type marker placed in front of the text. Defaults to
            ``"query"``, which reproduces the previous hard-coded behaviour.
            The E5 model card describes ``"passage"`` as the marker for text
            that is being indexed, so callers embedding documents can pass
            ``prefix="passage"``.

    Returns:
        The ``predicted_value`` of the first inference result, i.e. the
        embedding produced for ``text``.

    Raises:
        KeyError / IndexError: if the inference response does not contain a
            ``predicted_value`` for the submitted document.
    """
    res = es.ml.infer_trained_model(
        model_id=".multilingual-e5-small",
        body={"docs": [{"text_field": f"{prefix}: {text}"}]}
    )

    # Exactly one document is submitted, so only the first result is relevant.
    return res["inference_results"][0]["predicted_value"]

In [6]:
# Start from a clean slate: drop the index if it already exists so this cell
# can be re-run without an "already exists" error or leftover documents.
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
    print(f"Deleted existing index '{index_name}'")

# Book metadata is stored as text/float fields; "embedding" is an indexed
# dense vector compared with cosine similarity (used by the kNN search below).
es.indices.create(index=index_name, body={
    "mappings": {
        "properties": {
            "book_title": {"type": "text"},
            "author_name": {"type": "text"},
            "book_description": {"type": "text"},
            "rating_score": {"type": "float"},
            "embedding": {
                "type": "dense_vector",
                # NOTE(review): must equal the length of the vectors returned by
                # embed_query — confirm 384 for the deployed model.
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            }
        }
    }
})
print(f"Created index '{index_name}'")

Deleted existing index 'ragas-books'
Created index 'ragas-books'


In [7]:
# Load the book catalogue. JSON is UTF-8 by specification, so the encoding is
# given explicitly instead of relying on the platform default (which would
# garble non-ASCII names on some systems).
with open("books.json", encoding="utf-8") as f:
    books = json.load(f)

# Embed each description and index the book one document at a time. A failure
# on one book is reported and skipped so the remaining books are still indexed.
for i, book in enumerate(books, 1):
    try:
        # NOTE(review): embed_query prepends "query: " to its input; the E5 model
        # card describes "passage: " as the marker for indexed text — consider
        # embedding documents with that marker.
        book["embedding"] = embed_query(book["book_description"])
        es.index(index=index_name, document=book)
        print(f"Indexed {i}: {book['book_title']}")
    except Exception as e:
        print(f"Failed to index '{book.get('book_title', 'Unknown')}': {e}")

# Newly indexed documents only become searchable after a refresh; force one so
# a search issued right after this cell (e.g. on "Run All") sees every book.
es.indices.refresh(index=index_name)

Indexed 1: Lucky 7
Indexed 2: Salvation Lost
Indexed 3: Alien Warrior's Mate
Indexed 4: On the Steel Breeze
Indexed 5: Salvage Marines
Indexed 6: Trade Secret
Indexed 7: There Will Be Time
Indexed 8: Only in Death
Indexed 9: His To Claim
Indexed 10: Savage Drift
Indexed 11: Light of the Jedi
Indexed 12: Mega Robo Bros
Indexed 13: Transmetropolitan, Vol. 1: Back on the Street
Indexed 14: The Queen of Traitors
Indexed 15: The Island of Doctor Moreau
Indexed 16: Human Nature
Indexed 17: Legion
Indexed 18: Wolfsbane
Indexed 19: Lamb: The Gospel According to Biff, Christ's Childhood Pal
Indexed 20: Our Pet
Indexed 21: The Aylesford Skull
Indexed 22: Ghosts of War
Indexed 23: The Book of Time
Indexed 24: Because It Is My Blood
Indexed 25: The Annals of the Heechee


In [8]:
def vector_search(query, top_k=3):
    """Retrieve the ``top_k`` books closest to ``query`` with a kNN search.

    The query is embedded with ``embed_query`` and matched against the
    ``embedding`` field of ``index_name``.

    Returns:
        A ``(contexts, books_info)`` pair of equally long lists: ``contexts``
        holds one "<title> by <author>: <description>" string per hit and
        ``books_info`` holds the corresponding ``_source`` dictionaries.
    """
    knn_clause = {
        "field": "embedding",
        "k": top_k,
        "num_candidates": 100,
        "query_vector": embed_query(query),
    }
    response = es.search(
        index=index_name,
        body={
            "knn": knn_clause,
            "_source": ["book_title", "author_name", "book_description", "rating_score"],
        },
    )

    # Keep the raw documents, then derive one context string from each of them.
    books_info = [hit["_source"] for hit in response["hits"]["hits"]]
    contexts = [
        f"{book['book_title']} by {book['author_name']}: {book['book_description']}"
        for book in books_info
    ]
    return contexts, books_info

In [10]:
# Prompt for the OpenAI key (input is not echoed) instead of hard-coding it.
API_KEY = getpass("OPENAI_API_KEY: ")

# Chat model shared by answer generation (generate_answer) and by the Ragas
# evaluation, where it is passed as `llm=` in run_ragas_demo.
chat_llm = ChatOpenAI(
    model="gpt-4o",  # OpenAI model name used for every LLM call in this notebook
    temperature=0.1,
    api_key=API_KEY  # passed explicitly rather than read from the environment
)

OPENAI_API_KEY:  ········


In [11]:
def generate_answer(question, contexts):
    """Ask ``chat_llm`` to answer ``question`` using only the given contexts.

    Args:
        question: The user question inserted into the prompt.
        contexts: Context strings; they are joined with blank lines and placed
            in the prompt's "Context:" section.

    Returns:
        The model's reply text with surrounding whitespace removed.
    """
    # Blank-line separated contexts keep each retrieved book visually distinct.
    context_text = "\n\n".join(contexts)

    prompt = f"""You are a helpful assistant that recommends books.
Use only the information from the context below to answer the question.
Do not include any books, authors, or details that are not explicitly present in the context.

Repeat the exact book title and author from the context in your answer.

Context:
{context_text}

Question:
{question}

Answer:"""

    reply_text = chat_llm.invoke(prompt).content
    return reply_text.strip()

In [12]:
def create_dynamic_ground_truth(question, books_info):
    """Build a reference answer from the highest-rated retrieved book.

    Args:
        question: The user question; its wording (case-insensitive) selects the
            sentence template ("science fiction", "fantasy", "mystery", or a
            generic recommendation).
        books_info: Book dictionaries with ``book_title``, ``author_name`` and
            optionally ``rating_score``.

    Returns:
        A one-sentence recommendation naming the best-rated book, or
        ``"No relevant books found."`` when ``books_info`` is empty.
    """
    if not books_info:
        return "No relevant books found."

    def _rating(book):
        # A missing or null rating counts as 0 instead of crashing float(None).
        return float(book.get("rating_score") or 0)

    best_book = max(books_info, key=_rating)
    title = best_book["book_title"]
    author = best_book["author_name"]

    # Lower-case once; every genre check below is case-insensitive.
    topic = question.lower()

    if "science fiction" in topic:
        return f"A good science fiction book is '{title}' by {author}."
    if "fantasy" in topic:
        # No quote after the author: the original emitted a stray apostrophe here.
        return f"'{title}' by {author} is a good fantasy book."
    if "mystery" in topic:
        return f"'{title}' by {author} is a good mystery novel."
    return f"I recommend '{title}' by {author}."

In [13]:
def run_ragas_demo():
    """Run the end-to-end RAG demo and score it with Ragas.

    For each demo question the function retrieves contexts with
    ``vector_search``, generates an answer with ``generate_answer`` and builds
    a reference answer with ``create_dynamic_ground_truth``. Questions that
    yield no contexts or raise an error are reported and skipped. The collected
    samples are evaluated with the ``context_precision``, ``faithfulness`` and
    ``answer_relevancy`` metrics; per-sample scores and their averages are
    printed and the table is written to ``ragas_evaluation.csv``.

    Returns:
        The object returned by ``ragas.evaluate``, or ``None`` when no sample
        could be built or the evaluation itself raised an exception.
    """
    print("🚀 Demo:\n")

    demo_questions = [
        "What's a good science fiction book with high ratings?",
        "Can you suggest a fantasy book by a popular author?", 
        "What's a highly rated mystery novel?",
        "Recommend a book with good reviews"
    ]

    # Parallel lists: index k of each list describes the same evaluated sample.
    questions, contexts_list, answers, ground_truths = [], [], [], []

    for i, question in enumerate(demo_questions, 1):
        print(f"\n📚 Question {i}: {question}")

        try:
            contexts, books_info = vector_search(question, top_k=3)
            if not contexts:
                print(f"No contexts found for question {i}")
                continue

            answer = generate_answer(question, contexts)
            print(f"Answer: {answer[:100]}...")
            ground_truth = create_dynamic_ground_truth(question, books_info)
            print(f"Ground Truth: {ground_truth}")

            # Append only after every step succeeded so the lists stay aligned.
            questions.append(question)
            contexts_list.append(contexts)
            answers.append(answer)
            ground_truths.append(ground_truth)

        except Exception as e:
            # Best effort: one failing question must not abort the whole demo.
            print(f"Error processing question {i}: {e}")

    if not questions:
        print("\nNo valid Q&A pairs generated.")
        return None

    eval_dataset = Dataset.from_dict({
        "question": questions,
        "contexts": contexts_list,
        "answer": answers,
        "ground_truth": ground_truths,
    })

    print("\n✨ Running Ragas evaluation...")
    try:
        result = evaluate(
            dataset=eval_dataset,
            metrics=[context_precision, faithfulness, answer_relevancy],
            llm=chat_llm,
            embeddings=None
        )

        df = result.to_pandas()

        print("\n✨ Ragas Evaluation Results:")
        print(df)

        # Fixed: the original "\✨" was an invalid escape sequence and printed a
        # literal backslash instead of starting a new line.
        print("\n✨ Averages:")

        # numeric_only skips the text columns (question, contexts, answer, ...).
        for metric, value in df.mean(numeric_only=True).items():
            print(f"{metric}: {value:.3f}")

        df.to_csv("ragas_evaluation.csv", index=False)
        print("\nResults saved to 'ragas_evaluation.csv'")
        return result

    except Exception as e:
        print(f"Ragas evaluation failed: {e}")
        return None

In [14]:
# Entry point of the notebook: run the demo and report how it went. Any error
# that escapes run_ragas_demo is printed with its traceback instead of being
# left as a raw cell failure.
try:
    results = run_ragas_demo()
    print("\n🎉 Demo completed successfully!" if results else "\nDemo completed with issues.")
except Exception as e:
    print(f"\nError during demo: {e}")
    import traceback
    traceback.print_exc()

🚀 Demo:


📚 Question 1: What's a good science fiction book with high ratings?
Answer: "Light of the Jedi" by Charles Soule...
Ground Truth: A good science fiction book is 'Light of the Jedi' by Charles Soule.

📚 Question 2: Can you suggest a fantasy book by a popular author?
Answer: The Book of Time by Guillaume Prévost...
Ground Truth: 'Trade Secret' by Sharon Lee' is a good fantasy book.

📚 Question 3: What's a highly rated mystery novel?
Answer: A highly rated mystery novel is "Human Nature" by Jonathan Green....
Ground Truth: 'On the Steel Breeze' by Alastair Reynolds' is a good mystery novel.

📚 Question 4: Recommend a book with good reviews
Answer: The Island of Doctor Moreau by H.G. Wells...
Ground Truth: I recommend 'Savage Drift' by Emmy Laybourne.

✨ Running Ragas evaluation...


Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]


✨ Ragas Evaluation Results:
                                          user_input  \
0  What's a good science fiction book with high r...   
1  Can you suggest a fantasy book by a popular au...   
2               What's a highly rated mystery novel?   
3                 Recommend a book with good reviews   

                                  retrieved_contexts  \
0  [Light of the Jedi by Charles Soule: Two hundr...   
1  [Alien Warrior's Mate by Vi Voxley: He's damn ...   
2  [The Island of Doctor Moreau by H.G. Wells: Ra...   
3  [The Island of Doctor Moreau by H.G. Wells: Ra...   

                                            response  \
0               "Light of the Jedi" by Charles Soule   
1              The Book of Time by Guillaume Prévost   
2  A highly rated mystery novel is "Human Nature"...   
3          The Island of Doctor Moreau by H.G. Wells   

                                           reference  context_precision  \
0  A good science fiction book is 'Light of the J... 