In [None]:
import os
import torch

from elasticsearch import Elasticsearch
from pathlib import Path
from tqdm.auto import tqdm
from dotenv import load_dotenv
from mistralai import Mistral
from sentence_transformers import CrossEncoder

load_dotenv()

True

In [8]:
# Step 1: Load the lotr.txt file
file_path = Path("../../lotr.txt")
with open(file_path, "r") as file:
    text = file.read()
    
print(text[:468])

chapters = text.split("\n\n")

Three Rings for the Elven-kings under the sky,
               Seven for the Dwarf-lords in their halls of stone,
            Nine for Mortal Men doomed to die,
              One for the Dark Lord on his dark throne
           In the Land of Mordor where the Shadows lie.
               One Ring to rule them all, One Ring to find them,
               One Ring to bring them all and in the darkness bind them
           In the Land of Mordor where the Shadows lie.
    


In [3]:
# Step 2: Create an elasticsearch client (local)
# client = Elasticsearch([{'host': 'localhost', 'port': 9200, 'scheme': 'http'}])

# index_name = "lotr_index"

# if not client.indices.exists(index=index_name):
#     client.indices.create(index=index_name)

# for i, chapter in tqdm(enumerate(chapters)):
#     doc = {
#         "chapter_number": i + 1,
#         "content": chapter
#     }
#     client.index(index=index_name, id=i + 1, document=doc)

In [None]:
# Step 2: Create an elasticsearch client (managed)
client = Elasticsearch(
    os.getenv("ELASTIC_CLOUD_URL"),
    api_key=os.getenv("ELASTICSEARCH_API_KEY")
)

In [13]:
# Create an index with the specified mappings
index_name = "lotr_index"
if not client.indices.exists(index=index_name):
    client.indices.create(
        index=index_name,
        mappings={
            "properties": {
                "text": {"type": "text"}
            }
        }
    )

# Index each chapter
for i, chapter in enumerate(chapters):
    doc = {
        "text": chapter
    }
    client.index(index=index_name, id=i + 1, document=doc)

In [14]:
def keyword_search(query, index_name="lotr_index"):
    res = client.search(index=index_name, query={"match": {"text": query}})
    return res['hits']['hits']

# Example usage
results = keyword_search("Frodo")
for hit in results:
    print(f"Chapter {hit['_id']}: {hit['_source']['text'][:100]}...")

Chapter 766:      He was naked, lying as if in a swoon on a heap of filthy rags: his arm was flung up, shielding ...
Chapter 37:      Frodo took it from his breeches-pocket, where it was clasped to a chain that hung from his belt...
Chapter 344:      Aragorn sprang swiftly away and went in pursuit of Sam. Just as he reached the little lawn amon...
Chapter 56:      The song ended. 'And _now_ to bed! And _now_ to bed!' sang Pippin in a high voice.
     'Hush!'...
Chapter 73: 
     There was a terrific splash, and a shout of _Whoa!_ from Frodo. It appeared that a lot of Pipp...
Chapter 770:      Sam had just wits enough left to thrust the phial back into his breast. 'Run, Mr. Frodo!' he cr...
Chapter 589:      Faramir sat for a moment in thought. `Very good,' he said at last. `I surrender you to your mas...
Chapter 767:      Frodo sat for a while and shivered, dreadful fears chasing one another through his mind. Then h...
Chapter 34:      It was just at this time that Gandalf reappeared a

In [None]:
# Step 3: Vector search
vector_index_name = "lotr_vectors"
client.indices.create(
    index=vector_index_name,
    mappings={
        "properties": {
            "vector": {"type": "dense_vector", "dims": 1024},
            "text": {"type": "text"}
        }
    }
)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'lotr_vectors'})

In [12]:
batch_size = 20
batches = [chapters[i:i + batch_size] for i in range(0, len(chapters), batch_size)]

# Generate embeddings using Mistral AI
api_key = os.getenv("MISTRAL_API_KEY")
model = "mistral-embed"

mistral_client = Mistral(api_key=api_key)


In [None]:
# Iterate over each batch and generate embeddings
for batch_index, batch in enumerate(batches):
    embeddings_batch_response = mistral_client.embeddings.create(
        model=model,
        inputs=batch,  # Send the current batch of chapters
    )

    # Extract and store embeddings for the current batch
    for i, response in enumerate(embeddings_batch_response.data):
        embedding_vector = response.embedding
        chapter_index = batch_index * batch_size + i + 1  # Calculate the chapter number
        chapter_content = batch[i]

        # Index the chapter along with its embedding in Elasticsearch
        doc = {
            "chapter_number": chapter_index,
            "content": chapter_content,
            "embedding": embedding_vector
        }
        client.index(index=vector_index_name, id=chapter_index, document=doc)
    
    print(f"Generated and indexed embeddings for batch with {len(batch)} chapters.")

In [55]:
# Define a function to perform a vector search in Elasticsearch
vector_index_name = "lotr_vectors"
def vector_search(query_embedding, index_name=vector_index_name, size=5):
    script_score_query = {
        "script_score": {
            "query": {"match_all": {}},
            "script": {
                "source": "cosineSimilarity(params.query_vector, 'embedding') + 1.0",
                "params": {"query_vector": query_embedding}
            },
            "min_score": 0.0
        }
    }
    response = client.search(
        index=index_name,
        query=script_score_query,
        size=size
    )
    return response['hits']['hits']

def get_query_embeddings(query):
    embeddings_batch_response = mistral_client.embeddings.create(
        model="mistral-embed",
        inputs=query,  # Send the current batch of chapters
    )
    return embeddings_batch_response.data[0].embedding

# Enter your sample query here: 
sample_query = "What was Galadriels gift to Gimli in Lorien?"

sample_query_embedding = get_query_embeddings(sample_query)  # Replace with actual query embedding
results = vector_search(sample_query_embedding, size=40)
documents = ["Chapter ID: " + str(hit['_source']['chapter_number']) + hit['_source']['content'] for hit in results]
for hit in results:
    print(f"Chapter {hit['_source']['chapter_number']}: {hit['_source']['content']}...")

Chapter 460:      The postern was closed again, the iron door was barred and piled inside with stones. When all were safe within, Éomer turned: 'I thank you, Gimli son of Glóin!' he said. 'I did not know that you were with us in the sortie. But oft the unbidden guest proves the best company. How came you there?'
     'I followed you to shake off sleep,' said Gimli; 'but I looked on the hillmen and they seemed over large for me, so I sat beside a stone to see your sword-play.'
     'I shall not find it easy to repay you,' said Éomer.
     'There may be many a chance ere the night is over,' laughed the Dwarf. 'But I am content. Till now I have hewn naught but wood since I left Moria.'
     'Two!' said Gimli, patting his axe. He had returned to his place on the wall.
     'Two?' said Legolas. 'I have done better, though now I must grope for spent arrows; all mine are gone. Yet I make my tale twenty at the least. But that is only a few leaves in a forest.'...
Chapter 333:      Aragorn stay

In [56]:
# Step 4: Reranking search results
model = CrossEncoder("mixedbread-ai/mxbai-rerank-xsmall-v1")

def rerank_results(query, documents, model, top_k):
    results = model.rank(query, documents, return_documents=True, top_k=top_k)
    return results


reranked_results = rerank_results(sample_query, documents, model=model, top_k=10)
# Concatenate the results into a long string
output_string = "Reranked results:\n"
for idx, result in enumerate(reranked_results, 1):
    output_string += f"Result {idx}. Relevancy Score: {result['score']:.4f}\n"
    output_string += f"   Document: {result['text']}\n\n"

print(output_string)

Reranked results:
Result 1. Relevancy Score: 0.2761
   Document: Chapter ID: 737     The morning came after the day of battle, and it was fair with light clouds and the wind turning westward. Legolas and Gimli were early abroad, and they begged leave to go up into the City; for they were eager to see Merry and Pippin.
     'It is good to learn that they are still alive,' said Gimli; 'for they cost us great pains in our march over Rohan, and I would not have such pains all wasted.'
     Together the Elf and the Dwarf entered Minas Tirith, and folk that saw them pass marvelled to see such companions; for Legolas was fair of face beyond the measure of Men, and he sang an elven-song in a clear voice as he walked in the morning; but Gimli stalked beside him, stroking his beard and staring about him.
     'There is some good stone-work here,' he said as he looked at the walls; 'but also some that is less good, and the streets could be better contrived. When Aragorn comes into his own, I shal

In [57]:
# Step 5: LLM and final steps
chat_response = mistral_client.chat.complete(
    model = "mistral-small-latest",
    messages = [
        {
            "role": "system",
            "content": """
            Your task is to answer questions to the Lord of the Rings books based on search results. 
            Keep your answer brief. Only use the Context infromation to answer the question! Always cite your answer with the sources, otherwise the answer is useless!
            
            ## Output style
            Cite the documents from the context that you used like this: 
            -- Your answer is here --
            [Result #] Chapter: Chapter ID number


            ## Example 
            Question: Where was the ring forged? 
            Answer: The ring was forged in Mount Doom.\n [Result 4] Chapter: 123
            """,
        },
        {
            "role": "user",
            "content": f"Context information to answer the user question: {output_string}\n\n--- End of the references ---\n\nUser question: {sample_query}\n\nAnswer:",
        },
    ],
    max_tokens=100
)

answer = chat_response.choices[0].message.content 
print(answer)

Galadriel gave Gimli three hairs from her head, which he treasured greatly. This is implied by Gimli's statement: "I have looked the last upon that which was fairest... Henceforward I will call nothing fair, unless it be her gift." He put his hand to his breast, suggesting he kept the gift close to him.
[Result 6] Chapter: 337
