In [69]:
from langchain_community.document_loaders import NotebookLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_community.vectorstores import SKLearnVectorStore
from langchain_ollama import OllamaEmbeddings

import json

In [46]:
loader = NotebookLoader(
    "../all_functions.ipynb",
    include_outputs=False,
    remove_newline=True,
)

In [47]:
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=50, chunk_overlap=0
)
# Split the documents into chunks
doc_splits = text_splitter.split_documents(loader.load())

In [48]:
vectorstore = SKLearnVectorStore.from_documents(
    documents=doc_splits,
    embedding=OllamaEmbeddings(model="llama3.1:8b-instruct-fp16")
)
retriever = vectorstore.as_retriever(k=4)

In [None]:
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Define the prompt template for the LLM
prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks.
    Use the following documents to answer the question.
    If you don't know the answer, just say that you don't know.
    Use three sentences maximum and keep the answer concise:
    Question: {question}
    Documents: {documents}
    Answer:
    """,
    input_variables=["question", "documents"],
)
# Initialize the LLM with Llama 3.1 model
llm = ChatOllama(
    model="llama3.1",
    temperature=0,
)
# Create a chain combining the prompt template and LLM
rag_chain = prompt | llm | StrOutputParser()
# Define the RAG application class
class RAGApplication:
    def __init__(self, retriever, rag_chain):
        self.retriever = retriever
        self.rag_chain = rag_chain
    def run(self, question):
        # Retrieve relevant documents
        documents = self.retriever.invoke(question)
        # Extract content from retrieved documents
        doc_texts = "\\n".join([doc.page_content for doc in documents])
        # Get the answer from the language model
        answer = self.rag_chain.invoke({"question": question, "documents": doc_texts})
        return answer
# Initialize the RAG application
rag_application = RAGApplication(retriever, rag_chain)
# Example usage
question = "What is the capital of Germany?"
answer = rag_application.run(question)
print("Question:", question)
print("Answer:", answer)

Question: What is the capital of Germany?
Answer: I don't know. The provided documents do not contain information about Germany's capital, but rather a function to retrieve a country's capital and a tip calculator for different countries' meal services.


In [50]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import chain


@chain
def retriever(query: str) -> List[Document]:
    docs, scores = zip(*vectorstore.similarity_search_with_score(query))
    for doc, score in zip(docs, scores):
        doc.metadata["score"] = score

    return docs

In [58]:
result = retriever.invoke('What is the average airspeed velocity of a swallow')
result

(Document(metadata={'id': '15de98e4-72ab-48e7-9c09-5905cacfd5a8', 'source': '../all_functions.ipynb', 'score': 0.7404258977853408}, page_content="str: A JSON string with the current time in PST, temperature, weather, and the city,', '             or an error message if the input is"),
 Document(metadata={'id': 'e885a2fe-7d5d-4970-a4e1-274cb0f3cb45', 'source': '../all_functions.ipynb', 'score': 0.7428807921438789}, page_content="- 25% for excellent service', '    For other countries, the tip percentages are:', '        - 5% for poor service', '        - 10% for satisfactory"),
 Document(metadata={'id': '5fd914d6-cc8b-4d41-8d76-2c8b2bb47531', 'source': '../all_functions.ipynb', 'score': 0.7768644949093271}, page_content='\'    """\', \'    Calculates the total duration of a playlist and returns the result as a JSON string.\', \'\', \'    This function computes the total duration of a playlist based on the number of songs'),
 Document(metadata={'id': '1a30fee1-b5f2-400f-a8c2-731568ca5da2'

In [67]:
max([i.metadata['score'] for i in result])

0.7802176961760763

In [72]:
with open('failed_queries.json', 'r') as file:
    data = json.load(file)

max_values = []

# Iterate over the queries and print each query
for item in data["queries"]:
    result = retriever.invoke(item["query"])
    max_values.append(max([i.metadata['score'] for i in result]))

In [80]:
average_max_value = sum(max_values) / len(max_values)
average_max_value

0.7143435628245849