In [1]:
import os
# Turn on LangChain tracing and point it at the LangSmith endpoint.
os.environ.update({
    'LANGCHAIN_TRACING_V2': 'true',
    'LANGCHAIN_ENDPOINT': 'https://api.smith.langchain.com',
})
# The API key is intentionally not set here; supply it from the environment:
# os.environ['LANGCHAIN_API_KEY'] = <your api key>

In [2]:
# Compute device passed to the GPT4All embedding model further down.
# The default names the author's GPU; set GPT4ALL_DEVICE to run the notebook
# on another machine without editing this cell.
device = os.environ.get('GPT4ALL_DEVICE', 'cuda:NVIDIA GeForce RTX 4090')

### Load documents

In [3]:
from pathlib import Path

from langchain_community.document_loaders import TextLoader


data_path = Path("vs_data")

# Load one Document per regular file in the data directory.
# - Directories (e.g. Jupyter's .ipynb_checkpoints) are skipped, since
#   TextLoader can only read files.
# - Paths are sorted because iterdir() yields entries in arbitrary order,
#   which would otherwise make the document order differ between runs.
documents = [
    TextLoader(doc_path).load()[0]
    for doc_path in sorted(data_path.iterdir())
    if doc_path.is_file()
]

### Split documents

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Chunking parameters handed to the tiktoken-based splitter.
CHUNK_SIZE = 300
CHUNK_OVERLAP = 50

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Break every loaded document into overlapping chunks for indexing.
splits = text_splitter.split_documents(documents)

### Index

In [5]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings

# Local GGUF embedding model. The default is the author's GPT4All install
# location; set GPT4ALL_EMBED_MODEL to point at the file on another machine.
embedding_model_path = os.environ.get(
    "GPT4ALL_EMBED_MODEL",
    "/mnt/c/Users/Marek/AppData/Local/nomic.ai/GPT4All/nomic-embed-text-v1.5.f16.gguf",
)

# GPT4AllEmbeddings declares the model field as `model_name`. An unknown
# `model=` keyword is not forwarded to GPT4All, so the path must be passed
# under this name for the nomic model to actually be loaded.
# NOTE(review): confirm against the installed langchain_community version.
embedding = GPT4AllEmbeddings(
    model_name=embedding_model_path,
    device=device,
    n_threads=32,
)

# Embed every chunk and store the vectors in a Chroma collection.
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)

# Retriever with default search settings; used once per query variant later.
retriever = vectorstore.as_retriever()

### Create chain

In [6]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

In [7]:
from langchain_experimental.llms.ollama_functions import OllamaFunctions

# Chat model used both for generating alternative questions (structured
# output) and for the final answer. temperature=0.0 is set here so repeated
# runs use the same sampling configuration.
llm = OllamaFunctions(model="llama3.1", temperature=0.0, num_predict=256)

  warn_deprecated(


In [8]:
from typing import List, Tuple
from operator import itemgetter
from langchain_core.pydantic_v1 import BaseModel, Field


# Multi-query prompt: asks the model for five rephrasings of {question},
# returned as a JSON list, so that retrieval is run from several angles.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions in json object as a list. Original question: {question}"""

# Schema for the structured LLM output: a single `questions` field holding the
# rephrased versions of the user's question.
# NOTE(review): documented with comments rather than a class docstring, because
# a docstring may be forwarded to the model as the schema description — confirm
# before adding one.
class AlternativeQuestions(BaseModel):
    questions: List[str] = Field(description="Alternative questions")


prompt = ChatPromptTemplate.from_template(template)
structured_llm = llm.with_structured_output(AlternativeQuestions)


def _extract_questions(parsed):
    """Return the `questions` attribute of the structured LLM output."""
    return parsed.questions


# question text -> multi-query prompt -> structured LLM -> list of questions
alternative_questions_chain = (
    {"question": RunnablePassthrough()}
    | prompt
    | structured_llm
    | RunnableLambda(_extract_questions)
)

# alternative_questions_chain.invoke("What is the evolution of Lightning ring and how do i get it?")

In [19]:
from langchain.load import dumps, loads
#  Function from https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb
def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one ranking (Reciprocal Rank Fusion).

    Every document earns ``1 / (rank + k)`` for each list it appears in, where
    ``rank`` is its zero-based position in that list; the contributions are
    summed per document. Documents are identified by their serialized form
    (``dumps``), so the same document returned for different queries
    accumulates a single score.

    Args:
        results: One ranked list of documents per query, best match first.
        k: Constant added to the rank in the RRF formula; larger values
            flatten the difference between ranks.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by score, highest
        first. Documents with equal scores keep the order in which they were
        first encountered.
    """
    # Serialized document -> accumulated RRF score.
    fused_scores = {}

    for docs in results:
        for rank, doc in enumerate(docs):
            # Serialized form is the dictionary key (assumes documents can be
            # serialized to JSON).
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # Highest fused score first; deserialize the keys back into documents.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

def _collect_queries(inputs):
    """Return the original question followed by the generated alternatives."""
    return [inputs["original_question"]] + inputs["alternative_questions"]


def _retrieve_per_query(queries):
    """Run the retriever once for each query, in order."""
    # Explicit loop on purpose: retriever.map() crashes on gpt4all.
    return [retriever.invoke(query) for query in queries]


# question -> all query variants -> one ranked list per variant -> fused ranking
retreival_chain = (
    {
        "original_question": RunnablePassthrough(),
        "alternative_questions": alternative_questions_chain,
    }
    | RunnableLambda(_collect_queries)
    | RunnableLambda(_retrieve_per_query)
    | reciprocal_rank_fusion
)

In [18]:
# Answer prompt: the model must answer {question} from {context} only, and
# reply "I don't know" when the context is insufficient.
# Note: this rebinds `template` (and `prompt` below), which the multi-query
# cell also assigns. The earlier chain already holds its own prompt object,
# so it is unaffected by the rebinding.
template = """Answer the question based on the context below. If the
question cannot be answered using the information provided answer
with "I don't know".

Context: {context}

Question: {question}

Answer: """

prompt = ChatPromptTemplate.from_template(template)

def _format_context(ranked_docs):
    """Render fused retrieval results as plain text for the prompt.

    Args:
        ranked_docs: ``(document, score)`` tuples as returned by
            ``reciprocal_rank_fusion``, best match first.

    Returns:
        The documents' ``page_content`` joined by blank lines, in ranked
        order. Scores and metadata are left out so the prompt contains only
        source text rather than the Python repr of the tuples.
    """
    return "\n\n".join(doc.page_content for doc, _score in ranked_docs)


# question -> {fused context as text, question} -> answer prompt -> LLM -> str
rag_chain = (
    {
        "context": retreival_chain | RunnableLambda(_format_context),
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)
rag_chain.invoke("What is the evolution of Lightning ring and how do i get it?")

'The evolution of Lightning Ring is Thunder Loop. You can get it by evolving Lightning Ring with Duplicator.'