In [16]:
from langchain_huggingface import HuggingFaceEndpoint
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate,ChatPromptTemplate
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
import os
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain.chains.llm import LLMChain
from langchain_core.output_parsers import StrOutputParser
from langchain_mistralai.embeddings import MistralAIEmbeddings
from langchain_mistralai.chat_models import ChatMistralAI

In [17]:

# `api` (the Hugging Face access token) is not assigned anywhere in the visible
# notebook, so a fresh-kernel run would fail with NameError. Fall back to the
# environment variable when no earlier cell has defined it.
if "api" not in globals():
    api = os.environ["HUGGINGFACEHUB_API_TOKEN"]

# Hosted instruct model used as the LLM for every chain below.
repo_id = "mistralai/Mistral-7B-Instruct-v0.3"
model = HuggingFaceEndpoint(repo_id=repo_id, huggingfacehub_api_token=api)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to C:\Users\hp\.cache\huggingface\token
Login successful


In [18]:
# Fetch the Towards Data Science article on RAG and load it as LangChain documents.
RAG_ARTICLE_URL = "https://towardsdatascience.com/the-ins-and-outs-of-retrieval-augmented-generation-rag-56f470ccda4"

loader = WebBaseLoader(RAG_ARTICLE_URL)
docs = loader.load()
docs  # bare expression so the loaded documents are shown as the cell output

[Document(page_content='The Ins and Outs of Retrieval-Augmented Generation (RAG) | by TDS Editors | Towards Data ScienceOpen in appSign upSign inWriteSign upSign inThe Ins and Outs of Retrieval-Augmented Generation (RAG)TDS Editors·FollowPublished inTowards Data Science·Sent as aNewsletter·3 min read·Oct 12, 2023--ListenShareWhen accessible large language models first came on the scene, the excitement was impossible to miss: beyond their sheer novelty, they came with the promise to completely transform numerous fields and lines of work.Almost a year after the launch of ChatGPT, we’re far more aware of LLMs’ limitations, and of the challenges we face when we try to integrate them into real-world products. We’ve also, by now, come up with powerful strategies to complement and enhance LLMs’ potential; among these, retrieval-augmented generation (RAG) has emerged as—arguably—the most prominent. It gives practitioners the power to connect pre-trained models to external, up-to-date informati

# Summarize

In [20]:
# Map step: prompt run against each group of documents to extract its main themes.
# The template text is kept exactly as authored; `{docs}` is the slot filled in
# with the documents.
map_tempelate = (
    "This the following set of documents\n"
    "{docs}\n"
    "Based on this list of docs . please identify the main themes\n"
    "Helpful Answer:"
)
map_prompt = PromptTemplate.from_template(map_tempelate)
map_chain = LLMChain(prompt=map_prompt, llm=model)

# Reduce step: prompt that consolidates the per-chunk summaries into one summary.
# The template text (including the trailing space after "themes.") is unchanged.
reduce_template = (
    "The following is set of summaries:\n"
    "{docs}\n"
    "Take these and distill it into a final, consolidated summary of the main themes. \n"
    "Helpful Answer:"
)
reduce_prompt = PromptTemplate.from_template(reduce_template)
# Built once: the original constructed this identical chain twice in a row.
reduce_chain = LLMChain(llm=model, prompt=reduce_prompt)

# Stuff chain: feeds the documents into the `docs` variable of the reduce prompt.
combine_document_chain = StuffDocumentsChain(
    document_variable_name="docs",
    llm_chain=reduce_chain,
)

# Reduce stage: the same stuff chain serves both as the final combine step and
# as the collapse step used when the documents exceed the context.
reduce_documents_chain = ReduceDocumentsChain(
    # The maximum number of tokens to group documents into.
    token_max=1500,
    # Used if the documents exceed the context for `StuffDocumentsChain`.
    collapse_documents_chain=combine_document_chain,
    # The final chain that is called.
    combine_documents_chain=combine_document_chain,
)
# Full map-reduce summarization pipeline: `map_chain` runs first, and its results
# are passed on to `reduce_documents_chain`.
map_reduce_chain = MapReduceDocumentsChain(
    # Name of the prompt variable in `llm_chain` that receives the documents.
    document_variable_name="docs",
    # Do not include the results of the map steps in the output.
    return_intermediate_steps=False,
    # Reduce chain.
    reduce_documents_chain=reduce_documents_chain,
    # Map chain.
    llm_chain=map_chain,
)
# Split the loaded article into small overlapping chunks; `split_docs` is also
# what the vector store is built from further down.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=200,
)
split_docs = text_splitter.split_documents(docs)

# Summarize the chunks with the map-reduce chain; the result is the cell output.
map_reduce_chain.run(split_docs)



'\n\nThe main themes of the provided set of documents revolve around:\n\n1. Machine Learning Community: The importance of collaboration, sharing insights, and publishing work within the machine learning community.\n2. Writing for a Global Audience: Best practices for writing about machine learning topics in a way that is accessible to a global audience.\n3. Projects and Applications: Sharing examples of machine learning projects and their applications, such as text-to-speech technology.\n4. Privacy and Terms of Service: The role of privacy and terms of service in the machine learning field, particularly in relation to data usage.\n5. Reinforcement Learning and Dynamic Pricing: The application of reinforcement learning, specifically the Multi-Armed Bandits algorithm, in the context of dynamic pricing.\n6. Collaboration and Teamwork: Strategies for effective collaboration within machine learning teams or the broader field.\n7. Continuous Learning: Emphasizing the importance of staying up

# Chat Web

In [21]:
# Sentence-embedding model served through the Hugging Face Inference API.
# NOTE: this rebinds `repo_id`, which the LLM setup cell used for the Mistral repo.
repo_id = "sentence-transformers/all-MiniLM-L6-v2"

# The embedding model is selected with `model_name`. The original passed
# `repo_id=` and `add_to_git_credential=`, which are not fields of
# HuggingFaceInferenceAPIEmbeddings, so the requested model was never actually
# applied (it only worked because the class default is the same model).
embeddings = HuggingFaceInferenceAPIEmbeddings(api_key=api, model_name=repo_id)

In [23]:
from langchain_community.vectorstores import Chroma
import chromadb

COLLECTION_NAME = "new_collection"

chroma_client = chromadb.Client()

# Drop a collection left over from a previous run of this cell so chunks are not
# inserted twice. On a fresh kernel the collection does not exist and an
# unconditional delete raises, so only delete when it is actually present.
# `list_collections()` yields Collection objects or plain names depending on the
# chromadb version; `getattr` handles both.
existing_collections = {
    getattr(item, "name", item) for item in chroma_client.list_collections()
}
if COLLECTION_NAME in existing_collections:
    chroma_client.delete_collection(COLLECTION_NAME)
collection = chroma_client.create_collection(name=COLLECTION_NAME)

# Pass the client explicitly so the vector store writes into the collection
# managed above instead of relying on an implicitly created client.
vectorestore = Chroma.from_documents(
    documents=split_docs,
    collection_name=COLLECTION_NAME,
    embedding=embeddings,
    client=chroma_client,
)
reteriver = vectorestore.as_retriever()


In [24]:
# Prompt that asks the LLM to expand one user question into several search
# queries (RAG-fusion). The original wording was garbled ("based on the sing
# queries realated to"); the instruction text is restored so the model is told
# clearly what to produce.
template = """You are a helpful assistant that generates multiple search queries
based on a single input query. Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [25]:
def _split_queries(llm_output: str) -> list[str]:
    """Split raw LLM text into one query per line, dropping blank lines.

    A plain ``split('\\n')`` yields empty strings for leading, trailing, or
    doubled newlines; each of those would be sent to the retriever as a query.
    """
    return [line.strip() for line in llm_output.split("\n") if line.strip()]


# Query-generation pipeline: prompt -> LLM -> plain string -> list of queries.
generate_queries = (
    prompt_rag_fusion
    | model
    | StrOutputParser()
    | _split_queries
)

In [26]:
# Question passed to the retrieval chain and to the final answer chain.
question = "What is RAG?"

In [27]:
from langchain.load import dumps, loads
def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one ranking (Reciprocal Rank Fusion).

    Every document contributes ``1 / (rank + k)`` for each list it appears in,
    where ``rank`` is its zero-based position in that list. Contributions are
    summed per document, so documents that rank highly in many lists come first.

    Args:
        results: Ranked lists of documents, one list per query. Documents are
            identified by their ``dumps`` serialization, so two documents that
            serialize identically are treated as the same document.
        k: Constant added to the rank in the RRF formula.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by descending score.
        Documents with equal scores keep the order in which they were first seen.
    """
    # Serialized document -> accumulated fused score.
    fused_scores = {}

    for ranked_docs in results:
        for rank, doc in enumerate(ranked_docs):
            # Documents are serialized so they can be used as dictionary keys.
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # `sorted` is stable, so ties preserve first-seen (insertion) order.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)

    # Restore the original document objects alongside their fused scores.
    return [(loads(doc_str), score) for doc_str, score in ranked]
# RAG-fusion retrieval pipeline: generate queries, run the retriever on each
# one, then merge the per-query rankings with reciprocal rank fusion.
retrival_chain_rag_fusion = (
    generate_queries
    | reteriver.map()
    | reciprocal_rank_fusion
)

# NOTE: this rebinds `docs`, which the loader cell used for the loaded web
# documents; after this line it holds the output of `reciprocal_rank_fusion`.
docs = retrival_chain_rag_fusion.invoke({"question": question})


  warn_beta(


In [28]:
from operator import itemgetter

from langchain_core.runnables import  RunnablePassthrough

# Answer prompt: the fused retrieval results fill `{context}`, the user's
# question fills `{question}`. The template text is unchanged (including the
# trailing space before the final newline).
template = (
    "Answer the following question based on the context:\n"
    "{context}\n"
    "Question:{question}\n"
    'if the question is  not related to context just simply say "I am not trained on this topic" \n'
)
prompt = ChatPromptTemplate.from_template(template)

# Create the final chain.
# The chain is invoked with a dict, so the question must be pulled out with
# `itemgetter("question")`. `RunnablePassthrough()` forwarded the whole input
# dict, which rendered the prompt as "Question:{'question': ...}".
final_rag_chain = (
    {"context": retrival_chain_rag_fusion, "question": itemgetter("question")}
    | prompt
    | model
    | StrOutputParser()
)

# Run retrieval and answer generation for the question.
result = final_rag_chain.invoke({"question": question})

print(result)

Answer: Retrieval-Augmented Generation (RAG) is a strategy that gives practitioners the power to connect pre-trained models to external, up-to-date information sources that can generate more accurate and more useful outputs. It is a tool that helps in boosting the performance of large language models (LLMs).
