# LangChain basic RAG using Mistral AI

In [1]:
%pip install pypdf python-dotenv langchain langchain-core langchain-mistralai langchainhub langchain-experimental chromadb

Note: you may need to restart the kernel to use updated packages.


In [2]:
from dotenv import load_dotenv
# Load variables from a .env file into the environment (per the original note,
# this is where the API keys live), keeping secrets out of the notebook itself.
# The Mistral clients below are constructed without an explicit key, so they
# presumably read it from the environment — TODO confirm the variable name.
load_dotenv()

True

In [3]:
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_mistralai import MistralAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import FAISS
from langchain import hub

In [4]:
# Chat model used as the generation step of the RAG chain. No API key is passed
# here, so it is presumably taken from the environment populated by load_dotenv()
# — confirm.
llm = ChatMistralAI(model='mistral-large-latest')

In [5]:
# 2402.17764 — presumably the arXiv identifier of the paper; the PDF is expected
# to sit next to this notebook.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('./2402.17764.pdf')
# Load the PDF as-is and leave all chunking to the RecursiveCharacterTextSplitter
# in the following cell. `load_and_split()` would already chunk the text with a
# default splitter, so the documents ended up being split twice.
pages = loader.load()

In [6]:
# Split the loaded documents into overlapping chunks (chunk_size=1000,
# chunk_overlap=200) so each piece is small enough to embed and retrieve.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)
# Embed every chunk with MistralAIEmbeddings (default settings) and index it in Chroma.
# NOTE(review): no collection name or persist directory is passed; re-running this
# cell in the same kernel may add the same chunks to the store again — confirm
# against the Chroma wrapper's behaviour before relying on re-runs.
vectorstore = Chroma.from_documents(documents=splits, embedding=MistralAIEmbeddings())

# Retriever over the vector store with its default search settings.
retriever = vectorstore.as_retriever()
# Prompt fetched from the LangChain Hub under the id "rlm/rag-prompt"; the chain
# built later feeds it "context" and "question" inputs.
prompt = hub.pull("rlm/rag-prompt")

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values in input order, separated by a blank line
        (``"\\n\\n"``). An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)
    
    
# Assemble the RAG pipeline by piping stages together with `|`:
#   1. build the prompt inputs from the incoming question,
#   2. render the prompt,
#   3. call the chat model,
#   4. parse the model output with StrOutputParser.
rag_chain = (
    {
        # The question goes to the retriever; its results are flattened by format_docs.
        "context": retriever | format_docs,
        # The question itself is passed along under the "question" key.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)



In [9]:
# Run the full chain on a single question; as the cell's last expression, the
# result is displayed as the cell output.
# NOTE(review): the recorded answer names a "Llama 2" paper and says more context
# is needed — check that retrieval surfaces the paper's body rather than, e.g.,
# its reference list.
rag_chain.invoke("What is the main idea of the paper?")

'The main idea of the paper "Llama 2: open foundation and fine-tuned chat models" by Hugo Touvron et al. is the development and evaluation of open foundation and fine-tuned chat models. The paper likely focuses on the implementation and performance of these models, but without more context, it is difficult to provide a more detailed summary.'