In [None]:
# Run this cell to install the required packages (skip it if they are already installed):
%pip install langchain
%pip install langchain-openai
%pip install -U langsmith

1. Environment setup

2. Prototype in a Jupyter notebook, with LangSmith for inspection

3. Set up the chain in the project app structure

4. Set up server

5. Deploy locally

6. Test it on the playground

7. Call it as an API

# 1. Environment setup

```
conda create -n rag-pdf-app python=3.11
conda activate rag-pdf-app
pip install -U "langchain-cli[serve]" "langserve[all]"
langchain app new .
poetry add langchain
poetry add langchain-community
poetry add langchain-openai
poetry add chromadb
poetry add pypdf
poetry add tiktoken
poetry add openai
poetry add jupyter
poetry add python-dotenv
```

Below is optional depending on your development setup:

```
poetry run jupyter notebook
```

# 2. Prototype in a Jupyter notebook, with LangSmith for inspection

In [1]:
import getpass
import os

# Enable tracing so that runs from this notebook can be inspected in LangSmith.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Only prompt when no key is configured yet, and say which secret is wanted
# (getpass() without arguments shows a generic "Password: " prompt).
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

In [None]:
# in rag-pdf-app/app/chain.py

# inspired by this template from langchain: https://github.com/langchain-ai/langchain/blob/master/templates/rag-chroma-private/rag_chroma_private/chain.py

from typing import List, Tuple

from langchain import hub
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda

def load_pdf(file_path: str = "./paper.pdf") -> "List[Document]":
    """Load a PDF file into LangChain documents.

    Args:
        file_path: Path of the PDF to read.

    Returns:
        The list of documents produced by ``PyPDFLoader(file_path).load()``
        (not a plain string).
    """
    return PyPDFLoader(file_path).load()

def index_docs(
    docs: "List[Document]",
    persist_directory: str = "./i-shall-persist",
    embedding_model: str = "llama3",
):
    """Embed documents into a Chroma vector store and return a retriever over it.

    Args:
        docs: Documents to index, e.g. the output of ``load_pdf``. These are
            passed to ``Chroma.from_documents``, so they must be document
            objects rather than raw strings.
        persist_directory: Directory handed to Chroma as its persistence location.
        embedding_model: Name of the Ollama model used to compute embeddings.

    Returns:
        The retriever obtained from ``vectordb.as_retriever()``.
    """
    # NOTE(review): presumably every call adds ``docs`` to whatever is already
    # stored in ``persist_directory`` -- confirm whether re-running should
    # reuse the existing index instead.
    embeddings = OllamaEmbeddings(model=embedding_model)
    vectordb = Chroma.from_documents(
        docs, embeddings, persist_directory=persist_directory
    )
    return vectordb.as_retriever()


# Load the source PDF and build a retriever over its contents.
file_path = "./paper.pdf"
docs = load_pdf(file_path)
retriever = index_docs(docs)

# Prompt sent to the model: {context} receives the retrieved documents and
# {question} the user's query. (Fixed the "Ansdwer" typo in the instruction.)
template = """
Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Chat model used to generate the answer.
llm = ChatOllama(model="llama3")

# Two ways of building the RAG chain.
#
# Option 1 (kept for reference): compose the pieces by hand.
#   RunnablePassthrough source: https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.passthrough.RunnablePassthrough.html#langchain-core-runnables-passthrough-runnablepassthrough:~:text=Runnable%20to%20passthrough,and%20experiment%20with.
#   RunnableParallel source: https://api.python.langchain.com/en/latest/runnables/langchain_core.runnables.base.RunnableParallel.html
#
# chain = (
#     RunnableParallel({"context": retriever, "question": RunnablePassthrough()})
#     | prompt
#     | llm
#     | StrOutputParser()
# )

# Option 2 (used here): RetrievalQA with the custom prompt. The trailing
# lambda keeps only the "result" entry of the chain's output.
chain = (
    RetrievalQA.from_chain_type(
        llm,
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=True,
    )
    | RunnableLambda(lambda qa_output: qa_output["result"])
)
# Same chain without the custom prompt and without the "result" extraction:
# qa_chain = RetrievalQA.from_chain_type(llm, retriever=retriever, return_source_documents=True)

# Input type for the chain: a single string holding the user's question.
class Question(BaseModel):
    # `__root__` makes this a "custom root type" model: the model itself is
    # the value (here a plain string) instead of a dictionary of named fields.
    # It is meant for data that does not naturally fit a key-value structure,
    # such as a single value or a list.
    __root__: str


# Declare Question as the chain's input type.
# NOTE(review): presumably this is what lets the served API / playground
# accept a bare string as input -- confirm.
rag_chain = chain.with_types(input_type=Question)

# 3. Set up the chain in the project app structure

![](./assets-resources/langchain-project-structure.png)

Go to the folder: `rag-pdf-app/app/`

and save the rag code from above in `chain.py`

# 4. Set up server

Go to `rag-pdf-app/app/server.py`

and change it to:

```python
from fastapi import FastAPI
from fastapi.responses import RedirectResponse
from langserve import add_routes
from app.chain import rag_chain

app = FastAPI()


@app.get("/")
async def redirect_root_to_docs():
    return RedirectResponse("/docs")


# Edit this to add the chain you want to add
# Add routes connects the chain to our app exposing the methods of the chain to our web server
add_routes(app,rag_chain, path="/rag-local")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
```

# 5. Deploy locally

From within the conda environment in the root folder of the project:

`langchain serve`

or with poetry:

`poetry run langchain serve --port=8100`

# 6. Test it on the playground

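Open `http://localhost:8000/rag-local/playground/` in your browser (use port `8100` instead if you started the server with `--port=8100`).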

# 7. Call it as an API

```
from langserve.client import RemoteRunnable

# Use port 8100 instead if the server was started with `--port=8100`.
runnable = RemoteRunnable("http://localhost:8000/rag-local")

runnable.invoke("What is attention in this paper?")
```