In [4]:
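# Uncomment to install the packages this notebook imports:
# %pip install langchain
# # %pip install langchain-openai
# # %pip install langchainhub
# # %pip install pypdf
# %pip install chromadb
# %pip install pandas
# %pip install langchain-ollama
# %pip install langchain-chroma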

# DISCLAIMER: 

```
FOR THIS NOTEBOOK YOU WILL NEED TO BE RUNNING OLLAMA LOCALLY + HAVE THE LLAMA 3.2 MODEL AND THE NOMIC-EMBED-TEXT EMBEDDING MODEL INSTALLED
```

See [here](https://python.langchain.com/v0.2/docs/integrations/chat/ollama/) for how to set that up.

In [1]:
import os
import getpass

# Set OPENAI API Key

import os
import getpass

def _set_env(var: str) -> None:
    """Ensure the environment variable ``var`` is set, prompting for it if needed.

    When ``os.environ`` holds no non-empty value for ``var``, the value is read
    with ``getpass.getpass`` and stored in ``os.environ``. An existing value is
    left untouched and no prompt is shown.

    Args:
        var: Name of the environment variable to populate.
    """
    if not os.environ.get(var):
        # Interpolate the variable name into the prompt so the user can tell
        # which secret is being requested.
        os.environ[var] = getpass.getpass(f"{var}: ")

# Prompts for the key only when OPENAI_API_KEY is not already set in the environment.
# NOTE(review): the model and embedding cells visible in this notebook use Ollama,
# so this key looks unused here — confirm before removing.
_set_env("OPENAI_API_KEY")

A vector database is a way to store these embeddings, these numerical representations that we just discussed.

The pipeline is:
- Incoming document
- Create chunks of text from that document
- Embed each chunk
- Store these embeddings

![](2023-07-30-19-32-13.png)

[LangChain for LLM Application Development by Deeplearning.ai](https://learn.deeplearning.ai/langchain/lesson/1/introduction)

In [1]:
import pandas as pd

# Path of the superheroes dataset, relative to the notebook's working directory.
file_path = "./assets-resources/superheroes.csv"

# Read the CSV into a DataFrame and preview the first rows.
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Superhero Name,Superpower,Power Level,Catchphrase
0,Captain Thunder,Bolt Manipulation,90,Feel the power of the storm!
1,Silver Falcon,Flight and Agility,85,"Soar high, fearlessly!"
2,Mystic Shadow,Invisibility and Illusions,78,Disappear into the darkness!
3,Blaze Runner,Pyrokinesis,88,Burn bright and fierce!
4,Electra-Wave,Electric Manipulation,82,Unleash the electric waves!


In [2]:
from langchain.document_loaders.csv_loader import CSVLoader

In [3]:
# Turn every CSV row into one Document (see the output below: one row per entry).
loader = CSVLoader(file_path=file_path)
data = loader.load()

# Preview the first five documents.
data[:5]

[Document(metadata={'source': './assets-resources/superheroes.csv', 'row': 0}, page_content='Superhero Name: Captain Thunder\nSuperpower: Bolt Manipulation\nPower Level: 90\nCatchphrase: Feel the power of the storm!'),
 Document(metadata={'source': './assets-resources/superheroes.csv', 'row': 1}, page_content='Superhero Name: Silver Falcon\nSuperpower: Flight and Agility\nPower Level: 85\nCatchphrase: Soar high, fearlessly!'),
 Document(metadata={'source': './assets-resources/superheroes.csv', 'row': 2}, page_content='Superhero Name: Mystic Shadow\nSuperpower: Invisibility and Illusions\nPower Level: 78\nCatchphrase: Disappear into the darkness!'),
 Document(metadata={'source': './assets-resources/superheroes.csv', 'row': 3}, page_content='Superhero Name: Blaze Runner\nSuperpower: Pyrokinesis\nPower Level: 88\nCatchphrase: Burn bright and fierce!'),
 Document(metadata={'source': './assets-resources/superheroes.csv', 'row': 4}, page_content='Superhero Name: Electra-Wave\nSuperpower: Ele

In [4]:
from langchain_ollama import ChatOllama

# Chat model served by the local Ollama instance.
llm = ChatOllama(model="llama3.2", verbose=True)

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded documents into chunks of at most 1500 characters,
# with 100 characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=100,
)
all_splits = text_splitter.split_documents(data)

In [6]:
# Embed and store
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings

# Embed every chunk with the Ollama 'nomic-embed-text' model and index it in Chroma.
# NOTE(review): no persist directory is passed, so the index presumably lives
# only for this session — confirm.
vectordb = Chroma.from_documents(
    documents=all_splits,
    embedding=OllamaEmbeddings(model='nomic-embed-text'),
)

In [7]:
# Retrieve the chunks most similar to the question and count them.
question = "What is the name of the thunder super hero?"
docs = vectordb.similarity_search(query=question)
len(docs)

4

In [8]:
# Display the documents returned by the similarity search.
docs

[Document(metadata={'row': 0, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Captain Thunder\nSuperpower: Bolt Manipulation\nPower Level: 90\nCatchphrase: Feel the power of the storm!'),
 Document(metadata={'row': 30, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Thunderstrike\nSuperpower: Lightning Control\nPower Level: 91\nCatchphrase: Electrify the battlefield!'),
 Document(metadata={'row': 20, 'source': './assets-resources/superheroes.csv'}, page_content="Superhero Name: Stormbringer\nSuperpower: Weather Manipulation\nPower Level: 93\nCatchphrase: Unleash the storm's fury!"),
 Document(metadata={'row': 8, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Steel Titan\nSuperpower: Super Strength and Durability\nPower Level: 95\nCatchphrase: Indestructible force of nature!')]

In [9]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents in iteration order, separated by a blank line
        (``"\\n\\n"``); an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    return "\n\n".join(contents)


# System message for the QA prompt; "{context}" is the placeholder that is
# filled with the formatted retrieved documents.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, say that you don't know. "
    "Use three sentences maximum and keep the answer concise."
    "\n\n"
    "{context}"
)

# Chat prompt: the system instructions (with the context) followed by the user's question.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)


# Answer-generation chain: take the question and the retrieved documents from
# the incoming dict, fill the prompt, call the model, and parse its output
# with StrOutputParser.
rag_chain_from_docs = (
    {
        "input": lambda state: state["input"],
        "context": lambda state: format_docs(state["context"]),
    }
    | prompt
    | llm
    | StrOutputParser()
)

# Retriever view over the vector store.
retriever = vectordb.as_retriever()

# Hand only the question text (the "input" key) to the retriever.
retrieve_docs = (lambda state: state["input"]) | retriever

# Full pipeline: keep the incoming keys, add "context" with the retrieved
# documents, then add "answer" generated from that input and context.
qa_chain = (
    RunnablePassthrough
    .assign(context=retrieve_docs)
    .assign(answer=rag_chain_from_docs)
)

In [10]:
# Run the whole RAG pipeline on a new question and show the resulting dict
# (input, retrieved context, and answer).
question = "What is the catch phrase for the super hero with the power of producing balls of fire?"
result = qa_chain.invoke({"input": question})
result

{'input': 'What is the catch phrase for the super hero with the power of producing balls of fire?',
 'context': [Document(metadata={'row': 24, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Blazing Comet\nSuperpower: Fireball Projection\nPower Level: 82\nCatchphrase: Burn brighter than a comet!'),
  Document(metadata={'row': 42, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Solar Flare\nSuperpower: Solar Energy Projection\nPower Level: 85\nCatchphrase: Feel the burning light!'),
  Document(metadata={'row': 3, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Blaze Runner\nSuperpower: Pyrokinesis\nPower Level: 88\nCatchphrase: Burn bright and fierce!'),
  Document(metadata={'row': 11, 'source': './assets-resources/superheroes.csv'}, page_content='Superhero Name: Starburst\nSuperpower: Energy Projection\nPower Level: 83\nCatchphrase: Ignite the cosmos!')],
 'answer': 'The superhero with the powe

In [11]:
# Cross-check the answer against the raw data: 24 is the 'row' metadata of the
# first context document in the result shown above.
df.iloc[24]

Superhero Name                  Blazing Comet
Superpower                Fireball Projection
Power Level                                82
Catchphrase       Burn brighter than a comet!
Name: 24, dtype: object