In [75]:
!pip install --quiet langchain langchain_openai langchain_chroma langchain-community langchainhub

In [9]:
# Environment sanity check: list the Jupyter kernels registered on this machine.
!jupyter kernelspec list

0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Available kernels:
  venv       /Users/shubhamrathod/Library/Jupyter/kernels/venv
  python3    /Users/shubhamrathod/.local/share/jupyter/kernels/python3


In [18]:
import time

### Check that the model is working

In [26]:
from langchain_community.llms import Ollama

# Smoke test: send a single prompt to the llama3 model and time the call.
llm = Ollama(
    model="llama3"
)

# Time only the inference call. perf_counter() is a monotonic, high-resolution
# clock intended for measuring intervals; time.time() follows the wall clock
# and can jump if the system time is adjusted.
start = time.perf_counter()
response = llm.invoke("Tell me a joke")
end = time.perf_counter()
length = end - start

# Show the results : this can be altered however you like
print("It took", length, "seconds!")

# Keep the reply as the cell's last expression so it is displayed; previously
# the return value of invoke() was discarded and the check showed nothing.
response


It took 5.512098789215088 seconds!


# RAG with Llama-3

In [28]:
# Re-create the llama3 client with the sampling temperature set to 0;
# this is the model instance used by the RAG chain.
llm = Ollama(model="llama3", temperature=0)

### Import Libraries

In [34]:
import os

import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

### Scrape the data.

In [40]:
# Load the blog post, parsing only the elements whose CSS class is
# post-content, post-title or post-header.
post_strainer = bs4.SoupStrainer(
    class_=("post-content", "post-title", "post-header")
)
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_strainer},
)

docs = loader.load()

In [44]:
# docs

### Chunk Data.

In [47]:
# Split the loaded documents into chunks of up to 1000 characters,
# with 200 characters of overlap between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

In [53]:
# Number of chunks produced by the splitter (shown instead of dumping `splits`).
len(splits)

66

### Create Embeddings.

In [64]:
# Never hardcode the API key in the notebook: read it from the environment.
# Indexing os.environ raises KeyError here if OPENAI_API_KEY is not set, so a
# missing key is reported immediately with a clear error.
embedding = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])

### Store embeddings in vector store.

In [67]:
# Embed every chunk with `embedding` and index the results in a Chroma store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
)

### Create a Retriever

In [70]:
# Wrap the vector store in a retriever (default settings) so it can be
# composed into the chain with the `|` operator.
retriever = vectorstore.as_retriever()

### Call LLM

In [77]:
# Pull the "rlm/rag-prompt" prompt from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")


def format_docs(docs):
    """Join the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values, in order, separated by one blank line.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)


# Inputs fed to the prompt: "context" is the retriever output joined by
# format_docs, and "question" is the chain input passed through unchanged.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Pipe the inputs through the prompt and the LLM, then parse the model
# output with StrOutputParser.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

In [79]:
# Time one end-to-end RAG query. perf_counter() is a monotonic clock intended
# for measuring intervals, unlike time.time(), which follows the wall clock.
start = time.perf_counter()
answer = rag_chain.invoke("What is Task Decomposition?")
end = time.perf_counter()
length = end - start

# Show the results : this can be altered however you like
print("It took", length, "seconds!")

# Keep the answer as the cell's last expression so it is displayed; previously
# the return value of invoke() was discarded and only the timing was shown.
answer


It took 13.361364126205444 seconds!
