In [None]:
%pip install langchain langchain_community langchain_chroma

In [1]:
%pip install -qU langchain-ollama

Note: you may need to restart the kernel to use updated packages.


## Methods to Setup Environmental Variables for API Access

### Method 1: Enter Manually on Run (More Secure)

In [1]:
import getpass
import os

In [2]:
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_API_KEY'] = getpass.getpass('LANGCHAIN_API_KEY: ')

### Method 2: Setup .env file with API keys and Load

In [3]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
loader = WebBaseLoader(
    web_paths=("https://llamaimodel.com/requirements/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=('stats-table')
        )
    ),
)
docs = loader.load()

In [5]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(docs)

In [6]:
from langchain_community.embeddings import OllamaEmbeddings

In [7]:
embedding = OllamaEmbeddings(
    model='llama3.1'
)

In [8]:
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding)

In [9]:
# vectorstore.delete_collection()
# vectorstore.reset_collection()

In [10]:
from langchain_ollama.llms import OllamaLLM

In [11]:
llm = OllamaLLM(model='llama3.1')

In [12]:
# Retrieve and generate using the relevant snippets of the blog.
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [13]:
rag_chain.invoke("What's the article about?")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


'The article is about the specifications and requirements for different models of Llama 3.1, including hardware and software dependencies. The models include 8B, 70B, and 405B, with varying parameters, context length, and multilingual support. It lists detailed hardware and software requirements for each model.'

In [14]:
rag_chain.invoke("List the requirements for the 8b model.")

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


'Here are the requirements for the 8b model:\n\nThe 8b model requires a modern processor with at least 8 cores and 16 GB of RAM, as well as an NVIDIA RTX 3090 (24 GB) or RTX 4090 (24 GB) GPU in 16-bit mode. The estimated GPU memory requirements are approximately 19.2 GB in 16-bit mode, 9.6 GB in 8-bit mode, and 4.8 GB in 4-bit mode.'