In [1]:
%pip install --quiet --upgrade langchain-text-splitters langchain-community langgraph

Note: you may need to restart the kernel to use updated packages.


In [67]:
import os
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv(".env"))

print("API set:", bool(os.getenv("LANGCHAIN_API_KEY")))
print("Project:", os.getenv("LANGCHAIN_PROJECT"))

API set: True
Project: ollama-tests


In [5]:
pip install langchain-ollama langchain-community langchain-core

Note: you may need to restart the kernel to use updated packages.


In [68]:
# Chat Model
from langchain_ollama import OllamaLLM
llm = OllamaLLM(model="llama3.2:1b")

# Embeddings
from langchain_ollama import OllamaEmbeddings

# Use the exact model name from ollama.list()
embeddings = OllamaEmbeddings(model="hf.co/CompendiumLabs/bge-base-en-v1.5-gguf:latest")

In [69]:
from langchain_core.vectorstores import InMemoryVectorStore

vector_store = InMemoryVectorStore(embedding=embeddings)
print("✅ Vector store created!")

✅ Vector store created!


UseCase

In [70]:
%pip install bs4

11295.59s - pydevd: Sending message related to process being replaced timed-out after 5 seconds


Note: you may need to restart the kernel to use updated packages.


In [71]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

Document Loader

In [72]:
# Load and chunk contents of the blog
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

In [73]:
print(f"Number of documents: {len(docs)}")
print(f"First document content length: {len(docs[0].page_content)}")
print(f"Metadata: {docs[0].metadata}")

Number of documents: 1
First document content length: 43047
Metadata: {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}


In [74]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)

In [75]:
len(all_splits)

63

In [76]:
import ollama
print("Available models:")
print(ollama.list())

Available models:
models=[Model(model='hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF:latest', modified_at=datetime.datetime(2025, 8, 31, 12, 51, 10, 240689, tzinfo=TzInfo(-04:00)), digest='042cf58aa32fa6ce631caff838fc9efd05313a629530756ccf59308eaa49b8aa', size=807696561, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='1.24B', quantization_level='unknown')), Model(model='hf.co/CompendiumLabs/bge-base-en-v1.5-gguf:latest', modified_at=datetime.datetime(2025, 8, 31, 12, 51, 5, 301862, tzinfo=TzInfo(-04:00)), digest='98c4eb4a3287679e9d01ee6bd664136c194871e741bb405b082ffa65fed19c95', size=68348847, details=ModelDetails(parent_model='', format='gguf', family='bert', families=['bert'], parameter_size='109M', quantization_level='unknown'))]


In [77]:
test_embeddings = embeddings.embed_documents(["Hello world"])
print(f"✅ Embeddings working! Length: {len(test_embeddings[0])}")

✅ Embeddings working! Length: 768


In [78]:
# Index chunks
_ = vector_store.add_documents(documents=all_splits)
print("✅ Documents added successfully!")

✅ Documents added successfully!


In [80]:
prompt = hub.pull("rlm/rag-prompt")



In [82]:
# from langsmith import Client
# client = Client()  # reads env vars loaded above
# print([p.name for p in client.list_projects()][:5])


In [57]:
# Define state for application
class State(TypedDict):
    question: str
    context: List[Document]
    answer: str

In [58]:
# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.invoke({"question": state["question"], "context": docs_content})
    response = llm.invoke(messages)
    return {"answer": response.content}

In [59]:
# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [None]:
response = graph.invoke({"question": "What is Task Decomposition?"})
print(response["answer"])