In [1]:
import shutil

from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Web pages that make up the knowledge base.
urls = [
    "https://www.planetary.org/space-missions/curiosity",
    "https://www.rmg.co.uk/stories/topics/mars-nasa-rover-perseverance-facts-dates"
]

# Load every page with its own loader; `docs` holds one list of documents per URL.
docs = []
for url in urls:
    docs.append(WebBaseLoader(url).load())

# Flatten the per-URL lists into a single list of documents.
docs_list = []
for page_docs in docs:
    docs_list.extend(page_docs)

# Split the documents into chunks of size 250 with no overlap between
# neighbouring chunks (splitter is built via the tiktoken-encoder constructor).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=250,
)
doc_splits = text_splitter.split_documents(docs_list)


In [3]:
# Settings shared by the cells that create and reopen the vector store.
collection_name = "rag-chroma"  # name of the Chroma collection
persist_directory = "../backend/chroma_langchain_db"  # on-disk location of the database

In [4]:
# Remove old database files, if any, so the store is rebuilt from scratch.
# The path comes from `persist_directory` rather than being repeated here, so
# the cleanup always targets the directory the later cells actually use.
# `ignore_errors=True` mirrors `rm -rf`: a missing directory is not an error.
shutil.rmtree(persist_directory, ignore_errors=True)

In [5]:
# Embedding model: "nomic-embed-text", reached through the Ollama endpoint below.
oembed = OllamaEmbeddings(
    model="nomic-embed-text",
    base_url="http://localhost:11434",
)

# Build the vector store from the document chunks, using the collection name
# and persist directory configured earlier in the notebook.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    embedding=oembed,
    persist_directory=persist_directory,
    collection_name=collection_name,
)

In [6]:
# Sanity check: how many entries the collection holds after indexing.
# NOTE(review): `_collection` is a private attribute of the store object.
stored_count = vectorstore._collection.count()
print(stored_count)

39


In [7]:
# Query the freshly built store for the 4 chunks closest to the query text.
docs = vectorstore.similarity_search(
    query="Curiosity mission launch",
    k=4,
)
docs

[Document(id='d3b29ca7-41c4-44c6-9df7-988cd957ac56', metadata={'description': "NASA's Curiosity rover landed on Mars in 2012 to search for evidence that the planet could once have supported Earth-like life.", 'language': 'en', 'source': 'https://www.planetary.org/space-missions/curiosity', 'title': "Curiosity, exploring Mars' surface | The Planetary Society"}, page_content="Watching Curiosity Land on Mars A couple in Times Square listens intently to the news reports as NASA's Curiosity rover attempts to land on Mars.Image: Navid BaratyWhat are Curiosity's mission objectives?Within a year, Curiosity achieved its primary goal of verifying that\nGale Crater, where it landed, was an ancient lakebed that had water and\nchemical elements that once could have supported life.Now in extended mission operations, Curiosity is continuing to look\nfor signs of life in Gale Crater. In January 2022, scientists reported\nthat rock samples found by Curiosity revealed interesting carbon\nsignatures. On 

In [8]:
# Open the same collection again through the Chroma constructor, pointing at
# the persist directory and reusing the same embedding function.
vector_store_from_client = Chroma(
    collection_name=collection_name,
    embedding_function=oembed,
    persist_directory=persist_directory,
)

In [9]:
# Sanity check: entry count of the reopened collection.
# NOTE(review): `_collection` is a private attribute of the store object.
reopened_count = vector_store_from_client._collection.count()
print(reopened_count)

39


In [10]:
# Run the same query against the reopened store; fetch the 4 closest chunks.
docs = vector_store_from_client.similarity_search(
    query="Curiosity mission launch",
    k=4,
)
docs

[Document(id='d3b29ca7-41c4-44c6-9df7-988cd957ac56', metadata={'description': "NASA's Curiosity rover landed on Mars in 2012 to search for evidence that the planet could once have supported Earth-like life.", 'language': 'en', 'source': 'https://www.planetary.org/space-missions/curiosity', 'title': "Curiosity, exploring Mars' surface | The Planetary Society"}, page_content="Watching Curiosity Land on Mars A couple in Times Square listens intently to the news reports as NASA's Curiosity rover attempts to land on Mars.Image: Navid BaratyWhat are Curiosity's mission objectives?Within a year, Curiosity achieved its primary goal of verifying that\nGale Crater, where it landed, was an ancient lakebed that had water and\nchemical elements that once could have supported life.Now in extended mission operations, Curiosity is continuing to look\nfor signs of life in Gale Crater. In January 2022, scientists reported\nthat rock samples found by Curiosity revealed interesting carbon\nsignatures. On 