In [7]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into this process's environment.
load_dotenv()

# load_dotenv() already writes the values into os.environ, so copying each one
# back onto itself is unnecessary -- and `os.environ[name] = None` fails with an
# unhelpful "str expected, not NoneType" TypeError when a variable is absent.
# Check explicitly instead and name exactly what is missing.
REQUIRED_ENV_VARS = (
    "ASTRA_DB_APPLICATION_TOKEN",
    "ASTRA_DB_ID",
    "ASTRA_DB_API_ENDPOINT",
)
missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or in the shell before running this notebook."
    )

In [10]:
from langchain_astradb import AstraDBVectorStore
from langchain_ollama import OllamaEmbeddings

# Ollama-served embedding model; handed to the vector store below as its
# embedding function.
embeddings = OllamaEmbeddings(model="all-minilm")

# Astra DB collection "testdb". The endpoint and token are read from the
# environment rather than being hard-coded in the notebook.
vector_store = AstraDBVectorStore(
    collection_name="testdb",
    embedding=embeddings,
    api_endpoint=os.environ.get("ASTRA_DB_API_ENDPOINT"),
    token=os.environ.get("ASTRA_DB_APPLICATION_TOKEN"),
)

In [13]:
from datasets import load_dataset

# Fetch the "datastax/philosopher-quotes" dataset and keep only its "train" split.
phil_dataset = load_dataset("datastax/philosopher-quotes")["train"]

# Print a label followed by one sample record (index 16) to show the row layout.
print("example", phil_dataset[16], sep="\n")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Generating train split: 100%|██████████| 450/450 [00:00<00:00, 1052.36 examples/s]

example
{'author': 'aristotle', 'quote': 'Love well, be loved and do something of value.', 'tags': 'love;ethics'}





In [16]:
from langchain_classic.schema import Document

# Turn every dataset row into a LangChain Document that the vector store can ingest.
# The quote text becomes the page content; the author and each tag go into metadata.
docs = []
for entry in phil_dataset:
    # Tags are stored as one ';'-separated string; rows without tags contribute none.
    tag_names = entry["tags"].split(";") if entry["tags"] else []

    # Each tag becomes its own metadata key flagged with "y".
    metadata = {"author": entry["author"]}
    metadata.update(dict.fromkeys(tag_names, "y"))

    docs.append(Document(page_content=entry["quote"], metadata=metadata))

In [17]:
# Sanity check: display the first constructed Document (page content plus metadata).
docs[0]

Document(metadata={'author': 'aristotle', 'knowledge': 'y'}, page_content="True happiness comes from gaining insight and growing into your best possible self. Otherwise all you're having is immediate gratification pleasure, which is fleeting and doesn't grow you as a person.")

In [18]:
# Give every document a stable, position-based id. Without explicit ids the store
# assigns fresh ones on each call, so re-running this cell would insert a second
# copy of every quote; with fixed ids a re-run targets the same entries instead.
doc_ids = [f"philosopher-quote-{index}" for index in range(len(docs))]

# Embed the documents and write them to the Astra DB collection; returns the stored ids.
inserted_ids = vector_store.add_documents(docs, ids=doc_ids)

# Backward-compatible alias for the original (misspelled) variable name.
insterted_ids = inserted_ids

In [26]:
from langchain_classic.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM
from langchain_classic.schema.output_parser import StrOutputParser
from langchain_classic.schema.runnable import RunnablePassthrough

# Retrieve the 3 most similar quotes for each question.
# The keyword must be spelled `search_kwargs`: the earlier misspelling
# (`searcj_kwargs`) was not recognised as a search setting, so the intended
# k=3 limit was never applied.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Prompt that restricts the model to the retrieved context.
prompt_template = """
Answer the question based only on the supplied context. If you don't know the answer, say you don't know the answer.
Context: {context}
Question: {question}
Your answer:
"""
prompt = ChatPromptTemplate.from_template(prompt_template)

# Local LLM served by Ollama.
model = OllamaLLM(model="gemma3:1b")

# Pipeline: the incoming question is sent to the retriever to fill {context} and is
# passed through unchanged to fill {question}; the rendered prompt goes to the model
# and the model output is parsed into a plain string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

# Last expression of the cell, so the answer is displayed as the cell output.
chain.invoke("In the given context, what is the most important to allow the brain and provide me the tags?")


'The context states that sleep is necessary to restore the brain and is fundamental to the brain’s nature.\nTags: brain, sleep'

In [None]:
# WARNING: This will delete the collection and all documents in the collection
# vector_store.delete_collection()