In [1]:
import os
from dotenv import load_dotenv
from datasets import load_dataset
from langchain.vectorstores.astradb import AstraDB
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

load_dotenv()

ASTRA_DB_COLLECTION = "philosophers"
ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_API_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT")
OPEN_AI_API_KEY = os.environ.get("OPENAI_API_KEY")

LANGSMITH_API_KEY = os.environ.get("LANGSMITH_API_KEY")

os.environ["LANGCHAIN_API_KEY"] = LANGSMITH_API_KEY
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "default"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
embedding = OpenAIEmbeddings()
vstore = AstraDB(
    embedding=embedding,
    collection_name=ASTRA_DB_COLLECTION,
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
)

In [6]:
philo_dataset = load_dataset("datastax/philosopher-quotes")["train"]
print("An example entry:")
print(philo_dataset[16])

print(f"Quote count: {len(philo_dataset)}")

An example entry:
{'author': 'aristotle', 'quote': 'Love well, be loved and do something of value.', 'tags': 'love;ethics'}
Quote count: 450


In [7]:
docs = []

for entry in philo_dataset:
    metadata = {"author": entry["author"]}
    if entry["tags"]:
        # Add metadata tags to the metadata dictionary
        for tag in entry["tags"].split(";"):
            metadata[tag] = "y"
    # Add a LangChain document with the quote and metadata tags
    doc = Document(page_content=entry["quote"], metadata=metadata)
    docs.append(doc)

inserted_ids = vstore.add_documents(docs)
print(f"\nInserted {len(inserted_ids)} documents.")


Inserted 450 documents.


In [None]:
print(vstore.astra_db.collection(ASTRA_DB_COLLECTION).find())

In [3]:
retriever = vstore.as_retriever(search_kwargs={'k': 3})

prompt_template = """
Answer the question based only on the supplied context. If you don't know the answer, say you don't know the answer.
Context: {context}
Question: {question}
Your answer:
"""
prompt = ChatPromptTemplate.from_template(prompt_template)
model = ChatOpenAI(openai_api_key=OPEN_AI_API_KEY)

chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

response = chain.invoke("In the given context, what subject are philosophers most concerned with?")
print(response)

--- Logging error ---
Traceback (most recent call last):
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langchain_core/callbacks/manager.py", line 1836, in _configure
    handler = LangChainTracer(project_name=tracer_project)
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langchain_core/tracers/langchain.py", line 96, in __init__
    self.client = client or get_client()
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langchain_core/tracers/langchain.py", line 56, in get_client
    _CLIENT = Client()
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langsmith/client.py", line 293, in __init__
    _validate_api_key_if_hosted(self.api_url, self.api_key)
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langsmith/client.py", line 198, in _validate_api_key_if_hosted
    raise ls_utils.LangSm

The subject that philosophers are most concerned with in the given context is knowledge or truth.


--- Logging error ---
Traceback (most recent call last):
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langchain_core/callbacks/manager.py", line 1836, in _configure
    handler = LangChainTracer(project_name=tracer_project)
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langchain_core/tracers/langchain.py", line 96, in __init__
    self.client = client or get_client()
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langchain_core/tracers/langchain.py", line 56, in get_client
    _CLIENT = Client()
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langsmith/client.py", line 293, in __init__
    _validate_api_key_if_hosted(self.api_url, self.api_key)
  File "/Users/hiltonrosenfield/miniconda3/envs/env_ragstack/lib/python3.9/site-packages/langsmith/client.py", line 198, in _validate_api_key_if_hosted
    raise ls_utils.LangSm