In [1]:
import os

from llama_index.vector_stores import AstraDBVectorStore
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
)
from llama_index.llama_dataset import download_llama_dataset
from dotenv import load_dotenv

In [4]:
load_dotenv()

ASTRA_DB_APPLICATION_TOKEN = os.environ.get("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_API_ENDPOINT = os.environ.get("ASTRA_DB_API_ENDPOINT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [5]:
dataset = download_llama_dataset(
  "PaulGrahamEssayDataset", "./data"
)

documents = SimpleDirectoryReader("./data").load_data()
print(f"Total documents: {len(documents)}")
print(f"First document, id: {documents[0].doc_id}")
print(f"First document, hash: {documents[0].hash}")
print(
    "First document, text"
    f" ({len(documents[0].text)} characters):\n"
    f"{'=' * 20}\n"
    f"{documents[0].text[:360]} ..."
)

Total documents: 1
First document, id: d8832b15-40b5-418c-af1a-73ecf712162e
First document, hash: 5e01619864093cb27f01c4db4d7d17902e9f5802b77a8e3890d36b4527dc32b8
First document, text (250287 characters):
{
    "examples": [
        {
            "query": "In the essay, the author mentions his early experiences with programming. Describe the first computer he used for programming, the language he used, and the challenges he faced.",
            "query_by": {
                "model_name": "gpt-4",
                "type": "ai"
            },
            "refere ...


In [6]:
astra_db_store = AstraDBVectorStore(
    token=ASTRA_DB_APPLICATION_TOKEN,
    api_endpoint=ASTRA_DB_API_ENDPOINT,
    collection_name="test",
    embedding_dimension=1536,
)

In [7]:
storage_context = StorageContext.from_defaults(vector_store=astra_db_store)

index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

In [8]:
query_engine = index.as_query_engine()
query_string_1 = "Why did the author choose to work on AI?"
response = query_engine.query(query_string_1)

print(query_string_1)
print(response.response)

Why did the author choose to work on AI?
The author chose to work on AI because of two specific influences. The first influence was a novel called "The Moon is a Harsh Mistress" by Heinlein, which featured an intelligent computer called Mike. The second influence was a PBS documentary that showed Terry Winograd using SHRDLU, a program that could understand and respond to natural language commands. These two influences sparked the author's interest in AI and made him believe that the development of intelligent computers like Mike and programs like SHRDLU was imminent.
