In [None]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one is present) so that the
# os.getenv lookups in later cells can find the Astra DB credentials.
load_dotenv()

In [None]:
import cassio

# Astra DB credentials are read from the environment so that no secret is
# ever stored in the notebook itself.
ASTRA_DB_APPLICATION_TOKEN = os.getenv("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID = os.getenv("ASTRA_DB_ID")

# os.getenv returns None for an unset variable. Fail here with an actionable
# message instead of handing None to cassio and hitting an opaque connection
# error further down the notebook.
_missing_settings = [
    name
    for name, value in (
        ("ASTRA_DB_APPLICATION_TOKEN", ASTRA_DB_APPLICATION_TOKEN),
        ("ASTRA_DB_ID", ASTRA_DB_ID),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in your .env file or shell before running this cell."
    )

# Initialise cassio with the Astra DB token and database id.
cassio.init(
    token=ASTRA_DB_APPLICATION_TOKEN,
    database_id=ASTRA_DB_ID,
)

In [None]:
# Web pages whose content is loaded, split into chunks, and indexed in the
# cells below.
urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma

## Load and Split Documents

# Splitter settings, kept as named constants so they are easy to tune.
# Lengths are measured by the tiktoken-based length function that
# `from_tiktoken_encoder` configures.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 0

# One loader call per URL; each call returns a list of documents.
docs = [WebBaseLoader(url).load() for url in urls]

# Flatten the per-URL lists into a single list of documents.
docs_list = [document for page_documents in docs for document in page_documents]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Chunks that will be embedded and stored in the vector store.
doc_splits = text_splitter.split_documents(docs_list)

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Name of the Hugging Face model used to embed both the document chunks and
# the queries.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

In [None]:
from langchain.vectorstores.cassandra import Cassandra

# Vector store backed by the "qa_mini_demo" table.
# `session` and `keyspace` are deliberately left as None so the store falls
# back to the connection registered by the earlier `cassio.init(...)` call.
astra_vector_store = Cassandra(
    embedding=embeddings,
    table_name="qa_mini_demo",
    session=None,
    keyspace=None,
)

In [None]:
from langchain.indexes.vectorstore import VectorStoreIndexWrapper

# Embed the chunks and write them to the vector store.
# NOTE(review): re-running this cell presumably inserts the same chunks again
# (no explicit ids are passed) — confirm before executing it more than once.
astra_vector_store.add_documents(doc_splits)

# The inserted items are document chunks, not headlines.
print("Inserted %i document chunks." % len(doc_splits))

# Wrap the vector store in LangChain's VectorStoreIndexWrapper.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)

In [None]:
# Expose the vector store through the retriever interface (default settings).
retriever = astra_vector_store.as_retriever()

# `invoke` takes the query (plus an optional run config). The previous
# `ConsistencyLevel="LOCAL_ONE"` keyword is not an option of the retriever or
# of the vector store's search API, so it never changed how the query ran and
# has been dropped. Cassandra consistency is configured on the driver
# session / statement, not per retriever call.
# Left as the last expression so the notebook displays the retrieved documents.
retriever.invoke("What is agent")