In [1]:
import os
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain_community.vectorstores import Cassandra

from dotenv import load_dotenv

## Load variables from a .env file (if one is found) into the process
## environment so that later os.environ lookups can see them.
load_dotenv()

True

In [2]:
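## On Python 3.12 or newer, importing cassio may fail because the standard-library
## `asyncore` module (relied on by the Cassandra driver) was removed in 3.12.
## If that happens, install the backport first: pip install pyasyncore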

import cassio

In [3]:
## Credentials come from the environment; indexing os.environ directly means a
## missing variable raises KeyError here instead of failing later with None.
groq_api_key = os.environ["GROQ_API_KEY"]
astra_id = os.environ["ASTRA_DB_ID"] if "ASTRA_TOKEN_ID" not in os.environ else None
astra_token = os.environ["ASTRA_TOKEN_ID"]
astra_id = os.environ["ASTRA_DB_ID"]

## Connect to Astra DB (registers the connection with cassio for later use)
cassio.init(
    database_id=astra_id,
    token=astra_token,
)


In [4]:
from langchain_community.document_loaders import WebBaseLoader
import bs4


In [5]:
## Load a single blog post. The SoupStrainer restricts parsing to elements whose
## CSS class is one of the listed post-* classes, so the rest of the page is ignored.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header"),
        ),
    },
)

## Fetch and parse the page into LangChain Document objects
text_documents = loader.load()

In [None]:
## Preview each loaded document (metadata plus the first 500 characters of text)
## instead of echoing the full article into the cell output. The comprehension
## also yields an empty list, rather than raising, if nothing was loaded.
[(doc.metadata, doc.page_content[:500]) for doc in text_documents]

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

## Split the loaded page into overlapping chunks for embedding.
## chunk_size / chunk_overlap are measured by the splitter's length function
## (characters unless configured otherwise); the overlap keeps some shared
## context between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
split_documents = text_splitter.split_documents(documents=text_documents)

In [21]:
## Show only the first five chunks to sanity-check the split without flooding the output
split_documents[:5]

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistakes and refi

In [None]:
## Convert data to vectors and store in AstraDB

## Embedding function backed by the Ollama model named below.
## NOTE(review): gemma3 is primarily a chat model; confirm the Ollama server
## returns embeddings for it, or switch to a dedicated embedding model.
embeddings = OllamaEmbeddings(model="gemma3")

## Vector store over the "qa_mini_demo" table.
## session=None is passed explicitly; presumably the store then falls back to
## the connection registered by cassio.init -- verify against the library docs.
astra_vector_store = Cassandra(
    table_name="qa_mini_demo",
    session=None,
    embedding=embeddings,
)