In [None]:
from dotenv import load_dotenv
import os
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
)
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.vector_stores.astra_db import AstraDBVectorStore

from llama_index.llms.groq import Groq

In [2]:
# Ask the Hugging Face transformers library to skip its advisory warnings.
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "1"

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

astra_api_endpoint = os.getenv("ASTRA_DB_ENDPOINT")
astra_db_token = os.getenv("ASTRA_DB_TOKEN")
groq_api_token = os.getenv("GROQ_API_TOKEN")

# Fail fast when a credential is unset or empty, and name every offending
# variable so the reader knows exactly what to add to the .env file.
required_env = {
    "ASTRA_DB_ENDPOINT": astra_api_endpoint,
    "ASTRA_DB_TOKEN": astra_db_token,
    "GROQ_API_TOKEN": groq_api_token,
}
missing_env = [name for name, value in required_env.items() if not value]
if missing_env:
    raise ValueError(
        "Missing required environment variables. Please check your .env file"
        f" (missing: {', '.join(missing_env)})"
    )

In [None]:
# Read every file under the data directory into llama-index documents,
# letting the reader use up to 10 workers.
reader = SimpleDirectoryReader("./data/paul_graham/")
documents = reader.load_data(num_workers=10)

# Sanity-check the load: total count, then the identity of the first document.
first_doc = documents[0]
print(f"Total documents: {len(documents)}")
print(f"First document, id: {first_doc.doc_id}")
print(f"First document, hash: {first_doc.hash}")

# Show the size of the first document and at most its first 360 characters.
separator = "=" * 20
char_count = len(first_doc.text)
preview = first_doc.text[:360]
print(f"First document, text ({char_count} characters):\n{separator}\n{preview} ...")

Total documents: 9
First document, id: a1a8887e-cccc-4575-bf2e-5a37ef5346ea
First document, hash: 881f20b1d98203aa43022626abbacda8eac242f047fcc468659e6470aad6a965
First document, text (23 characters):
LIVER  
HEALTH  
BASICS ...


In [4]:
# Name of the Astra DB collection that backs the vector store.
ASTRA_COLLECTION_NAME = "medical_collections"
# Vector size declared for the collection.
# NOTE(review): presumably this must equal the output size of the embedding
# model used when indexing — confirm before swapping models.
EMBEDDING_DIMENSION = 768

# Vector store handle, authenticated with the credentials read from the environment.
astra_db_store = AstraDBVectorStore(
    api_endpoint=astra_api_endpoint,
    token=astra_db_token,
    collection_name=ASTRA_COLLECTION_NAME,
    embedding_dimension=EMBEDDING_DIMENSION,
)

In [None]:
# Route index storage through the Astra DB vector store.
storage_context = StorageContext.from_defaults(vector_store=astra_db_store)

# Embeddings are requested from an Ollama server at this local address.
OLLAMA_BASE_URL = "http://localhost:11434"
embed_model = OllamaEmbedding(
    model_name="nomic-embed-text",
    base_url=OLLAMA_BASE_URL,
)

# Build the index from the loaded documents using the embedding model above.
# NOTE(review): re-running this cell presumably embeds and inserts the same
# documents again — confirm whether duplicates in the collection are acceptable.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    embed_model=embed_model,
)

In [None]:
# Answer generation is delegated to a Groq-hosted model, authenticated with
# the token read from the environment.
llm = Groq(api_key=groq_api_token, model="deepseek-r1-distill-llama-70b")

# Ask a single question against the index through a query engine bound to that LLM.
question = "A BRIEF REVIEW OF THE ESSAY"
query_engine = index.as_query_engine(llm=llm)
response = query_engine.query(question)

# Print just the answer text carried by the response object.
print(response.response)

<think>
Okay, I need to answer the query "A BRIEF REVIEW OF THE FATTY LIVER DISEASE" using only the provided context information. Let me go through the context step by step.

Looking at the context, I see that on page 4, there's a mention of "Hepatitis and/or Fatty Liver" under the different types of liver disease. However, the context doesn't provide detailed information about fatty liver disease itself. It talks about other conditions like hepatitis, fibrosis, cirrhosis, and liver cancer, but not specifically about fatty liver.

I should check if there's any indirect information. On page 6, there's a section about liver cancer drivers and factors, which includes "Diabetes & Obesity" and "NASH" (Non-Alcoholic Steatohepatitis Hepatitis), which is related to fatty liver. But again, the context doesn't explicitly discuss fatty liver disease in detail.

Since the context doesn't provide a specific section or detailed information about fatty liver disease, I can't form a detailed answer. T