In [2]:
# Setup environment and imports
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Move from the notebook's folder up to the project root. The guard keeps this
# cell idempotent: an unconditional os.chdir('../') climbs one level further on
# every re-run, which breaks the relative './data/' and 'src' paths used by the
# later cells.
# NOTE(review): assumes the project root is the directory that contains 'src'.
if not os.path.isdir('src'):
    os.chdir('../')

print(f"Working directory: {os.getcwd()}")


Working directory: /home/macowen/Desktop/projects/mediAi


In [3]:
# Import all required modules
import sys
from pathlib import Path

# Add src to path for imports. Skip the insert when the entry is already
# present so that re-running this cell does not pile up duplicate sys.path
# entries.
src_dir = str(Path.cwd() / 'src')
if src_dir not in sys.path:
    sys.path.insert(0, src_dir)

from src.enums import QuestionTheme, ModelType, ResponseSource
from src.models import MedicalAnswer, ThemeDetectionResponse, VectorSearchResult
from src.prompts import PromptTemplates
from src.logger import LoggerSetup
from src.vector_utils import (
    DocumentLoader, DocumentSplitter, EmbeddingManager, 
    VectorStore, VectorSearch
)
from src.model_utils import ModelManager, ThemeDetector, ResponseGenerator
from src.rag_pipeline import MedicalRAGPipeline

# Setup logging: request a logger from LoggerSetup, passing this module's
# __name__, and emit an INFO record confirming that the imports above completed.
logger = LoggerSetup.setup_logger(__name__)
logger.info("All imports successful")


  from .autonotebook import tqdm as notebook_tqdm
2026-02-01 10:50:09 - __main__ - INFO - [2354185989.py:21] - All imports successful


In [None]:
# Step 1: read the PDF documents under ./data/ through DocumentLoader
print("=" * 80, "STEP 1: Load Documents", "=" * 80, sep="\n")

# The result is kept as `extracted_data` for the filtering step that follows
extracted_data = DocumentLoader.load_pdf_documents('./data/')
print(f"✓ Loaded {len(extracted_data)} PDF documents\n")


In [None]:
# Step 2: pass the loaded documents through DocumentLoader.filter_documents,
# then hand the filtered set to DocumentSplitter.split_documents
print("\n".join(["=" * 80, "STEP 2: Filter and Split Documents", "=" * 80]))

filtered_docs = DocumentLoader.filter_documents(extracted_data)
print(f"✓ Filtered to {len(filtered_docs)} documents")

# `splitted_docs` is the chunk list consumed when the vector store is built
splitted_docs = DocumentSplitter.split_documents(filtered_docs)
print(f"✓ Split into {len(splitted_docs)} chunks\n")


In [None]:
# Step 3: obtain the embeddings object and prepare the Pinecone-backed vector store
print("=" * 80, "STEP 3: Initialize Embeddings & Vector Store", "=" * 80, sep="\n")

embeddings = EmbeddingManager.get_embeddings(ModelType.EMBEDDING.value)
print(f"✓ Embeddings initialized: {ModelType.EMBEDDING.value}")

# The Pinecone key is read from the environment; a missing or empty value
# stops the cell with a ValueError
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
if not PINECONE_API_KEY:
    raise ValueError("PINECONE_API_KEY not found in environment variables")

pc = VectorStore.initialize_pinecone(PINECONE_API_KEY)
index_name = "mediai-bot"

# Ensure the index exists first, then load or create the vector store from the
# split documents and the embeddings
VectorStore.create_index_if_not_exists(pc, index_name)
vectorstore = VectorStore.load_or_create_vectorstore(splitted_docs, embeddings, index_name)
print(f"✓ Vector store ready: {index_name}\n")


In [None]:
# Step 4: construct the MedicalRAGPipeline on top of the vector store from step 3
print("=" * 80, "STEP 4: Initialize RAG Pipeline", "=" * 80, sep="\n")

# `rag_pipeline` is the object queried in the test cell below
rag_pipeline = MedicalRAGPipeline(vectorstore)
print("✓ RAG Pipeline initialized and ready\n")


In [None]:
# Step 5: Test the RAG Pipeline with a sample question and print the returned
# answer object field by field
print("=" * 80)
print("STEP 5: Test RAG Pipeline")
print("=" * 80)

# Define your question here
user_question = "What is hypertension?"

print(f"Question: {user_question}\n")

# Process through RAG pipeline (search_k=3 is forwarded to process_question)
answer = rag_pipeline.process_question(user_question, search_k=3)

print(f"Theme Detected: {answer.theme}")
# NOTE(review): the :.2f format assumes confidence_score is a number — confirm
# it can never be None on the MedicalAnswer model
print(f"Confidence Score: {answer.confidence_score:.2f}")
print(f"Source Type: {answer.source_type}")
# Plain string literals below: these lines have no placeholders, so the f
# prefix was unnecessary
print("\nAnswer:")
print("-" * 80)
print(answer.answer)
print("-" * 80)

# Sources and caveats are only printed when the answer carries them
if answer.sources:
    print("\nSources Used:")
    for i, source in enumerate(answer.sources, 1):
        print(f"  {i}. {source}")

if answer.caveats:
    print("\nImportant Notes:")
    print(answer.caveats)
