In [None]:
# RAGInitializer Driver
from RAGInitializer import LLMType, EmbeddingType, RAGConfig, initialize_rag_components

'''
Available choices for language models (LLMs)
GPT: 
    0 - gpt-4o
Ollama:
    0 - llama3.1:8b-instruct-q5_K_M
    1 - llama3.2:latest
    2 - mistral-nemo:12b-instruct-2407-q5_K_M

Available choices for embedding models
GPT:
    0 - text-embedding-3-small
    1 - text-embedding-3-large
Ollama:
    0 - nomic-embed-text
    1 - mxbai-embed-large
    2 - all-minilm
    3 - snowflake-arctic-embed
Sentence Transformer:
    0 - all-MiniLM-L6-v2
    1 - all-MiniLM-L12-v2
    2 - all-mpnet-base-v2
    3 - all-distilbert-base-v2
    4 - multi-qa-mpnet-base-dot-v1
'''

# Build the RAG configuration. The two index values pick entries from the
# model listings above; with these settings that is Ollama LLM 1
# (llama3.2:latest) and Sentence Transformer embedding 2 (all-mpnet-base-v2).
config = RAGConfig(
    env_path = r"",  # left empty here; presumably the path to an env file - TODO confirm
    llm_type=LLMType.OLLAMA, # GPT or OLLAMA
    embedding_type=EmbeddingType.SENTENCE_TRANSFORMER, # OLLAMA, GPT, or SENTENCE_TRANSFORMER
    llm_index=1,
    embedding_index=2
)

# Every name unpacked here is reused by the driver cells further down.
model, embeddings, dimensions, selected_llm, selected_embedding_model, model_manager = initialize_rag_components(config)
if model and embeddings and dimensions:
    print("\nRAG components successfully initialized.")
    print(f"Model: {selected_llm}")
    print(f"Embeddings: {selected_embedding_model}")
else:
    # Report a failed or partial initialization instead of staying silent;
    # otherwise the first visible symptom is an unrelated-looking error in a
    # later cell that consumes these components.
    print(
        "\nRAG initialization incomplete; missing or empty: "
        + ", ".join(
            name
            for name, value in (
                ("model", model),
                ("embeddings", embeddings),
                ("dimensions", dimensions),
            )
            if not value
        )
    )

In [None]:
# ChunkingInitializer Driver

from ChunkingInitializer import ChunkingInitializer
from ChunkingMethod import ChunkingMethod
import sys

# Configure chunking for a single source document, reusing the LLM and
# embedding model names selected by the RAG initializer cell.
processor = ChunkingInitializer(
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    source_path=r"C:\Users\docsp\Desktop\AI_ML_Folder\Python_Practice_Folder\Natural_Language_Processing\Source_Documents\MCDP_1_Warfighting.pdf",
    chunking_method=ChunkingMethod.SEMANTIC,
    enable_preprocessing=False,
    model_name=selected_llm,
    embedding_model=selected_embedding_model
)

# Process documents
try:
    documents = processor.process()

    # Output results: metadata plus the first 200 characters of each chunk
    for doc in documents:
        print(f"Chunk metadata: {doc.metadata}")
        print(f"Chunk content: {doc.page_content[:200]}...")

except Exception as e:
    print(f"Processing failed: {e}")
    # Re-raise instead of calling sys.exit(1): inside a notebook sys.exit only
    # raises SystemExit, which IPython reports without the original traceback.
    # A bare raise still stops the cell and keeps the real error visible.
    raise

if documents:
    print("\nDocument processing completed successfully.")
else:
    # An empty result is not an exception, but the datastore cell consumes
    # `documents`, so make the empty case visible here.
    print("\nDocument processing produced no chunks.")

In [None]:
# CombinedProcessor Driver - Combines the functionality of ChunkingInitializer and DatastoreInitializer

from CombinedProcessor import CombinedProcessor, ChunkingMethod
from DatastoreInitializer import StorageType

# Build the combined chunk-and-store processor from the components produced
# by the RAG initializer cell (model manager, embeddings, dimensions, names).
processor = CombinedProcessor(**{
    "doc_name": "test-index",
    "model_manager": model_manager,
    "embedding_model": selected_embedding_model,
    "embeddings": embeddings,
    "dimensions": dimensions,
    "chunking_method": ChunkingMethod.SEMANTIC,
    "storage_type": StorageType.PINECONE_ADD,
    "model_name": selected_llm,
})

# One entry per document to ingest; the single empty string looks like a
# placeholder to be replaced with real path(s) - TODO confirm.
doc_input = [r""]

# Run the documents through the processor and keep the resulting datastore.
datastore = processor.process_and_store(doc_input)

In [None]:
# DatastoreInitializer Driver

import sys
from DatastoreInitializer import DatastoreInitializer, StorageType

# Build the datastore manager. The Pinecone API key is obtained from
# model_manager rather than being written into the notebook.
datastore_manager = DatastoreInitializer(
    doc_name='test-datastore',
    pinecone_api_key=model_manager.get_pinecone_api_key(),
    dimensions=dimensions,
    embedding_model=selected_embedding_model
)

try:
    # Set up datastore from the chunks produced by the ChunkingInitializer
    # cell (`documents`) and the embeddings from the RAG initializer cell.
    datastore = datastore_manager.setup_datastore(
        storage_type=StorageType.PINECONE_NEW,
        documents=documents,
        embeddings=embeddings
    )
except Exception as e:
    print(f"Error: {e}")
    # Re-raise instead of calling sys.exit(1): inside a notebook sys.exit only
    # raises SystemExit, which IPython reports without the original traceback.
    # A bare raise still stops the cell and keeps the real error visible.
    raise

In [None]:
# QuestionInitializer Driver

from QuestionInitializer import QuestionInitializer

# Prompt template used when answering: "default", "short", or "detailed".
template_name = "short"

# Questions to run against the datastore.
questions = [
    "What is the Goldwater-Nichols Reorganization Act?",
    "What are the major functions of NAVSEA?",
    "What is the mission of NAVWAR?",
]

# Wire the question processor to the datastore, the LLM, and the embedding
# model name produced by the earlier cells.
processor = QuestionInitializer(
    datastore=datastore,
    model=model,
    embedding_model=selected_embedding_model,
)

try:
    # process_questions returns a value that is formatted below as seconds
    # with two decimals.
    processing_time = processor.process_questions(
        questions=questions,
        use_ground_truth=False,
        template_name=template_name,
    )
    print(f"Total processing time: {processing_time:.2f} seconds")
except Exception as exc:
    # Any failure is reported as a message only; it is not re-raised.
    print(f"Error: {exc}")