In [None]:
from sentence_transformers import SentenceTransformer

# Query-side embedding model.
# It must match the model that generated the indexed .npy embedding files.
embedding_model_name = "BAAI/bge-base-en-v1.5"

# Start from "not loaded"; only a successful construction replaces the None.
query_model = None
try:
    query_model = SentenceTransformer(embedding_model_name)
except Exception as load_error:
    # Typical causes: the model is not downloaded, or the name/path is wrong.
    print(f"Error loading embedding model: {load_error}")
else:
    print(f"Embedding model '{embedding_model_name}' loaded successfully.")


def get_query_embedding(query_text: str) -> list:
    """Embed a single query string with the module-level ``query_model``.

    Args:
        query_text: Text to embed. It is handed to the model unchanged, so any
            retrieval prefix must already be part of the string.

    Returns:
        The model output converted with ``.tolist()``. Normalized embeddings
        are requested via ``normalize_embeddings=True``.

    Raises:
        ValueError: If ``query_model`` is ``None`` (the model failed to load).
    """
    # Compare against None explicitly: the truthiness of a model object can be
    # driven by __len__/__bool__ and is not a reliable "is it loaded?" signal.
    if query_model is None:
        raise ValueError("Embedding model not loaded.")
    # A single string (not a list) is passed to encode().
    return query_model.encode(query_text, normalize_embeddings=True).tolist()



In [None]:
from qdrant_client import QdrantClient

# Connection settings for the Qdrant instance reachable on localhost.
QDRANT_URL_LOCAL = "http://localhost:6333"
COLLECTION_NAME_LOCAL = "financial_sp500_local_final_v2" # Your collection name
# Module-level client; search_qdrant() reads this name when it runs a search.
client = QdrantClient(url=QDRANT_URL_LOCAL)

def search_qdrant(query_vector, top_k=5):
    """Run a vector similarity search against ``COLLECTION_NAME_LOCAL``.

    Args:
        query_vector: The query embedding. Plain sequences and array-like
            objects (e.g. NumPy arrays) are both handled by the emptiness check.
        top_k: Maximum number of hits to request from Qdrant.

    Returns:
        Whatever ``client.search`` returns, requested with payloads included
        and stored vectors excluded.

    Raises:
        ValueError: If the module-level ``client`` is ``None``, or if
            ``query_vector`` is ``None`` or has length zero.
    """
    if client is None:
        raise ValueError("Qdrant client not initialized.")

    # `not query_vector` raises for multi-element NumPy arrays (ambiguous truth
    # value), so emptiness is decided by length instead.
    if query_vector is None:
        vector_is_empty = True
    else:
        try:
            vector_is_empty = len(query_vector) == 0
        except TypeError:
            # Objects without a length are passed through to the client as-is.
            vector_is_empty = False
    if vector_is_empty:
        raise ValueError("Query vector is empty.")

    # NOTE(review): newer qdrant-client releases appear to deprecate `search`
    # in favour of `query_points` -- confirm against the installed version
    # before upgrading the dependency.
    return client.search(
        collection_name=COLLECTION_NAME_LOCAL,
        query_vector=query_vector,
        limit=top_k,  # Number of results to return
        with_payload=True,  # Crucial: get the original text and metadata
        with_vectors=False,  # Stored vectors are not needed for RAG context
    )



In [None]:
# (Assuming your previous code for query_model, client, get_query_embedding, search_qdrant is present)

def format_retrieved_context(search_results, max_context_chars=15000):
    """Format Qdrant search hits into one context string for the LLM.

    Each hit with a non-empty payload becomes a "Source N" header (ticker,
    year, filing, plus fields that depend on ``source_type``) followed by the
    chunk text and a ``---`` separator.

    Args:
        search_results: Iterable of hits; each hit exposes a ``payload``
            attribute holding a dict (or a falsy value when there is none).
        max_context_chars: Character budget for the assembled context. The
            first usable chunk is always included, even if it alone exceeds
            the budget; later chunks are added only while the total stays
            within it.

    Returns:
        ``(context_string, sources_metadata)``. ``sources_metadata`` holds the
        full payload of every included chunk, and "Source N" in the context
        corresponds to ``sources_metadata[N - 1]``.
    """
    context_parts = []
    sources_metadata = []
    current_chars = 0

    if not search_results:
        return "", []

    for hit in search_results:
        payload = hit.payload
        if not payload:
            continue

        # A stored null (or missing) chunk_text is treated as empty text
        # instead of crashing on len(None).
        chunk_text = payload.get("chunk_text") or ""
        if not isinstance(chunk_text, str):
            chunk_text = str(chunk_text)

        # Number by included sources, not by hit position, so labels stay
        # gap-free and line up with sources_metadata.
        source_number = len(sources_metadata) + 1

        header_lines = [
            f"Source {source_number}:",
            f"  Ticker: {payload.get('ticker', 'N/A')}",
            f"  Year: {payload.get('year', 'N/A')}",
            f"  Filing: {payload.get('filing_type', payload.get('filing_category', 'N/A'))}",
        ]
        # NOTE(review): the source_type values below are assumed to match what
        # the indexing step wrote -- confirm against the stored payloads.
        source_type = payload.get('source_type')
        if source_type == 'sec_filing':
            header_lines.append(f"  Section: {payload.get('section_name', 'N/A')}")
            header_lines.append(f"  Item: {payload.get('item', 'N/A')}")
        elif source_type == 'earnings_transcript':
            header_lines.append(f"  Quarter: {payload.get('quarter', 'N/A')}")
            header_lines.append(f"  Speaker: {payload.get('turn_speaker_simple', 'N/A')}")
            header_lines.append(f"  Call Section: {payload.get('turn_section', 'N/A')}")

        source_info = "\n".join(header_lines) + "\n"
        content_block = f"  Content: {chunk_text}\n---\n"

        # Budget against exactly what will be emitted (header + content
        # wrapper), not just the raw chunk text.
        entry_chars = len(source_info) + len(content_block)
        if context_parts and current_chars + entry_chars > max_context_chars:
            print(f"Context character limit ({max_context_chars}) reached. Stopping context assembly.")
            break

        context_parts.append(source_info)
        context_parts.append(content_block)
        sources_metadata.append(payload)  # Full payload, for detailed citation if needed
        current_chars += entry_chars

    return "".join(context_parts), sources_metadata

In [None]:
from openai import OpenAI

# --- LLM Configuration ---
# IMPORTANT: Set your OpenAI API key as an environment variable:
# export OPENAI_API_KEY="your_api_key_here"
# Do not hardcode the key in the notebook. If it has to be passed explicitly,
# read it from a secret store first: llm_client = OpenAI(api_key=...)

# Defined outside the try block so the name exists even when the client
# cannot be constructed.
LLM_MODEL_NAME = "gpt-3.5-turbo" # Or "gpt-4", "gpt-4-turbo-preview", etc.
try:
    llm_client = OpenAI() # Reads API key from environment variable OPENAI_API_KEY
    print(f"OpenAI client initialized for model: {LLM_MODEL_NAME}")
except Exception as e:
    print(f"Error initializing OpenAI client: {e}")
    print("Please ensure the 'openai' library is installed and your API key is set.")
    # Downstream code treats None as "LLM unavailable".
    llm_client = None


def get_llm_response(query_text: str, context_string: str):
    """Ask the chat model to answer ``query_text`` from ``context_string`` only.

    Args:
        query_text: The user's question, placed after the context in the prompt.
        context_string: Retrieved context, placed between ``---`` markers.

    Returns:
        The content of the first completion choice, or a fixed error message
        string if the API call raises.

    Raises:
        ValueError: If the module-level ``llm_client`` is falsy.
    """
    if not llm_client:
        raise ValueError("LLM client not initialized.")

    # System instructions: answer strictly from the context and cite sources.
    instructions = (
        "You are a helpful financial analyst assistant. "
        "Answer the user's question based ONLY on the provided context. "
        "If the information is not in the context, say you don't know or that the context doesn't provide the answer. "
        "Be concise and cite the sources if specific information is used."
    )

    # User turn: context block first, then the question, then an answer cue.
    prompt = (
        "\nContext from financial documents:\n"
        "---\n"
        f"{context_string}\n"
        "---\n"
        f"User Question: {query_text}\n"
        "\n"
        "Answer:\n"
    )

    chat_messages = [
        {"role": "system", "content": instructions},
        {"role": "user", "content": prompt},
    ]

    try:
        response = llm_client.chat.completions.create(
            model=LLM_MODEL_NAME,
            messages=chat_messages,
            temperature=0.2,  # Low temperature: more factual, less creative
        )
        return response.choices[0].message.content
    except Exception as llm_error:
        # Best-effort: report the failure and hand back a fixed message.
        print(f"Error getting response from LLM: {llm_error}")
        return "Error: Could not get a response from the LLM."

In [None]:
def answer_query_with_rag(user_query: str, top_k_retrieval=5):
    """Run the retrieve-then-generate pipeline for one user question.

    Steps: embed the query, search Qdrant, format the hits into a context
    string, then ask the LLM to answer from that context.

    Args:
        user_query: The question as typed by the user.
        top_k_retrieval: Number of chunks to request from the vector search.

    Returns:
        Always a 2-tuple ``(answer_text, sources_metadata)``. When a step
        fails or yields nothing, ``answer_text`` is a human-readable message
        and ``sources_metadata`` is an empty list, so callers can unpack the
        result unconditionally.
    """
    print(f"\nProcessing query: '{user_query}'")

    # 1. Embed the query
    print("Embedding user query...")
    try:
        # For BGE models, add the recommended prefix for retrieval queries
        if "bge-" in embedding_model_name.lower():
            query_for_embedding = f"Represent this sentence for searching relevant passages: {user_query}"
        else:
            query_for_embedding = user_query

        query_vector = get_query_embedding(query_for_embedding)
        print(f"Query embedded (first 5 dims): {query_vector[:5]}...")
    except Exception as e:
        print(f"Error embedding query: {e}")
        return "Could not process the query due to an embedding error.", []

    # 2. Search Qdrant for relevant chunks
    print(f"Searching Qdrant for top {top_k_retrieval} relevant chunks...")
    try:
        search_results = search_qdrant(query_vector, top_k=top_k_retrieval)
        if not search_results:
            print("No relevant documents found in Qdrant.")
            return "I could not find relevant information in the documents to answer your question.", []
        print(f"Found {len(search_results)} potential chunks.")
    except Exception as e:
        print(f"Error searching Qdrant: {e}")
        return "Could not process the query due to a database search error.", []

    # 3. Format the retrieved context
    print("Formatting context for LLM...")
    context_string, sources_metadata = format_retrieved_context(search_results)
    if not context_string:
        print("No context could be formatted (perhaps chunks were empty or too large).")
        return "I found some documents, but could not prepare them to answer your question.", []

    # 4. Get response from LLM
    print("Getting response from LLM...")
    try:
        llm_answer = get_llm_response(user_query, context_string)
    except Exception as e:
        print(f"Error in LLM call: {e}")
        return "An error occurred while trying to generate an answer with the LLM.", []

    # 5. Return the answer together with the payloads that backed it
    print("\n--- RAG Process Complete ---")
    return llm_answer, sources_metadata

# --- Example Usage ---
if __name__ == "__main__":
    # LIBRARIES_AVAILABLE may not have been defined by an earlier cell; default
    # to True in that case instead of failing with NameError on a fresh kernel.
    libraries_ok = globals().get("LIBRARIES_AVAILABLE", True)
    if not libraries_ok or query_model is None or client is None or llm_client is None:
        print("Exiting: One or more essential components (libraries, models, clients) failed to initialize.")
    else:
        # Test queries
        queries = [
            "What were Agilent's main business segments in 2018?",
            "What did Agilent say about the COVID-19 impact in early 2020?",
            "Summarize the key risks for a major tech company in their 2023 10-K.",
            "What was Apple's revenue in their latest reported quarter for which data exists?" # This requires up-to-date data
        ]

        for q in queries:
            result = answer_query_with_rag(q, top_k_retrieval=3) # Get 3 chunks for context
            # The pipeline may hand back a bare message string on failure
            # rather than an (answer, sources) pair; accept both shapes.
            if isinstance(result, tuple):
                answer, sources = result
            else:
                answer, sources = result, []
            print(f"\n\nQuery: {q}")
            print(f"LLM Answer:\n{answer}")

            # print("\nSources Used (Payloads):")
            # for i, src_meta in enumerate(sources):
            #     print(f"  Source {i+1} Ticker: {src_meta.get('ticker')}, File: {src_meta.get('original_file_name')}, Section: {src_meta.get('section_name', src_meta.get('turn_section'))}")
            print("-" * 50)