# In [None]:
import os
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import faiss

# Groq API key, read from the environment instead of being hard-coded.
# A key committed to source is readable by anyone with access to the file and
# should be treated as compromised: revoke it and issue a new one.
# Set it before running, e.g. `export GROQ_API_KEY=<your key>`.
# Falls back to an empty string, in which case Groq requests will be rejected.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")

# === Helper Functions === #
def load_text(file_path):
    """Read a UTF-8 text file and return its entire contents.

    Any failure while opening or reading the file is reported on stdout and
    an empty string is returned instead, so callers can treat "" as
    "nothing was loaded".
    """
    contents = ""
    try:
        with open(file_path, encoding='utf-8') as handle:
            contents = handle.read()
    except Exception as err:
        # Best effort: report the problem and fall through to return "".
        print(f"Error loading text file: {err}")
    return contents

def clean_and_split_text(text):
    """Trim surrounding whitespace from *text* and split it into paragraphs.

    The separator is two consecutive newline characters. Pieces are returned
    in document order as a list of strings; only the text as a whole is
    stripped, not the individual pieces.
    """
    paragraph_separator = "\n\n"
    return text.strip().split(paragraph_separator)

def query_groq_llm(prompt, context):
    """Send a prompt and context to the Groq LLM chat completion API.

    Args:
        prompt: The user's question or instruction.
        context: Supporting text appended to the prompt under a "Context:"
            heading.

    Returns:
        The content of the first completion choice, "No response" when the
        reply carries no choice or content, or a fixed error message when the
        request fails or the reply cannot be parsed. Failure details are
        printed to stdout; this function does not raise for API errors.
    """
    error_message = "An error occurred while fetching a response from Groq."

    # Groq serves its OpenAI-compatible API under the /openai prefix.
    url = "https://api.groq.com/openai/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "messages": [
            {"role": "system", "content": "You are an assistant helping summarize and query text."},
            {"role": "user", "content": f"{prompt}\n\nContext:\n{context}"}
        ],
        "model": "llama-3.3-70b-versatile"  # Replace with your desired model
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would block the interactive loop indefinitely.
        response = requests.post(url, json=payload, headers=headers, timeout=60)
    except requests.RequestException as e:
        print(f"Error communicating with Groq API: {e}")
        return error_message

    if response.status_code != 200:
        print(f"Groq API Error: {response.status_code}, {response.text}")
        return error_message

    try:
        # `or [{}]` also covers an empty "choices" list, which would
        # otherwise raise IndexError on [0].
        choices = response.json().get("choices") or [{}]
        return choices[0].get("message", {}).get("content", "No response")
    except (ValueError, AttributeError, LookupError, TypeError) as e:
        # Body was not valid JSON or did not have the expected shape.
        print(f"Error communicating with Groq API: {e}")
        return error_message

def build_faiss_index(sections, model):
    """Embed the text sections and store them in a flat L2 FAISS index.

    Args:
        sections: Sequence of strings to index. Vector i in the index
            corresponds to sections[i].
        model: Object whose ``encode`` method maps a list of strings to a
            2-D array of embeddings (a SentenceTransformer in this file).

    Returns:
        A tuple ``(index, sections)``: the populated ``faiss.IndexFlatL2`` and
        the same sections object that was passed in.

    Raises:
        ValueError: If ``sections`` is empty, since there is nothing to embed
            and no embedding dimension to size the index with.
    """
    if len(sections) == 0:
        raise ValueError("Cannot build a FAISS index from an empty list of sections.")

    # FAISS expects C-contiguous float32 input; convert once, copying only
    # if the encoder output is not already in that form.
    embeddings = np.ascontiguousarray(model.encode(sections), dtype=np.float32)
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)
    return index, sections

def search_faiss(query, model, index, sections, top_k=5):
    """Search the FAISS index for the sections most relevant to a query.

    Args:
        query: The query string to embed and search with.
        model: Object whose ``encode`` method maps a list of strings to a
            2-D array of embeddings.
        index: Index exposing ``search(vectors, k)`` that returns
            ``(distances, indices)`` arrays with one row per query vector.
        sections: Sequence of texts aligned with the vectors in ``index``.
        top_k: Maximum number of results to return.

    Returns:
        A list of ``{"section": ..., "distance": ...}`` dicts in the order
        the index returned them. It holds fewer than ``top_k`` entries when
        the index contains fewer vectors than requested.
    """
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    distances, indices = index.search(query_embedding, top_k)
    # FAISS pads missing neighbours with label -1. Without this filter,
    # sections[-1] would silently return the last section as a bogus hit.
    return [
        {"section": sections[idx], "distance": dist}
        for dist, idx in zip(distances[0], indices[0])
        if idx >= 0
    ]

# === Main Functionality === #
def query_text_file(file_path):
    """Run an interactive retrieval-and-answer loop over a text file.

    Loads the file, splits it into sections, embeds them with the
    'all-MiniLM-L6-v2' SentenceTransformer model and indexes them with FAISS.
    It then repeatedly prompts for a query, prints the top 3 matching
    sections and asks the Groq LLM to answer using those sections as context.
    The loop ends when the user types 'exit', closes stdin or interrupts.

    Args:
        file_path: Path of the text file to query.

    Returns:
        None. All output is printed to stdout.
    """
    text = load_text(file_path)
    if not text:
        print("Failed to load text.")
        return

    # Clean and split text, dropping blank sections so they are never
    # embedded or returned as matches.
    sections = [s for s in clean_and_split_text(text) if s.strip()]
    if not sections:
        print("No non-empty sections found in the text.")
        return

    # Load SentenceTransformer model
    model = SentenceTransformer('all-MiniLM-L6-v2')

    # Build FAISS index
    index, sections = build_faiss_index(sections, model)

    print("Text query system initialized. Enter a query or type 'exit' to quit.")

    while True:
        try:
            user_query = input("\nEnter your query: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the user interrupted: leave cleanly
            # instead of ending with a traceback.
            print("\nExiting the query system. Goodbye!")
            break

        if user_query.lower() == "exit":
            print("Exiting the query system. Goodbye!")
            break

        if not user_query:
            # Nothing to search for; skip the search and the LLM call.
            continue

        print("\nSearching for relevant sections...")
        results = search_faiss(user_query, model, index, sections, top_k=3)

        if not results:
            print("No relevant sections found.")
            continue

        print("\nTop relevant sections:")
        for i, result in enumerate(results):
            # Map the L2 distance to a (0, 1] score: smaller distance, higher score.
            print(f"\nSection {i + 1} (Similarity: {1 / (1 + result['distance']):.4f}):")
            print(result['section'])
        context = "".join(f"{result['section']}\n" for result in results)

        # Use Groq API for enhanced summarization
        print("\nQuerying Groq LLM for additional insights...")
        llm_response = query_groq_llm(user_query, context)
        print("\nGroq LLM Response:")
        print(llm_response)

# === Run the System === #
# Script entry point: when this file is executed directly (rather than
# imported), start the interactive query loop on the default text file.
if __name__ == "__main__":
    file_path = "summary_blockchain.txt"  # Replace with your text file
    query_text_file(file_path)
