In [8]:
# %pip install -U langchain langchain-community langchain-huggingface sentence-transformers faiss-cpu pymupdf python-dotenv requests


In [18]:
# ✅ STEP 2: Imports
import os
import time
import textwrap
import requests
from dotenv import load_dotenv
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from utils import redact_sensitive_info

In [19]:
# ✅ STEP 3: Load API Key from .env
# Populate the process environment from a local .env file before reading it.
load_dotenv()

# Model identifier placed in the "model" field of each chat-completion request.
MODEL = "meta-llama/llama-3-8b-instruct"
# Stays None when OPENROUTER_API_KEY is not present in the environment.
API_KEY = os.environ.get("OPENROUTER_API_KEY")

In [20]:
# Debug check: show only the first 8 characters of the key, or a marker when it is missing.
if API_KEY:
    print("🔑 API Key loaded:", API_KEY[:8])
else:
    print("🔑 API Key loaded:", "❌ NOT LOADED")

🔑 API Key loaded: sk-or-v1


In [21]:
# ✅ STEP 4: Define OpenRouter API Function
def call_openrouter_api(prompt, model=None, timeout=30):
    """Send a single-turn chat prompt to OpenRouter and return the reply text.

    Args:
        prompt: Text sent as the sole "user" message.
        model: Model identifier to use; defaults to the module-level MODEL.
        timeout: Seconds passed to `requests.post` as its timeout.

    Returns:
        The `content` of the first choice's message in the JSON response.

    Raises:
        RuntimeError: If the API key is missing, the request times out or
            fails at the transport level, the server answers with a non-200
            status, or the response body does not have the expected shape.
    """
    # Fail fast: without a key the header would literally be "Bearer None"
    # and the failure would only surface as a remote authentication error.
    if not API_KEY:
        raise RuntimeError(
            "❌ Failed to call OpenRouter API: OPENROUTER_API_KEY is not set."
        )

    url = "https://openrouter.ai/api/v1/chat/completions"
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://chat.openai.com",
        "X-Title": "PDF-QA-App"
    }
    payload = {
        "model": model or MODEL,
        "messages": [{"role": "user", "content": prompt}]
    }

    # Only the network call lives in this try block, so transport failures
    # are reported separately from bad statuses and malformed bodies.
    try:
        response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    except requests.exceptions.Timeout as e:
        raise RuntimeError("⏱️ Request timed out. Try again or reduce context size.") from e
    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"❌ Failed to call OpenRouter API: {str(e)}") from e

    if response.status_code != 200:
        raise RuntimeError(
            f"❌ Failed to call OpenRouter API: API Error {response.status_code}: {response.text}"
        )

    # A 200 response is not guaranteed to carry a "choices" list (the body may
    # be non-JSON or an error object), so report the body instead of a bare
    # KeyError/IndexError.
    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as e:
        raise RuntimeError(
            f"❌ Failed to call OpenRouter API: unexpected response format: {response.text[:500]}"
        ) from e

In [13]:
def ask_question_from_pdf(query, retriever, selected_pdf, top_k=3, line_delay=0.3):
    """Answer `query` from one PDF's retriever, log the exchange, and print the answer.

    NOTE: a later cell in this notebook defines a function with the same name
    (it uses the top 2 chunks and prints timing information); whichever cell
    was executed last is the definition in effect.

    Args:
        query: The user's question.
        retriever: Retriever for the selected PDF. Must expose `invoke(query)`
            or the legacy `get_relevant_documents(query)`, returning objects
            with a `page_content` attribute.
        selected_pdf: Identifier of the PDF; only written to the log.
        top_k: Number of leading retrieved chunks placed in the prompt.
        line_delay: Seconds to pause after each printed line.

    Returns:
        None. The answer is printed and appended to "qa_log.txt". If the API
        call fails, the error is printed and nothing is logged.
    """
    # `get_relevant_documents` is deprecated in current LangChain releases in
    # favour of `invoke`; keep the old call as a fallback for older retrievers.
    if hasattr(retriever, "invoke"):
        results = retriever.invoke(query)
    else:
        results = retriever.get_relevant_documents(query)

    context = "\n\n".join(doc.page_content for doc in results[:top_k])
    context = redact_sensitive_info(context)
    prompt = f"""You are a helpful assistant. Use the following PDF content to answer the question.

📘 Context:
{context}

❓ Question:
{query}

🧠 Answer:"""
    prompt = redact_sensitive_info(prompt)

    # Report API failures instead of letting them abort an interactive session.
    try:
        response = call_openrouter_api(prompt)
    except Exception as e:
        print(e)
        return

    # Log the conversation. Explicit UTF-8: questions and answers may contain
    # characters the platform's default encoding cannot represent.
    with open("qa_log.txt", "a", encoding="utf-8") as f:
        f.write(f"\nPDF: {selected_pdf}\nQ: {query}\nA:\n{response}\n{'-'*50}\n")

    # Nicely print the response. Wrap each original line separately:
    # textwrap.wrap on the whole text would turn newlines into spaces and
    # merge lists/paragraphs into a single run of text.
    answer = (response or "").strip()
    print("\n🔍 Answer:\n")
    for paragraph in answer.splitlines():
        # wrap("") returns [], so emit an explicit blank line for empty rows.
        for line in textwrap.wrap(paragraph, width=80) or [""]:
            print(line)
            time.sleep(line_delay)


In [22]:
# ✅ STEP 5: Load and Process PDFs
pdf_dir = "pdfs"

# Sort so the menu numbering is stable between runs (os.listdir returns
# entries in arbitrary order) and match the extension case-insensitively
# so files such as "REPORT.PDF" are not silently ignored.
pdf_paths = sorted(
    os.path.join(pdf_dir, f)
    for f in os.listdir(pdf_dir)
    if f.lower().endswith(".pdf")
)
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
retrievers = {}

for fname in pdf_paths:
    loader = PyMuPDFLoader(fname)
    docs = splitter.split_documents(loader.load())
    if not docs:
        # Nothing to index (e.g. a PDF with no extractable text). Building a
        # FAISS index from zero documents is expected to fail — TODO confirm —
        # so skip the file rather than abort the whole cell.
        print(f"⚠️ Skipping {os.path.basename(fname)}: no extractable text found.")
        continue
    db = FAISS.from_documents(docs, embeddings)
    retrievers[fname] = db.as_retriever()

# Offer only the PDFs that were actually indexed; dict insertion order keeps
# the sorted order established above.
filenames = list(retrievers)

# ✅ STEP 6: Q&A Function
def ask_question_from_pdf(query, retriever, selected_pdf, top_k=2, line_delay=0.3):
    """Answer `query` from one PDF's retriever, log the exchange, and print the answer.

    Args:
        query: The user's question.
        retriever: Retriever for the selected PDF. Must expose `invoke(query)`
            or the legacy `get_relevant_documents(query)`, returning objects
            with a `page_content` attribute.
        selected_pdf: Identifier of the PDF; only written to the log.
        top_k: Number of leading retrieved chunks placed in the prompt
            (default 2, kept small for speed).
        line_delay: Seconds to pause after each printed line.

    Returns:
        None. The answer is printed and appended to "qa_log.txt". If the API
        call fails, the error is printed and nothing is logged.
    """
    # `get_relevant_documents` is deprecated in current LangChain releases in
    # favour of `invoke`; keep the old call as a fallback for older retrievers.
    if hasattr(retriever, "invoke"):
        results = retriever.invoke(query)
    else:
        results = retriever.get_relevant_documents(query)

    context = "\n\n".join(doc.page_content for doc in results[:top_k])
    context = redact_sensitive_info(context)
    prompt = f"""You are a helpful assistant. Use the following PDF content to answer the question.

📘 Context:
{context}

❓ Question:
{query}

🧠 Answer:"""
    prompt = redact_sensitive_info(prompt)

    try:
        print("⏳ Sending to OpenRouter, please wait...")
        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments.
        start = time.perf_counter()
        response = call_openrouter_api(prompt)
        print(f"✅ Response received in {round(time.perf_counter() - start, 2)} seconds")
    except Exception as e:
        print(e)
        return

    # Log. Explicit UTF-8: questions and answers may contain characters the
    # platform's default encoding cannot represent.
    with open("qa_log.txt", "a", encoding="utf-8") as f:
        f.write(f"\nPDF: {selected_pdf}\nQ: {query}\nA:\n{response}\n{'-'*50}\n")

    # Print nicely. Wrap each original line separately: textwrap.wrap on the
    # whole text would turn newlines into spaces and merge lists/paragraphs
    # into a single run of text.
    answer = (response or "").strip()
    print("\n🔍 Answer:\n")
    for paragraph in answer.splitlines():
        # wrap("") returns [], so emit an explicit blank line for empty rows.
        for line in textwrap.wrap(paragraph, width=80) or [""]:
            print(line)
            time.sleep(line_delay)

In [24]:
# ✅ STEP 7: Interactive Multi-PDF Q&A Chat Loop
while True:
    # Without this guard an empty list would show an empty menu forever.
    if not filenames:
        print("❌ No PDFs available. Add files to the PDF folder and re-run the loading cell.")
        break

    print("\n📚 Available PDFs:")
    for i, fname in enumerate(filenames, start=1):
        print(f"{i}. {os.path.basename(fname)}")

    choice = input("\n🔢 Enter PDF number to use (or type 'exit'): ").strip()
    if choice.lower() == 'exit':
        print("👋 Exiting.")
        break

    # Parse the menu number; anything non-numeric becomes 0, which the range
    # check below rejects.
    try:
        index = int(choice)
    except ValueError:
        index = 0

    # Explicit range check: with plain list indexing, "0" or a negative
    # number would silently select a PDF from the end of the list.
    if not 1 <= index <= len(filenames) or filenames[index - 1] not in retrievers:
        print("❌ Invalid choice. Try again.")
        continue

    selected_pdf = filenames[index - 1]
    retriever = retrievers[selected_pdf]
    print(f"✅ Selected: {os.path.basename(selected_pdf)}")

    while True:
        question = input("\n❓ Ask a question (or type 'back' to choose another PDF): ").strip()
        if question.lower() == 'back':
            break
        if not question:
            # Do not spend an API call on an empty question.
            continue
        ask_question_from_pdf(question, retriever, selected_pdf)



📚 Available PDFs:
1. A hybrid deep learning approach to integrate predictive maintenance.pdf
2. A_Cloud-Based_Optimized_Ensemble_Model_for_Risk_Prediction_of_Diabetic_ProgressionAn_Azure_Machine_Learning_Perspective.pdf
3. Machine_Learning-Based_Predictive_Maintenance_System_for_Artificial_Yarn_Machines.pdf
4. On_the_Performance_of_Machine_Learning_Models_for_Anomaly-Based_Intelligent_Intrusion_Detection_Systems_for_the_Internet_of_Things.pdf
✅ Selected: A hybrid deep learning approach to integrate predictive maintenance.pdf
⏳ Sending to OpenRouter, please wait...
✅ Response received in 3.1 seconds

🔍 Answer:

Based on the provided PDF content, the following models are used:  1. The
suggested method (integrated model) combines data-driven and model-based
approaches. 2. The model-based approach used in the model-based method is based
on Weibull distribution. 3. The CNN-LSTM-attention model is used to classify and
assign RUL possibility for classifying and assigning the system's state. 