In [None]:
import os
import PyPDF2
import docx2txt
from googletrans import Translator
from google.generativeai import configure, GenerativeModel
from bs4 import BeautifulSoup
import requests

# === Gemini Setup ===
configure(api_key="AIzaSyBqk4WXN7k4UhjaBCzgSmuWn_bEor5aSyw")  # Replace with your Gemini key
model = GenerativeModel("gemini-2.0-flash")

# === System Instruction ===
system_instruction = (
    "You are Ambedkar AI, a legal assistant exclusively trained on the Indian legal system.\n"
    "Your task is to interpret laws, judgments, and legal documents strictly according to Indian law.\n"
    "Never provide advice based on international laws. Always cite IPC, CrPC, or applicable Indian acts.\n"
)

# === User Token Plans ===
USER_TOKENS = {
    "555": 10000,
    "1111": 200000,
}
token_usage = {}

def check_token_limit(user_id, tokens_used):
    allowed = USER_TOKENS.get(user_id)
    if allowed is None:
        return False, "❌ Invalid plan ID."
    used = token_usage.get(user_id, 0)
    if used + tokens_used > allowed:
        return False, f"❌ Token limit exceeded: {used + tokens_used}/{allowed}"
    token_usage[user_id] = used + tokens_used
    return True, f"✅ Token usage: {token_usage[user_id]}/{allowed}"

# === File Reader ===
def extract_text(file_path):
    ext = os.path.splitext(file_path)[1].lower()
    try:
        if ext == ".pdf":
            text = ""
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    page_text = page.extract_text()
                    if page_text:
                        text += page_text + "\n"
            return text
        elif ext == ".docx":
            return docx2txt.process(file_path)
        elif ext == ".txt":
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        else:
            return None
    except Exception as e:
        return f"[ERROR] Failed to extract: {e}"

# === Google Translate ===
def translate_text(text, target_lang='en'):
    try:
        if not text:
            return "⚠️ No document content to translate."
        # Ensure text is a string before translation
        text_to_translate = str(text)
        translator = Translator()
        translated = translator.translate(text_to_translate, dest=target_lang)
        return translated.text
    except Exception as e:
        return f"[ERROR] Translation failed: {e}"

# === Legal News ===
def fetch_legal_news():
    try:
        url = "https://www.barandbench.com/news"
        r = requests.get(url)
        soup = BeautifulSoup(r.content, "html.parser")
        headlines = [h.get_text(strip=True) for h in soup.find_all("h3")[:5]]
        return "\n".join(headlines) if headlines else "No headlines found."
    except Exception as e:
        return f"Error fetching news: {e}"

# === Gemini Chat Function ===
def chat_gemini(prompt, user_id, doc_context=""):
    try:
        context_note = f"\nRefer to the uploaded document:\n{doc_context[:3000]}\n" if doc_context else ""
        full_prompt = f"{system_instruction}{context_note}User Query: {prompt}"
        response = model.generate_content(full_prompt)
        tokens_used = len(full_prompt.split()) + len(response.text.split())
        status, msg = check_token_limit(user_id, tokens_used)
        if not status:
            return msg
        return f"\n🧠 Ambedkar AI:\n{response.text}\n\n🔢 {msg}"
    except Exception as e:
        return f"Gemini Error: {e}"

# === Chat Interface ===
def main():
    print("📚 Welcome to Ambedkar 2.1 Ultra Pro - Indian Legal Assistant")
    user_id = input("🔑 Enter your plan ID (555 or 1111): ").strip()

    if user_id not in USER_TOKENS:
        print("❌ Invalid plan ID.")
        return

    doc_context = ""
    print("\n👨‍⚖️ Start chatting with Ambedkar AI below.")
    print("📁 [upload] to upload document | 🌐 [translate] to translate document | 📰 [news] | 🚪 [exit]\n")

    while True:
        print("────────────────────────────────────────────────────────")
        user_input = input("You: ").strip().lower().replace("'", "").replace('"', '')

        if user_input == "exit":
            print("👋 Thank you for using Ambedkar. Stay legally empowered!")
            break

        elif user_input == "[upload]":
            path = input("📁 Enter full file path: ").strip()
            if not os.path.exists(path):
                print("❌ File not found.")
                continue
            extracted = extract_text(path)
            if not extracted:
                print("❌ Unsupported file or empty.")
            else:
                doc_context = extracted
                print("✅ Document uploaded. Context is now active.")

        elif user_input == "[translate]":
            if not doc_context:
                print("⚠️ No document uploaded yet.")
                continue
            lang = input("🌍 Enter target language code (e.g., en, hi, gu): ").strip()
            translated = translate_text(doc_context, lang)
            print(f"\n📝 Translated Document:\n{translated}")

        elif user_input == "[news]":
            print("\n📰 Legal Headlines:\n")
            print(fetch_legal_news())

        else:
            response = chat_gemini(user_input, user_id, doc_context)
            print(response)

if __name__ == "__main__":
    main()

📚 Welcome to Ambedkar 2.1 Ultra Pro - Indian Legal Assistant
🔑 Enter your plan ID (555 or 1111): 555

👨‍⚖️ Start chatting with Ambedkar AI below.
📁 [upload] to upload document | 🌐 [translate] to translate document | 📰 [news] | 🚪 [exit]

────────────────────────────────────────────────────────
You: upload

🧠 Ambedkar AI:
Okay, I understand. You've said "upload". To provide a helpful response, I need more context. Under Indian law, "upload" can relate to various legal situations. To give you a relevant interpretation, please clarify:

*   **What type of data are you referring to uploading?** (e.g., documents, images, videos, financial information)
*   **Where are you uploading it?** (e.g., a website, a social media platform, a government portal, to another individual)
*   **Why are you uploading it?** (e.g., as evidence, for a job application, to share information)
*   **What is your concern related to the upload?** (e.g., data privacy, copyright infringement, potential liability)

Once 

In [2]:
%pip install PyPDF2 docx2txt googletrans==4.0.0-rc1 beautifulsoup4 requests

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting docx2txt
  Downloading docx2txt-0.9-py3-none-any.whl.metadata (529 bytes)
Collecting googletrans==4.0.0-rc1
  Downloading googletrans-4.0.0rc1.tar.gz (20 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting httpx==0.13.3 (from googletrans==4.0.0-rc1)
  Downloading httpx-0.13.3-py3-none-any.whl.metadata (25 kB)
Collecting hstspreload (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading hstspreload-2025.1.1-py3-none-any.whl.metadata (2.1 kB)
Collecting chardet==3.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading chardet-3.0.4-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting idna==2.* (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading idna-2.10-py2.py3-none-any.whl.metadata (9.1 kB)
Collecting rfc3986<2,>=1.3 (from httpx==0.13.3->googletrans==4.0.0-rc1)
  Downloading rfc3986-1.5.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting httpcore==0.9.* (from httpx==