In [1]:
from google.colab import drive
drive.mount("/content/drive", force_remount=False)

!pip install -q bitsandbytes transformers accelerate peft

Mounted at /content/drive
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m59.1/59.1 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel


BASE_MODEL_ID = "tarun7r/Finance-Llama-8B"
LORA_PATH = "/content/drive/MyDrive/finance_lora_checkpoints_v3"  # change if needed

# 4-bit quantization (QLoRA-style) to save memory
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("Loading base model (general finance)...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
base_model.eval()

print("Loading LoRA adapter (risk head)...")
risk_model = PeftModel.from_pretrained(base_model, LORA_PATH)
risk_model.eval()

print("Models ready.")


Loading tokenizer...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

Loading base model (general finance)...


config.json:   0%|          | 0.00/923 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 4 files:   0%|          | 0/4 [00:00<?, ?it/s]

pytorch_model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

pytorch_model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

pytorch_model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

pytorch_model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

Loading LoRA adapter (risk head)...
Models ready.


In [3]:
INSTRUCTION = (
    "You are a financial risk analysis model.\n"
    "Read the company text and generate a concise, structured risk report "
    "using EXACTLY the following fields and format, one field per line:\n\n"
    "risk_severity: \n"
    "risk_categories: \n"
    "financial_impact: \n"
    "key_metrics: \n"
    "critical_dates: \n"
    "analysis: <2‚Äì4 sentence narrative>\n\n"
    "Rules:\n"
    "- ALWAYS include every field.\n"
    "- Output MUST be plain text only (no JSON, no braces).\n"
    "- No quotes, no bullets, no extra commentary.\n"
    "- Write clearly and professionally."
)

def build_risk_prompt(text: str) -> str:
    return (
        INSTRUCTION
        + "\n\nCompany text:\n"
        + text
        + "\n\nRisk report:\n"
    )


In [4]:
def clean_financial_impact_block(report: str) -> str:
    """
    Quick win: do NOT try to normalize to pretty numbers.
    Just keep whatever model said; if it hallucinated JSON, strip braces.
    """
    lines = report.splitlines()
    new_lines = []
    for line in lines:
        lower = line.lower().strip()
        if lower.startswith("financial_impact:"):
            # Remove any {...} blobs just to avoid ugly JSON in UI
            raw = line.split(":", 1)[1].strip()
            # Strip leading/trailing braces if present
            raw = raw.strip()
            if raw.startswith("{") and raw.endswith("}"):
                # Replace with a generic sentence
                cleaned = (
                    "Financial impact mentioned qualitatively; "
                    "exact amount and timing may not be precise."
                )
            elif not raw:
                cleaned = (
                    "Financial impact discussed qualitatively; no clear quantified "
                    "loss or gain disclosed."
                )
            else:
                cleaned = raw
            new_lines.append("financial_impact: " + cleaned)
        else:
            new_lines.append(line)
    return "\n".join(new_lines)


def generate_risk_report(company_text: str,
                         max_new_tokens: int = 600,
                         temperature: float = 0.3,
                         top_p: float = 0.95) -> str:
    """
    Uses the LoRA risk head to generate the 6-line risk report.
    """
    prompt = build_risk_prompt(company_text)
    inputs = tokenizer(prompt, return_tensors="pt").to(risk_model.device)

    with torch.no_grad():
        gen = risk_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.05,
            do_sample=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    raw = tokenizer.decode(gen[0], skip_special_tokens=True)

    if "Risk report:" in raw:
        out = raw.split("Risk report:")[-1].strip()
    else:
        out = raw.strip()

    # drop empty lines
    out = "\n".join([l.rstrip() for l in out.splitlines() if l.strip()])
    # generic cleaning of financial_impact
    out = clean_financial_impact_block(out)
    return out


In [9]:
def generate_base_answer(question: str,
                         context: str = None,
                         max_new_tokens: int = 340,  # was 280
                         temperature: float = 0.3,
                         top_p: float = 0.9) -> str:
    """
    General finance answer generator:
    ONE rich, complete paragraph (about 5‚Äì7 sentences).
    """

    if context:
        prompt = (
            "You are a highly knowledgeable finance assistant.\n"
            "Use the CONTEXT to answer the QUESTION accurately, but do not copy text verbatim.\n"
            "Write ONLY ONE rich, complete paragraph of about 5 to 7 sentences for a retail investor.\n"
            "Do NOT add headings, section titles, disclaimers, or notes like 'Note:'.\n"
            "Do NOT use bullet points or numbered lists.\n"
            "Finish your last sentence cleanly with a period and then STOP.\n\n"
            "=== CONTEXT ===\n"
            f"{context}\n\n"
            "=== QUESTION ===\n"
            f"{question}\n\n"
            "=== FINAL ANSWER ===\n"
        )
    else:
        prompt = (
            "You are a highly knowledgeable finance assistant.\n"
            "Answer the question in ONLY ONE rich, complete paragraph of about 5 to 7 sentences.\n"
            "Do NOT add headings, section titles, disclaimers, or notes like 'Note:'.\n"
            "Do NOT use bullet points or numbered lists.\n"
            "Finish your last sentence cleanly with a period and then STOP.\n\n"
            "Question:\n"
            f"{question}\n\n"
            "Final answer:\n"
        )

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=2048
    ).to(base_model.device)

    with torch.no_grad():
        gen_ids = base_model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # 340 tokens ‚âà 5‚Äì7 full sentences
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=1.05,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    full = tokenizer.decode(gen_ids[0], skip_special_tokens=True)

    if "=== FINAL ANSWER ===" in full:
        answer = full.split("=== FINAL ANSWER ===", 1)[-1].strip()
    elif "Final answer:" in full:
        answer = full.split("Final answer:", 1)[-1].strip()
    else:
        if full.startswith(prompt):
            answer = full[len(prompt):].strip()
        else:
            answer = full.strip()

    cut_markers = [
    "Read the company text and generate a concise, structured",
    "using EXACTLY the following fields",
    "risk_severity:",
    "financial_impact:",
    "Risk report:",
    "\nNote:",
    "\nNOTE:",
    "Risk report generated by",
    "This report was generated using",
    "Read the full report at",
    "visit [RiskAI]",
]
    for m in cut_markers:
        idx = answer.find(m)
        if idx != -1:
            answer = answer[:idx].rstrip()
            break

    if answer and answer[-1] not in [".", "!", "?"]:
        answer = answer.rstrip() + "."

    return answer


In [None]:
def is_risk_intent(question: str) -> bool:
    """
    Cheap heuristic: look for risk-related words.
    """
    if not question:
        return True
    q = question.lower()
    triggers = [
        "risk", "market risk", "liquidity", "debt", "leverage",
        "covenant", "fx", "foreign currency", "interest rate",
        "hedge", "hedging"
    ]
    return any(word in q for word in triggers)


def build_risk_commentary_prompt(company_text: str, risk_report: str) -> str:
    return (
        "You are a senior financial risk officer.\n"
        "You are given:\n"
        "1) Company text from a filing or disclosure.\n"
        "2) A structured risk report produced by an internal model.\n\n"
        "Write a short, professional explanation (one paragraph, 4‚Äì6 sentences) that:\n"
        "- Explains the main market, interest-rate, debt and liquidity risks.\n"
        "- Uses figures from the original company text when available.\n"
        "- If any number in the report conflicts with the text, rely on the text.\n"
        "- Do NOT repeat the six field names.\n"
        "- Do NOT rewrite the risk report as a bullet list.\n"
        "- Focus on interpretation and implications for the firm's risk profile.\n\n"
        "=== COMPANY TEXT ===\n"
        f"{company_text}\n\n"
        "=== RISK REPORT ===\n"
        f"{risk_report}\n\n"
        "=== EXPLANATION ===\n"
    )


def finance_router(user_question: str, company_text: str = ""):
    """
    - If company_text given and intent is risk-related -> 6-line risk report + commentary.
    - Else -> general finance answer from base model.
    """
    company_text = company_text.strip()

    # Risk path
    if company_text and is_risk_intent(user_question):
        risk_report = generate_risk_report(company_text)
        commentary_prompt = build_risk_commentary_prompt(company_text, risk_report)
        commentary = generate_base_answer(
            question="Explain the firm's risk profile based on the above.",
            context=commentary_prompt,
            max_new_tokens=340,   # match base default
        )
        combined = risk_report.strip() + "\n\n" + commentary.strip()
        return {"mode": "risk", "answer": combined}

    # General finance path
    answer = generate_base_answer(
        question=user_question or "Explain the following financial text.",
        context=company_text or None,
        max_new_tokens=340,      # match base default
    )
    return {"mode": "general", "answer": answer}


In [11]:
!pip install flask flask-cors pyngrok nest_asyncio
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import nest_asyncio, threading

app = Flask(__name__)
CORS(app, resources={r"/*": {"origins": "*"}})

@app.route("/ping", methods=["GET"])
def ping():
    return {"status": "ok"}, 200

@app.route("/echo", methods=["POST"])
def echo():
    data = request.get_json(force=True)
    return {"received": data}, 200

@app.route("/api/analyze", methods=["POST"])
def analyze():
    print("\nüî• /api/analyze CALLED")
    data = request.get_json(force=True)
    print("‚ÑπÔ∏è Received:", data)

    question = data.get("question", "")
    company_text = data.get("company_text", "")

    result = finance_router(user_question=question, company_text=company_text)

    print("‚úÖ Returning response")
    return jsonify(result)


# !pip install flask flask-cors pyngrok nest_asyncio
# from flask import Flask, request, jsonify
# from flask_cors import CORS
# from pyngrok import ngrok
# import nest_asyncio, threading

# app = Flask(__name__)
# CORS(app, resources={r"/*": {"origins": "*"}})

# @app.route("/api/analyze", methods=["POST"])
# def analyze():
#     print("\nüî• /api/analyze CALLED")
#     data = request.get_json(force=True)
#     print("‚ÑπÔ∏è Received:", data)
#     question = data.get("question","")
#     company_text = data.get("company_text","")
#     result = finance_router(user_question=question, company_text=company_text)
#     return jsonify(result)




In [12]:
NGROK_AUTH_TOKEN = "36dn3NVTGZvRgMKFNJOvKzGdear_4EdJYindPnxhYG5uGDe6Z"

nest_asyncio.apply()

def run_flask():
    app.run(host="0.0.0.0", port=8000, debug=False, use_reloader=False)

thread = threading.Thread(target=run_flask)
thread.start()

ngrok.set_auth_token(NGROK_AUTH_TOKEN)
public_url = ngrok.connect(8000)

print("‚≠ê PUBLIC URL:", public_url)
print("‚≠ê API ENDPOINT:", str(public_url) + "/api/analyze")


# NGROK_AUTH_TOKEN = "36dn3NVTGZvRgMKFNJOvKzGdear_4EdJYindPnxhYG5uGDe6Z"

# nest_asyncio.apply()

# def run_flask():
#     app.run(host="0.0.0.0", port=8000, debug=False, use_reloader=False)

# threading.Thread(target=run_flask).start()

# ngrok.set_auth_token(NGROK_AUTH_TOKEN)
# public_url = ngrok.connect(8000)
# print("‚≠ê API ENDPOINT:", str(public_url) + "/api/analyze")


 * Serving Flask app '__main__'
 * Debug mode: off


Address already in use
Port 8000 is in use by another program. Either identify and stop that program, or start the server with a different port.


‚≠ê PUBLIC URL: NgrokTunnel: "https://unthawing-profanely-ula.ngrok-free.dev" -> "http://localhost:8000"
‚≠ê API ENDPOINT: NgrokTunnel: "https://unthawing-profanely-ula.ngrok-free.dev" -> "http://localhost:8000"/api/analyze
