In [None]:
# --- Cell A: wire Search + Azure OpenAI into a tiny RAG helper ---

import os, textwrap
from typing import List, Dict, Optional

from dotenv import load_dotenv, find_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery
from azure.core.exceptions import HttpResponseError
from azure.search.documents.models import HybridSearch

from openai import AzureOpenAI, APIConnectionError

load_dotenv(find_dotenv(), override=True)

# ---- Config (expects the same envs you already used) ----
SEARCH_ENDPOINT = os.environ["AZURE_SEARCH_ENDPOINT"]
SEARCH_INDEX    = os.environ["AZURE_SEARCH_INDEX"]
SEARCH_KEY      = os.getenv("AZURE_SEARCH_API_KEY")  # omit if using AAD/RBAC
VECTOR_FIELD    = os.getenv("VECTOR_FIELD", "text_vector_v4")
TEXT_FIELD      = os.getenv("TEXT_FIELD", "chunk")

AOAI_ENDPOINT   = os.environ["AZURE_OPENAI_ENDPOINT"]            # https://<resource>.openai.azure.com
AOAI_API_VER    = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-10-21")
AOAI_DEPLOYMENT = os.environ["AZURE_OPENAI_DEPLOYMENT"]          # e.g., gpt-4o-mini / o3-mini / gpt-5 preview
AOAI_KEY        = os.getenv("AZURE_OPENAI_API_KEY")              # omit if using AAD

# ---- Clients ----
def get_search_client() -> SearchClient:
    cred = AzureKeyCredential(SEARCH_KEY) if SEARCH_KEY else DefaultAzureCredential()
    return SearchClient(SEARCH_ENDPOINT, SEARCH_INDEX, credential=cred)

def get_aoai_client() -> AzureOpenAI:
    if AOAI_KEY:
        return AzureOpenAI(azure_endpoint=AOAI_ENDPOINT, api_key=AOAI_KEY, api_version=AOAI_API_VER)
    return AzureOpenAI(azure_endpoint=AOAI_ENDPOINT, azure_ad_token_provider=DefaultAzureCredential().get_token, api_version=AOAI_API_VER)


# Lab 1 - RAG Opt

In [2]:

def retrieve_hybrid_enhanced(query: str, top: int = 20, k: int = 50, max_text_recall_size:int = 200):
    sc = get_search_client()
    try:
        vq = VectorizableTextQuery(
            text=query, 
            k=k, 
            fields=VECTOR_FIELD, 
            weight=1.8
            )
        
        # Prefer vector-only search (integrated vectorization). If your index isn't set up for it, this raises.
        results = sc.search(
            search_text=query, 
            vector_queries=[vq],
            top=top, 
            query_type="semantic",
            query_caption="extractive", 
            query_answer='extractive',
            hybrid_search=HybridSearch(max_text_recall_size=max_text_recall_size),
            query_caption_highlight_enabled=True,
            semantic_error_mode="partial"
            )
        mode = "hybrid + semantic"
    except HttpResponseError as e:
        # Fall back to lexical so you still get results while fixing vector config
        results = sc.search(search_text=query, top=k)
        mode = f"lexical (fallback due to: {e.__class__.__name__})"

    hits: List[Dict] = []
    for r in results:
        d = r.copy() if hasattr(r, "copy") else {k2: r[k2] for k2 in r}
        d["score"] = d.get("@search.reranker_score") or d.get("@search.score") or 0.0
        caps = d.get("@search.captions")
        if isinstance(caps, list) and caps:
            d["caption"] = getattr(caps[0], "text", None)
        hits.append(d)

    return mode, hits

In [3]:

def build_context(hits: List[Dict], text_field: str = TEXT_FIELD, max_chars: int = 20000) -> str:
    """Build a compact, numbered context block to feed the model."""
    lines = []
    total = 0
    for i, h in enumerate(hits, 1):
        title     = h.get("title")
        chunk_id  = h.get("chunk_id")
        snippet   = (h.get(text_field) or "")
        if not snippet:
            continue
        snippet = textwrap.shorten(snippet, width=700, placeholder=" ...")
        block = f"[{i}] title={title!r} | chunk_id={chunk_id} | score={h.get('score'):.4f}\n{snippet}"
        if total + len(block) > max_chars:
            break
        total += len(block)
        lines.append(block)
    return "\n\n---\n\n".join(lines)


In [None]:
from prompts import new_system_finance_prompt
from prompts2 import section1, section2, section3, section4, section5, section6, section7, section8

def rag_answer(question: str):
    
    client = get_aoai_client()

    question = f"Create the analysis of this company profile section for Radley + Co. Limited."

    replies =[]
    # Starting loop for section
    sections = [section1, section2, section3, section4, section5, section6, section7, section8]
    
    

    for section in sections:

        mode, hits = retrieve_hybrid_enhanced(section)
        ctx = build_context(hits) 
        
        system_msg = section
        user_msg = f"Question:\n{question}\n\n Context snippets (numbered):\n{ctx}"

        messages = [
            {"role": "system", "content": system_msg},
            {"role": "user",   "content": user_msg},
        ]

        # Try streaming first (SSE). Some networks/proxies block streaming; if so, fall back.
        try:
            text = ""
            stream = client.chat.completions.create(
                model=AOAI_DEPLOYMENT,
                messages=messages,
                stream=True,
                stream_options={"include_usage": True},
            )
            for chunk in stream:
                choices = getattr(chunk, "choices", None)
                if not choices:
                    continue
                delta = getattr(choices[0], "delta", None)
                if not delta:
                    continue
                piece = getattr(delta, "content", None)
                if piece:
                    text += piece
            answer = text if text else "(no text returned)"
            mode_model = "streaming"
        except APIConnectionError:
            resp = client.chat.completions.create(
                model=AOAI_DEPLOYMENT,
                messages=messages,
            )
            answer = resp.choices[0].message.content
            mode_model = "non-streaming (fallback)"

        replies.append(
            {
                "search_mode": mode,
                "model_mode": mode_model,
                "answer": answer,
                "sources": [
                    {"n": i+1, "title": h.get("title"), "chunk_id": h.get("chunk_id"), "score": h.get("score")}
                    for i, h in enumerate(hits)
                ],
            }
        )
    
    return replies


In [13]:
answer = rag_answer(question="Create the company profile of Radley + Co. Limited.")


k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored


k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored


In [17]:
for a in answer:
    print(a['answer'])

Company Snapshot — Radley + Co. Limited (Introduction Table)

- Primary Industry: n/a
- Incorporation Year: n/a
- Headquarters (city, country): n/a
- Number of Employees: 450 (average monthly, FY24)
- Operational KPIs: n/a

Sources:
- Radley + Co. Limited Annual Report, FY24 – Strategic Report (Average monthly number of employees 450 for the 52 weeks ended 27 Apr-24) [2024_radley.pdf, p.4] 
- Radley + Co. Limited Annual Reports FY23 and FY22 reviewed; the available sections provided did not disclose the principal activity/industry label, incorporation year, headquarters or specific operational KPIs beyond staff numbers [2023_radley.pdf; 2022_radley.pdf]
2. Business Overview
- Radley + Co. Limited operates in the retail sector and is exposed to the retail markets in the U.K. and the U.S.A., where consumer disposable income dynamics are an important driver of trading performance. 
- The company’s trading plan assumes no like-for-like growth but incorporates moderate growth from opening n

# 2 RAG Opt with HyDE

In [21]:
import json, re
from typing import List, Dict

def hyde_optimizer(question: str) -> Dict:
    """
    Returns a JSON plan:
      {
        "canonical": "...",                  # tightened canonical query
        "aliases": ["...", "..."],           # entity variants
        "must_terms": ["annual report"],     # hard lexical hints
        "year_hint": "2019..2025",           # optional
        "hyde": "≤120-word hypothetical answer paragraph",
        "rewrites": ["q1", "q2", "q3", "q4"] # diverse facets
      }
    """
    client = get_aoai_client()
    system = (
        "Rewrite the user's ask for enterprise retrieval.\n"
        "- Normalize named entities (legal names, brand variants, tickers).\n"
        "- Add must-have lexical terms if obvious (e.g., 'annual report').\n"
        "- Produce 1 HyDE paragraph (<=120 words).\n"
        "- Produce 4 short diverse rewrites (<=12 tokens each) that cover different facets.\n"
        "- If a year range is implied, include it as 'YYYY..YYYY'."
        "Output strict JSON with keys: canonical, aliases, must_terms, year_hint, hyde, rewrites."
    )
    user = f"User question:\n{question}\nReturn ONLY JSON."

    resp = client.chat.completions.create(
        model=AOAI_DEPLOYMENT,
        messages=[{"role":"system","content":system},{"role":"user","content":user}]
    )
    text = resp.choices[0].message.content.strip()
    try:
        plan = json.loads(text)
    except Exception:
        # ultra-safe fallback
        plan = {
            "canonical": question,
            "aliases": [],
            "must_terms": [],
            "year_hint": "",
            "hyde": question,      # use the question itself if JSON parse fails
            "rewrites": []
        }
    return plan

def hyde_build_bm25_query(plan: Dict) -> str:
    # Canonical + must_terms + hints; keep it short for BM25
    parts = [plan.get("canonical","")]
    parts += plan.get("aliases", [])[:2]  # at most a couple to avoid noise
    parts += plan.get("must_terms", [])
    if plan.get("year_hint"):
        parts.append(plan["year_hint"])
    bm25 = " ".join(p for p in parts if p).strip()
    return re.sub(r"\s+", " ", bm25)

def hyde_build_vector_queries(plan: Dict, per_k: int = 25) -> List:
    """
    Multiple VectorizableTextQuery items in ONE call.
    Azure will RRF-merge them (and then semantic re-rank).
    Weights bias which vectors matter more.
    """
    vqs = []
    # HyDE gets the highest weight
    vqs.append(VectorizableTextQuery(text=plan.get("hyde") or plan.get("canonical",""),
                                     k=per_k, fields=VECTOR_FIELD, weight=2.0))
    # Canonical question
    vqs.append(VectorizableTextQuery(text=plan.get("canonical",""),
                                     k=per_k, fields=VECTOR_FIELD, weight=1.6))
    # Diverse rewrites
    for q in plan.get("rewrites", [])[:4]:
        vqs.append(VectorizableTextQuery(text=q, k=per_k, fields=VECTOR_FIELD, weight=1.2))
    return vqs


In [23]:

def retrieve_hybrid_enhanced(query: str, top: int = 20, k: int = 50, max_text_recall_size:int = 200):
    sc = get_search_client()

    plan = hyde_optimizer(query)
    bm25 = hyde_build_bm25_query(plan)
    vqs  = hyde_build_vector_queries(plan, per_k=k)

    try:
        
        # Prefer vector-only search (integrated vectorization). If your index isn't set up for it, this raises.
        results = sc.search(
            search_text=bm25, 
            vector_queries=vqs,
            top=top, 
            query_type="semantic",
            query_caption="extractive", 
            query_answer='extractive',
            hybrid_search=HybridSearch(max_text_recall_size=max_text_recall_size),
            query_caption_highlight_enabled=True,
            semantic_error_mode="partial"
            )
        mode = "hybrid + semantic"
    except HttpResponseError as e:
        # Fall back to lexical so you still get results while fixing vector config
        results = sc.search(search_text=query, top=k)
        mode = f"lexical (fallback due to: {e.__class__.__name__})"

    hits: List[Dict] = []
    for r in results:
        d = r.copy() if hasattr(r, "copy") else {k2: r[k2] for k2 in r}
        d["score"] = d.get("@search.reranker_score") or d.get("@search.score") or 0.0
        caps = d.get("@search.captions")
        if isinstance(caps, list) and caps:
            d["caption"] = getattr(caps[0], "text", None)
        hits.append(d)

    answers = getattr(results, "answers", None) or []
    semantic_answer = answers[0].text if answers else None  # SDK shape may vary. See docs. :contentReference[oaicite:5]{index=5}


    return mode, hits, plan, semantic_answer

In [18]:

def build_context(hits: List[Dict], text_field: str = TEXT_FIELD, max_chars: int = 20000) -> str:
    """Build a compact, numbered context block to feed the model."""
    lines = []
    total = 0
    for i, h in enumerate(hits, 1):
        title     = h.get("title")
        chunk_id  = h.get("chunk_id")
        snippet   = (h.get(text_field) or "")
        if not snippet:
            continue
        snippet = textwrap.shorten(snippet, width=700, placeholder=" ...")
        block = f"[{i}] title={title!r} | chunk_id={chunk_id} | score={h.get('score'):.4f}\n{snippet}"
        if total + len(block) > max_chars:
            break
        total += len(block)
        lines.append(block)
    return "\n\n---\n\n".join(lines)


In [19]:
from prompts import new_system_finance_prompt
def rag_answer(question: str, k: int = 20, temperature: float = 0.2):

    mode, hits, plan, semantic_answer = retrieve_hybrid_enhanced(question, k=k)
    ctx = build_context(hits)
    client = get_aoai_client()

    if semantic_answer:
        ctx = f"Answer span (verbatim): {semantic_answer}\n---\n{ctx}"
    
    question = f"Create the company profile of Radley + Co. Limited."

    system_msg = new_system_finance_prompt
    user_msg = f"Question:\n{question}\n\nContext snippets (numbered):\n{ctx}"

    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user",   "content": user_msg},
    ]

    # Try streaming first (SSE). Some networks/proxies block streaming; if so, fall back.
    try:
        text = ""
        stream = client.chat.completions.create(
            model=AOAI_DEPLOYMENT,
            messages=messages,
            stream=True,
            stream_options={"include_usage": True},
        )
        for chunk in stream:
            choices = getattr(chunk, "choices", None)
            if not choices:
                continue
            delta = getattr(choices[0], "delta", None)
            if not delta:
                continue
            piece = getattr(delta, "content", None)
            if piece:
                text += piece
        answer = text if text else "(no text returned)"
        mode_model = "streaming"
    except APIConnectionError:
        resp = client.chat.completions.create(
            model=AOAI_DEPLOYMENT,
            messages=messages,
        )
        answer = resp.choices[0].message.content
        mode_model = "non-streaming (fallback)"

    return {
        "search_mode": mode,
        "model_mode": mode_model,
        "answer": answer,
        "optimizer_plan": plan,
        "sources": [
            {"n": i+1, "title": h.get("title"), "chunk_id": h.get("chunk_id"), "score": h.get("score")}
            for i, h in enumerate(hits)
        ],
    }


In [24]:
answer = rag_answer(question="Create the company profile of Radley + Co. Limited.")
print(answer['answer'])

k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored
k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored


Radley + Co. Limited – One‑Pager Restructuring Profile
Sources used: FY24 annual report for the 52 weeks ended 27 Apr-24 (Radley+Co Limited), FY23 annual report for the 53 weeks ended 29 Apr-23, FY22 annual report for the 52 weeks ended 23 Apr-22. Exact page and note references are cited per section below.

1) Introduction Table (Company Snapshot)
- Primary Industry: Fashion accessories
  Source: FY24 AR, Notes, Note 1 “General information” and principal activity (p.19+ per contents; also p.37 of PDF view)
- Incorporation Year: n/a
  Source: n/a in reports reviewed
- Headquarters: Milton Keynes, United Kingdom
  Source: FY24 AR, Company Information, Registered office (company information page; also p.1 of PDF view)
- Number of Employees: n/a
  Source: n/a in reports reviewed
- Operational KPIs: n/a
  Source: n/a in reports reviewed

2) Business Overview
- The company offers multi‑channel distribution of the Radley London affordable luxury accessories brand across wholesale and direct c

# 3 Add re-retreavel

In [30]:
from typing import Dict, List, Tuple

def _distinct_docs(hits: List[Dict]) -> int:
    # adjust keys if you have a stable doc id field
    keys = []
    for h in hits:
        # Prefer an explicit doc id if present; otherwise title is a usable proxy
        keys.append(h.get("id") or h.get("document_id") or h.get("title") or h.get("chunk_id"))
    return len(set([k for k in keys if k]))

def _avg_reranker_score(hits: List[Dict]) -> float:
    scores = [float(h.get("score") or 0.0) for h in hits]
    return sum(scores)/len(scores) if scores else 0.0

def is_grounded(hits: List[Dict],
                min_docs: int = 2,
                min_avg_score: float = 0.20) -> Tuple[bool, Dict]:
    grounded = True
    reasons = {}
    if _distinct_docs(hits) < min_docs:
        grounded = False
        reasons["too_few_docs"] = _distinct_docs(hits)
    if _avg_reranker_score(hits) < min_avg_score:
        grounded = False
        reasons["low_avg_score"] = round(_avg_reranker_score(hits), 4)
    return grounded, reasons

def reretrieve_until_grounded(
    query: str,
    *,
    attempt_profiles=None,
    max_attempts: int = 3,
) -> Tuple[str, List[Dict], Dict]:
    """
    Returns: (mode_used, hits, debug_info)
    attempt_profiles: list of dicts with {top, k, max_text_recall_size, query_override?}
    """
    if attempt_profiles is None:
        attempt_profiles = [
            # Attempt 1: your current defaults
            dict(top=20, k=50, max_text_recall_size=200, label="baseline"),
            # Attempt 2: widen both legs of hybrid (BM25 + vector)
            dict(top=30, k=80, max_text_recall_size=500, label="widen_recall"),
            # Attempt 3: lightweight rewrite (no HyDE required)
            dict(top=30, k=80, max_text_recall_size=800, label="rewrite"),
        ]

    debug = {"attempts": []}
    last_mode, last_hits = "n/a", []

    for i, prof in enumerate(attempt_profiles, 1):
        q = query
        if prof.get("label") == "rewrite":
            # very small, safe rewrite to help BM25 and vector
            # (You can swap in your GPT optimizer here if you want stronger reformulations.)
            q = simple_query_rewrite(query)

        mode, hits = retrieve_hybrid_enhanced(
            q,
            top=prof["top"],
            k=prof["k"],
            max_text_recall_size=prof["max_text_recall_size"]
        )
        grounded, reasons = is_grounded(hits)
        debug["attempts"].append({
            "attempt": i,
            "label": prof.get("label"),
            "params": prof,
            "grounded": grounded,
            "reasons": reasons,
            "num_hits": len(hits),
            "distinct_docs": _distinct_docs(hits),
            "avg_score": _avg_reranker_score(hits),
            "mode": mode,
        })
        last_mode, last_hits = mode, hits

        if grounded:
            return mode, hits, debug

    # If nothing satisfied the grounding rules, return the strongest attempt (last one)
    return last_mode, last_hits, debug


def simple_query_rewrite(q: str) -> str:
    """Deterministic, zero-dependence cleanup that often helps both BM25 and vectors."""
    # normalize whitespace/quotes, remove unhelpful filler words
    import re
    q2 = q.strip().replace("’", "'").replace("“", '"').replace("”", '"')
    q2 = re.sub(r"\b(please|could you|can you|tell me|about)\b", "", q2, flags=re.I)
    q2 = re.sub(r"\s+", " ", q2).strip()
    return q2 or q


In [None]:
from prompts import new_system_finance_prompt
def rag_answer(question: str):

    client = get_aoai_client()

    question = f"Create the company profile of Radley + Co. Limited."

    mode, hits, debug = reretrieve_until_grounded(question)
    ctx = build_context(hits)

    system_msg = new_system_finance_prompt
    user_msg = f"Question:\n{question}\n\n Context snippets (numbered):\n{ctx}"

    messages = [
        {"role": "system", "content": system_msg},
        {"role": "user",   "content": user_msg},
    ]

    # Try streaming first (SSE). Some networks/proxies block streaming; if so, fall back.
    try:
        text = ""
        stream = client.chat.completions.create(
            model=AOAI_DEPLOYMENT,
            messages=messages,
            stream=True,
            stream_options={"include_usage": True},
        )
        for chunk in stream:
            choices = getattr(chunk, "choices", None)
            if not choices:
                continue
            delta = getattr(choices[0], "delta", None)
            if not delta:
                continue
            piece = getattr(delta, "content", None)
            if piece:
                text += piece
        answer = text if text else "(no text returned)"
        mode_model = "streaming"
    except APIConnectionError:
        resp = client.chat.completions.create(
            model=AOAI_DEPLOYMENT,
            messages=messages,
        )
        answer = resp.choices[0].message.content
        mode_model = "non-streaming (fallback)"

    return {
        "search_mode": mode,
        "model_mode": mode_model,
        "answer": answer,
        "retrieval_debug": debug,   # handy for tuning
        "sources": [
            {"n": i+1, "title": h.get("title"), "chunk_id": h.get("chunk_id"), "score": h.get("score")}
            for i, h in enumerate(hits)
        ],
    }


In [32]:
answer = rag_answer(question="Create the company profile of Radley + Co. Limited.")
print(answer['answer'])

k is not a known attribute of class <class 'azure.search.documents._generated.models._models_py3.VectorizableTextQuery'> and will be ignored


Company profile: Radley + Co. Limited

1) Introduction Table (Company Snapshot)
- Source: 2024 AR p.3, p.37; 2024 AR p.1

| Item | Detail |
| --- | --- |
| Primary Industry | Fashion accessories |
| Incorporation Year | n/a |
| Headquarters (city, country) | Milton Keynes, United Kingdom |
| Number of Employees | n/a |
| Operational KPIs | Operated 2 high street stores, 17 outlets and 36 concessions in the period |

Sources:
- Principal activities and operating footprint: 2024 Annual Report (AR), Strategic Report, p.3
- General information and registered office: 2024 AR, Notes, p.37; Company information page listing registered office: 2024 AR p.1

2) Business Overview
- The company operates a multi-channel distribution business for Radley goods, positioning Radley as a modern British icon of accessible luxury and an expert in handbags and accessories (2024 AR p.3).
- The company manages the group’s U.K., European and Asian direct-to-consumer and wholesale distribution and it operated 2

In [None]:
# app.py
import os
import textwrap
from dotenv import load_dotenv, find_dotenv
import json
from rag import (
    retrieve_hybrid_enhanced,
    build_context
)
from typing import List, Dict, Optional
from rag import retrieve_hybrid_enhanced, build_context
from gpts.gpt_assistants import question_to_machine
from openai import OpenAI, APIConnectionError
import streamlit as st

load_dotenv(find_dotenv(), override=True)

# ---- Config (same Azure Search envs you already use) ----
SEARCH_ENDPOINT = os.environ["AZURE_SEARCH_ENDPOINT"]
SEARCH_INDEX    = os.environ["AZURE_SEARCH_INDEX"]
SEARCH_KEY      = os.getenv("AZURE_SEARCH_API_KEY")  # omit if using AAD/RBAC
VECTOR_FIELD    = os.getenv("VECTOR_FIELD")
TEXT_FIELD      = os.getenv("TEXT_FIELD")

# ---- OpenAI (standard) config ----
OPENAI_API_KEY  = os.getenv("FELIPE_OPENAI_API_KEY")        # required
OPENAI_MODEL    = os.getenv("FELIPE_OPENAI_MODEL", "gpt-5")  # e.g., "gpt-5" or "gpt-5-mini"


class WebAgent():

    """
        - This class is responsible to operate calls and allow the usage of websearch
        - The websearch is activated through chat by mentioning "web search" in the paragraph
    """

    def __init__(self,
                k: int = 50,
                max_text_recall_size: int = 200,
                # max_chars: int,
                model: Optional[str] = OPENAI_MODEL,
                top = 20,
                max_output_tokens: int = 1200,
                reasoning_effort: str = "medium",      # "minimal" | "low" | "medium" | "high"
                verbosity: str = "medium",                 # "low" | "medium" | "high"
                tool_choice: str = "none",              # "none" | "auto" | {"type":"tool","name":"..."}
                streaming: bool = False
                ):

        # Parameters settings
        # self.company_name = company_name
        self.k = k
        self.max_text_recall_size = max_text_recall_size
        # self.max_chars = max_chars
        # ===================================
        # RAG PARAMETERS
        self.top = top
        self.k = k
        self.max_text_recall_size

        # ===================================
        # LLM settings
        self.model = model
        # self.temperature = temperature
        # self.top_p = top_p
        self.max_output_tokens = max_output_tokens
        self.reasoning_effort = reasoning_effort
        self.verbosity = verbosity
        self.streaming = streaming

        # OpenAI standard client
        self.web_openai = OpenAI(api_key=OPENAI_API_KEY)

    def _answer(self, question, stream = False):

        # 1. Identify TOOLS call

        # 2. Optimize call
        opt_user_query = question_to_machine(question, OPENAI_API_KEY)

        new_user_query = opt_user_query.output_text

        # 3. Call RAG
        mode, hits = retrieve_hybrid_enhanced(query=new_user_query, top = self.top, k = self.k, max_text_recall_size = self.max_text_recall_size)
        ctx = build_context(hits)
        # 4. Call model

        user_msg = f"Question:\n{new_user_query}\n\nContext snippets (numbered):\n{ctx}"
        system_msg = """"

        You are a restructuring analyst focused on identifying companies in financial distress that could be advisory targets for your company. 
        You prepare comprehensive, accurate and full analysis of companies highlighting liquidity issues, debt maturity risks and covenant pressure. 
        You rely on annual reports and financial statements of companies.

        WHEN the information is NOT FOUND in the context, you USE WEB SEARCH

        **Formatting and Editorial Standards**: 
            - Always **cite sources** 
            - Generate complete profile directly in the chat, take your time and don't compress important things 
            - Always write dates in the format "Mmm-yy" (e.g. Jun-24), fiscal years as "FYXX" (e.g. FY24, LTM1H25), and currencies in millions in the format "£1.2m" 
            - Always double-check revenue split 

        """
        messages = [
            {"role": "system", "content": system_msg},
            {"role": "user",   "content": user_msg},
        ]

        if stream:
            answer_box = st.empty()
            full = ""
            try:
                with self.web_openai.responses.stream(
                    model=self.model,
                    input=messages,
                    tools=[{"type": "web_search"}],
                    tool_choice="auto",
                    # max_output_tokens=self.max_output_tokens,
                    reasoning={"effort": self.reasoning_effort},
                    text={"verbosity": self.verbosity},
                ) as stream:
                    for event in stream:
                        if event.type == "response.output_text.delta":
                            piece = event.delta
                            if piece:
                                full += piece
                                answer_box.markdown(full)
                        elif event.type == "response.error":
                            raise RuntimeError(str(event.error))
                    final = stream.get_final_response()
                    if not full:
                        # fallback to final assembled text if no deltas arrived
                        full = getattr(final, "output_text", "") or ""
                        answer_box.markdown(full)
            except APIConnectionError:
                resp = self.web_openai.responses.create(
                    model=self.model,
                    input=messages,
                    tools=[{"type": "web_search"}],
                    tool_choice="auto",
                    # max_output_tokens=self.max_output_tokens,
                    reasoning={"effort": self.reasoning_effort},
                    text={"verbosity": self.verbosity},
                )
                full = getattr(resp, "output_text", "") or ""
                answer_box.markdown(full)

            return full

        
        resp = self.web_openai.responses.create(
                model=self.model,
                input=messages,
                tools=[{"type": "web_search"}],
                tool_choice="auto",
                # max_output_tokens=self.max_output_tokens,
                reasoning={"effort": self.reasoning_effort},
                text={"verbosity": self.verbosity},
            )
        answer_text = resp.output_text

        return answer_text

In [6]:

import os, textwrap
from typing import List, Dict, Optional

from dotenv import load_dotenv, find_dotenv
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.models import VectorizableTextQuery
from azure.core.exceptions import HttpResponseError
from azure.search.documents.models import HybridSearch

from openai import AzureOpenAI, APIConnectionError

load_dotenv(find_dotenv(), override=True)

# ---- Config (expects the same envs you already used) ----
SEARCH_ENDPOINT = os.environ["AZURE_SEARCH_ENDPOINT"]
SEARCH_INDEX    = os.environ["AZURE_SEARCH_INDEX"]
SEARCH_KEY      = os.getenv("AZURE_SEARCH_API_KEY")  # omit if using AAD/RBAC
VECTOR_FIELD    = os.getenv("VECTOR_FIELD", "text_vector_v4")
TEXT_FIELD      = os.getenv("TEXT_FIELD", "chunk")

AOAI_ENDPOINT   = os.environ["AZURE_OPENAI_ENDPOINT"]            # https://<resource>.openai.azure.com
AOAI_API_VER    = os.environ.get("AZURE_OPENAI_API_VERSION", "2024-10-21")
AOAI_DEPLOYMENT = os.environ["AZURE_OPENAI_DEPLOYMENT"]          # e.g., gpt-4o-mini / o3-mini / gpt-5 preview
AOAI_KEY        = os.getenv("AZURE_OPENAI_API_KEY")              # omit if using AAD


# AzureOpenAI(azure_endpoint=AOAI_ENDPOINT, api_key=AOAI_KEY, api_version=AOAI_API_VER)

client = AzureOpenAI(azure_endpoint=AOAI_ENDPOINT, api_key=AOAI_KEY, api_version=AOAI_API_VER)
messages = [
    {"role": "user",   "content": 'Tell me company names'},
]

# Try streaming first (SSE). Some networks/proxies block streaming; if so, fall back.
text = ""
stream = client.chat.completions.create(
    model=AOAI_DEPLOYMENT,
    messages=messages,
    stream=True,
    stream_options={"include_usage": True},
)

AuthenticationError: Error code: 401 - {'error': {'code': '401', 'message': 'Access denied due to invalid subscription key or wrong API endpoint. Make sure to provide a valid key for an active subscription and use a correct regional API endpoint for your resource.'}}