In [None]:
"""
Week 2 Assignment: LLM Engineering
Author: Nikhil Raut

Notebook: ai_domain_finder.ipynb

Purpose:
Build an agentic AI Domain Finder that proposes short, brandable .com names, verifies availability via RDAP, 
then returns: 
    a list of available .coms, 
    one preferred pick, 
    and a brief audio rationale.
"""


In [None]:
import os
import json
import requests
from typing import Dict, List, Tuple
import re

from dotenv import load_dotenv
from openai import OpenAI
import gradio as gr

# Load variables from a local .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)

# Model identifiers: OPENAI_MODEL drives the chat/tool-calling loop,
# TTS_MODEL is used to synthesize the spoken rationale.
OPENAI_MODEL = "gpt-5-nano-2025-08-07"
TTS_MODEL = "gpt-4o-mini-tts"

# Shared client used for both chat completions and text-to-speech.
# NOTE(review): presumably reads its API key from the environment loaded above — confirm.
openai = OpenAI()

In [None]:
# RDAP lookup URL template; "{}" is replaced with the full .com name.
RDAP_URL = "https://rdap.verisign.com/com/v1/domain/{}"

def _to_com(domain: str) -> str:
    """Return *domain* trimmed, lower-cased and guaranteed to end in ``.com``.

    A name that already ends in ``.com`` is returned as is (after trimming and
    lower-casing); anything else gets ``.com`` appended.
    """
    cleaned = domain.strip().lower()
    if cleaned.endswith(".com"):
        return cleaned
    return "{}.com".format(cleaned)

def check_com_availability(domain: str) -> Dict:
    """Look up one .com name via RDAP and report whether it appears unregistered.

    Args:
        domain: A domain root or a ``.com`` name; normalized with ``_to_com``.

    Returns:
        ``{"domain": <fqdn>, "available": bool, "status": int}``. An HTTP 404
        from the RDAP endpoint is treated as "available". When the request
        itself fails, ``available`` is False and ``status`` is 0.
    """
    name = _to_com(domain)
    try:
        response = requests.get(RDAP_URL.format(name), timeout=6)
    except requests.RequestException:
        # Network failure: report conservatively as not available.
        return {"domain": name, "available": False, "status": 0}
    code = response.status_code
    return {"domain": name, "available": (code == 404), "status": code}

def check_com_availability_bulk(domains: List[str]) -> Dict:
    """Check several .com names via RDAP over one shared HTTP session.

    Args:
        domains: Domain roots or ``.com`` names; each is normalized with
            ``_to_com``. An empty (or None) input performs no network I/O.

    Returns:
        {
          "results": [{"domain": "...", "available": bool, "status": int}, ...],
          "available": ["..."],                 # names whose lookup returned 404
          "count_available": int
        }
        A lookup that fails at the network level is recorded with
        ``available=False`` and ``status=0`` rather than raising.
    """
    results: List[Dict] = []
    if domains:
        # The context manager closes the session's pooled connections even if
        # an unexpected error escapes the loop.
        with requests.Session() as session:
            for d in domains:
                fqdn = _to_com(d)
                try:
                    r = session.get(RDAP_URL.format(fqdn), timeout=6)
                except requests.RequestException:
                    results.append({"domain": fqdn, "available": False, "status": 0})
                else:
                    # RDAP answers 404 when no registration exists for the name.
                    results.append({
                        "domain": fqdn,
                        "available": r.status_code == 404,
                        "status": r.status_code,
                    })

    available = [x["domain"] for x in results if x["available"]]
    return {"results": results, "available": available, "count_available": len(available)}


In [None]:
# Function-calling schema advertised to the model for the bulk RDAP checker.
# It takes one required argument, `domains`: a list of 1-50 strings.
check_tool_bulk = dict(
    type="function",
    function=dict(
        name="check_com_availability_bulk",
        description="Batch check .com availability via RDAP for a list of domains (roots or FQDNs).",
        parameters=dict(
            type="object",
            properties=dict(
                domains=dict(
                    type="array",
                    items=dict(type="string"),
                    minItems=1,
                    maxItems=50,
                    description="List of domain roots or .com FQDNs.",
                ),
            ),
            required=["domains"],
            additionalProperties=False,
        ),
    ),
)

# Tool list passed to every chat-completion request.
TOOLS = [check_tool_bulk]


In [None]:
def handle_tool_calls(message) -> List[Dict]:
    """Execute the model's tool calls and build the matching ``tool`` messages.

    Exactly one tool message is produced per entry in ``message.tool_calls``,
    so every ``tool_call_id`` is answered in the follow-up completion request.
    Unknown tool names and unparsable arguments are answered with an
    ``{"error": ...}`` payload instead of being skipped or raising.

    Args:
        message: Assistant message object exposing ``tool_calls`` (each with
            ``id``, ``function.name`` and ``function.arguments`` as a JSON string).

    Returns:
        List of ``{"role": "tool", "tool_call_id": ..., "content": <json str>}``,
        in the same order as the incoming tool calls.
    """
    results = []
    for call in (message.tool_calls or []):
        name = call.function.name
        try:
            args = json.loads(call.function.arguments or "{}")
        except json.JSONDecodeError:
            args = None

        if not isinstance(args, dict):
            payload = {"error": "Tool arguments must be a JSON object."}
        elif name == "check_com_availability_bulk":
            # This is the tool actually registered in TOOLS.
            domains = args.get("domains") or []
            if isinstance(domains, str):
                domains = [domains]
            payload = check_com_availability_bulk([d for d in domains if isinstance(d, str)])
        elif name == "check_com_availability":
            # Kept for backward compatibility with the single-domain tool name.
            payload = check_com_availability(args.get("domain", ""))
        else:
            payload = {"error": f"Unknown tool: {name}"}

        results.append({
            "role": "tool",
            "tool_call_id": call.id,
            "content": json.dumps(payload)
        })
    return results


In [None]:
# System prompt for the agent. The tool named here must match the one
# registered in TOOLS (check_com_availability_bulk), and the three section
# headings must stay in step with the Markdown parsers further down, which
# look for "1) Available .com domains", "2) Preferred domain" and
# "Audio explanation:".
SYSTEM_PROMPT = """You are the Agent for project "AI Domain Finder".
Goal: suggest .com domains and verify availability using the tool ONLY (no guessing).

Instructions:
- Always propose 5-12 brandable .com candidates based on:
  (1) Industry, (2) Target Customers, (3) Description.
- Verify every candidate by CALLING the tool check_com_availability_bulk,
  passing all candidates together in its `domains` list.
- Respond ONLY after checking all candidates.
- Output Markdown with three sections and these exact headings:
  1) Available .com domains:
     - itemized list (root + .com)
  2) Preferred domain:
     - a single best pick
  3) Audio explanation:
     - 1-2 concise sentences explaining the preference

Constraints:
- Use customer-familiar words where helpful.
- Keep names short, simple, pronounceable; avoid hyphens/numbers unless meaningful.
- Never include TLDs other than .com.
"""


In [None]:
def run_agent_with_tools(history: List[Dict]) -> str:
    """Run the chat-completion / tool-call loop and return the final reply text.

    The system prompt is prepended to *history*. While the model finishes with
    ``finish_reason == "tool_calls"``, the assistant message and the tool
    results from ``handle_tool_calls`` are appended and the request is repeated.

    Args:
        history: list of {"role": "...", "content": "..."} messages.

    Returns:
        The assistant's final message content. An empty string is returned
        when the API gives no text content (``content`` is None), so callers
        can safely apply string operations to the result.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}] + history
    resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)

    while resp.choices[0].finish_reason == "tool_calls":
        tool_msg = resp.choices[0].message
        tool_results = handle_tool_calls(tool_msg)
        # The assistant message carrying the tool calls must precede its results.
        messages.append(tool_msg)
        messages.extend(tool_results)
        resp = openai.chat.completions.create(model=OPENAI_MODEL, messages=messages, tools=TOOLS)

    return resp.choices[0].message.content or ""

In [None]:
def extract_audio_text(markdown_reply: str) -> str:
    """Return up to two sentences from the reply's 'Audio explanation:' section.

    The text after the (case-insensitive) marker is cleaned of list bullets,
    bold markers and backticks, then split into sentences at whitespace that
    follows ``.``, ``!`` or ``?``. Dots inside domain names such as
    ``foo.com`` therefore do not end a sentence.

    Args:
        markdown_reply: The assistant's Markdown reply (None is treated as empty).

    Returns:
        The first one or two sentences, ending in terminal punctuation. A
        fixed generic sentence is returned when the marker is missing or the
        section has no text.
    """
    fallback = "This domain is the clearest, most memorable fit for the audience and brand goals."
    found = re.search(r"audio explanation:", markdown_reply or "", re.IGNORECASE)
    if found is None:
        return fallback

    cleaned_lines = []
    for line in markdown_reply[found.end():].splitlines():
        # Drop Markdown decoration so it is not read aloud by the TTS voice.
        line = line.strip().lstrip("-*•#>").replace("**", "").replace("`", "").strip()
        if line:
            cleaned_lines.append(line)
    text = " ".join(cleaned_lines)

    sentences = [s for s in re.split(r"(?<=[.!?])\s+", text) if s]
    if not sentences:
        return fallback

    spoken = " ".join(sentences[:2])
    return spoken if spoken[-1] in ".!?" else spoken + "."

def synth_audio(text: str) -> bytes:
    """Synthesize *text* with the TTS model (voice "alloy") and return the audio bytes."""
    speech_args = {"model": TTS_MODEL, "voice": "alloy", "input": text}
    response = openai.audio.speech.create(**speech_args)
    return response.content


In [None]:
def chat(message: str, history_ui: List[Dict]) -> Tuple[List[Dict], bytes]:
    """Handle one chat turn: ask the agent, then voice its audio explanation.

    Args:
        message: Latest user text (free-form).
        history_ui: Prior turns as [{"role": "user"/"assistant", "content": "..."}].

    Returns:
        (updated history including the new user and assistant turns,
         audio bytes for the reply's 'Audio explanation').
    """
    # Keep only the role/content keys the chat API expects.
    conversation = []
    for turn in history_ui:
        conversation.append({"role": turn["role"], "content": turn["content"]})
    conversation.append({"role": "user", "content": message})

    answer_md = run_agent_with_tools(conversation)
    conversation.append({"role": "assistant", "content": answer_md})

    spoken = extract_audio_text(answer_md)
    return conversation, synth_audio(spoken)


In [None]:

# Matches a single-label .com name (label of 1-63 letters/digits/hyphens that
# neither starts nor ends with a hyphen), case-insensitively.
_DOMAIN_RE = re.compile(r"\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.com\b", re.I)

def _norm_domain(s: str) -> str:
    """Trim and lower-case *s*, appending ``.com`` when it is missing."""
    s = s.strip().lower()
    return s if s.endswith(".com") else f"{s}.com"

def parse_available(md: str) -> list[str]:
    """Extract the domains listed under the "1) Available .com domains" heading.

    Scanning starts after the line carrying that heading and stops at the
    "2) Preferred" heading. Headings are matched case-insensitively and may
    carry leading Markdown decoration (``#``, ``*``, ``_``, ``>``), e.g.
    ``### 1) Available .com domains:`` or ``**1) Available .com domains:**``.

    Args:
        md: The assistant's Markdown reply.

    Returns:
        Lower-cased ``.com`` names in first-seen order, without duplicates.
        Empty when the section is missing or lists no domains.
    """
    def heading_key(line: str) -> str:
        # Normalize a line for heading comparison: drop indentation and
        # leading Markdown markers, then lower-case.
        return line.strip().lstrip("#*_> \t").lower()

    out = []
    in_section = False
    for ln in md.splitlines():
        key = heading_key(ln)
        if not in_section:
            in_section = key.startswith("1) available .com domains")
            continue
        if key.startswith("2) preferred"):
            break
        out.extend(_norm_domain(m) for m in _DOMAIN_RE.findall(ln))

    # dict preserves insertion order, giving an order-preserving dedupe.
    return list(dict.fromkeys(out))

def parse_preferred(md: str) -> str:
    """Return the preferred domain named in the reply, or "" if none is found.

    The first .com name within 500 characters of the "2) preferred domain"
    heading (case-insensitive) wins; failing that, the first .com name found
    anywhere in *md* is used.
    """
    start = md.lower().find("2) preferred domain")
    hit = None
    if start != -1:
        hit = _DOMAIN_RE.search(md[start: start + 500])
    if hit is None:
        # No heading, or no domain close to it: fall back to the whole reply.
        hit = _DOMAIN_RE.search(md)
    if hit is None:
        return ""
    return _norm_domain(hit.group(0))

def merge_and_sort(old: list[str], new: list[str]) -> list[str]:
    """Union two domain lists case-insensitively, ordered by length then alphabetically."""
    combined = set()
    for name in (*old, *new):
        combined.add(name.lower())
    ordered = list(combined)
    ordered.sort(key=lambda name: (len(name), name))
    return ordered

def fmt_available_md(domains: list[str]) -> str:
    """Render the cumulative availability panel as Markdown.

    Produces a heading followed by one back-ticked bullet per domain, or a
    "none yet" placeholder when *domains* is empty.
    """
    heading = "### Available .com domains (cumulative)"
    if domains:
        body = "\n".join(["- `{}`".format(name) for name in domains])
    else:
        body = "*– none yet –*"
    return heading + "\n\n" + body

def fmt_preferred_md(d: str) -> str:
    """Render the preferred-domain panel as Markdown, with a placeholder when *d* is empty."""
    heading = "### Preferred domain"
    body = "`{}`".format(d) if d else "*– not chosen yet –*"
    return heading + "\n\n" + body

def build_initial_message(industry: str, customers: str, desc: str) -> str:
    """Compose the first user message from the three form fields, one labelled field per line."""
    lines = [
        "Please propose .com domains based on:",
        "Industry: {}".format(industry),
        "Target Customers: {}".format(customers),
        "Description: {}".format(desc),
    ]
    return "\n".join(lines)


In [None]:
def run_and_extract(history: list[dict]) -> tuple[str, list[str], str, str]:
    """Run the agent on *history* and parse the pieces the UI needs from its reply.

    Returns:
        (reply_md, available_domains, preferred_domain, audio_text), where the
        last three are parsed from the Markdown reply.
    """
    # Coerce a missing reply to "" so the parsers can treat it as text.
    reply_md = run_agent_with_tools(history) or ""
    return (
        reply_md,
        parse_available(reply_md),
        parse_preferred(reply_md),
        extract_audio_text(reply_md),
    )

def initial_submit(industry: str, customers: str, desc: str,
                   history: list[dict], known_avail: list[str], preferred_now: str):
    """Handle the "Find Domains" click: run the first agent turn and refresh the UI.

    Args:
        industry, customers, desc: Form fields used to build the first user message.
        history: Conversation so far (may be empty or None).
        known_avail: Available domains accumulated so far (may be empty or None).
        preferred_now: Currently stored preferred domain, kept when the new
            reply yields none.

    Returns:
        A 7-tuple matching the click handler's outputs: new history, merged
        available list, preferred domain, preferred-panel update,
        available-panel update, reply-box update (cleared and made visible),
        audio update (made visible).
    """
    msg = build_initial_message(industry, customers, desc)
    history = (history or []) + [{"role": "user", "content": msg}]

    reply_md, new_avail, preferred, audio_text = run_and_extract(history)
    history += [{"role": "assistant", "content": reply_md}]

    all_avail = merge_and_sort(known_avail or [], new_avail)
    preferred_final = preferred or preferred_now or ""
    audio_bytes = synth_audio(audio_text)

    return (
        history,                         # s_history
        all_avail,                       # s_available
        preferred_final,                 # s_preferred
        gr.update(value=fmt_preferred_md(preferred_final)),   # preferred_md
        gr.update(value=fmt_available_md(all_avail)),         # available_md
        gr.update(value="", visible=True),                    # reply_in -> now visible
        gr.update(value=audio_bytes, visible=True),           # audio_out
    )

def refine_submit(reply: str,
                  history: list[dict], known_avail: list[str], preferred_now: str):
    """Handle a follow-up message typed into the Reply box.

    A blank reply changes nothing: state is passed through and the three
    display components receive no-op updates. Otherwise the trimmed reply is
    sent as a new user turn and the panels and audio are refreshed.

    Returns:
        A 7-tuple matching the submit handler's outputs: cleared reply text,
        history, available list, preferred domain, preferred-panel update,
        available-panel update, audio update.
    """
    follow_up = reply.strip()
    if follow_up == "":
        return ("", history, known_avail, preferred_now,
                gr.update(), gr.update(), gr.update())

    # Work on a fresh list so the incoming state object is not mutated.
    conversation = list(history or [])
    conversation.append({"role": "user", "content": follow_up})
    reply_md, new_avail, preferred, audio_text = run_and_extract(conversation)
    conversation.append({"role": "assistant", "content": reply_md})

    merged = merge_and_sort(known_avail or [], new_avail)
    # Keep the previously chosen domain when this reply yields none.
    chosen = preferred if preferred else (preferred_now or "")
    spoken = synth_audio(audio_text)

    preferred_panel = gr.update(value=fmt_preferred_md(chosen))
    available_panel = gr.update(value=fmt_available_md(merged))
    audio_panel = gr.update(value=spoken, visible=True)
    return ("", conversation, merged, chosen,
            preferred_panel, available_panel, audio_panel)


In [None]:
# Gradio layout: input form, audio player and reply box on the left;
# preferred-domain and cumulative-availability panels on the right.
with gr.Blocks(title="AI Domain Finder (.com only)") as ui:
    gr.Markdown("# AI Domain Finder (.com only)")
    gr.Markdown("Agent proposes .com domains, verifies via RDAP, picks a preferred choice, and explains briefly.")

    # App state
    s_history = gr.State([])      # conversation as role/content dicts
    s_available = gr.State([])    # available domains accumulated across turns
    s_preferred = gr.State("")    # currently preferred domain ("" until chosen)

    with gr.Row():
        with gr.Column(scale=7):  # LEFT 70%
            with gr.Group():
                industry_in = gr.Textbox(label="Industry")
                customers_in = gr.Textbox(label="Target Customers")
                desc_in = gr.Textbox(label="Description", lines=3)
                find_btn = gr.Button("Find Domains", variant="primary")

            # Hidden until the first result; initial_submit makes it visible.
            audio_out = gr.Audio(label="Audio explanation", autoplay=True, visible=False)

            reply_in = gr.Textbox(
                label="Reply",
                placeholder="Chat with agent to refine the outputs",
                lines=2,
                visible=False,  # 👈 hidden for the first input
            )

        with gr.Column(scale=3):  # RIGHT 30%
            # Both panels start with their "empty" placeholder text.
            preferred_md = gr.Markdown(fmt_preferred_md(""))
            available_md = gr.Markdown(fmt_available_md([]))

    # Events
    # First turn: the outputs list must match initial_submit's 7-tuple order.
    find_btn.click(
        initial_submit,
        inputs=[industry_in, customers_in, desc_in, s_history, s_available, s_preferred],
        outputs=[
            s_history, s_available, s_preferred,
            preferred_md, available_md,
            reply_in,           # 👈 becomes visible after first run
            audio_out           # 👈 becomes visible after first run
        ],
    )

    # Follow-up turns: the outputs list must match refine_submit's 7-tuple order
    # (reply_in comes first so the box is cleared).
    reply_in.submit(
        refine_submit,
        inputs=[reply_in, s_history, s_available, s_preferred],
        outputs=[
            reply_in, s_history, s_available, s_preferred,
            preferred_md, available_md, audio_out
        ],
    )

# Start the app; inbrowser=True opens a browser tab and show_error=True
# surfaces handler exceptions in the UI.
ui.launch(inbrowser=True, show_error=True)
