In [1]:

import requests
import re
import ollama
import ipywidgets as widgets
from IPython.display import display, HTML, Markdown, clear_output
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache
from datetime import datetime

# ─── SOFT-PROMPT LOADING ───────────────────────────────────────────────────
# Start from "no soft prompt" and overwrite only if the optional file exists.
SOFT_PROMPT = ""
try:
    with open("soft_prompt.txt", encoding="utf-8") as prompt_file:
        SOFT_PROMPT = prompt_file.read().strip()
except FileNotFoundError:
    # The file is optional; a missing file leaves SOFT_PROMPT empty.
    pass

# ─── DYNAMIC MODEL DISCOVERY ─────────────────────────────────────────────────
def get_installed_model_pairs():
    """Return ``(display_id, model_id)`` tuples for the models Ollama reports.

    For each entry, ``model_id`` is its ``"model"`` value (falling back to
    ``"name"``) and ``display_id`` is its ``"name"`` value (falling back to
    ``model_id``). Any exception raised while listing or reading the entries
    results in an empty list.
    """
    def as_pair(entry):
        identifier = entry.get("model", entry.get("name"))
        return entry.get("name", identifier), identifier

    try:
        listing = ollama.list()
        # The listing may carry its entries under "models"; otherwise it is
        # iterated directly.
        return [as_pair(entry) for entry in listing.get("models", listing)]
    except Exception:
        return []

# (display_id, model_id) tuples discovered once, when the module is loaded.
MODEL_PAIRS = get_installed_model_pairs()
# Display names, in discovery order.
MODELS = [pair[0] for pair in MODEL_PAIRS]
# Display name -> model id handed to Ollama.
MODEL_ALIASES = dict(MODEL_PAIRS)
MAX_WORKERS = 3

# One requests session reused for every plain-HTTP fetch.
SESSION = requests.Session()

# ─── SELENIUM CHROME OPTIONS FOR SCRAPING ───────────────────────────────────
CHROME_OPTS = uc.ChromeOptions()
for chrome_flag in (
    "--disable-gpu",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-blink-features=AutomationControlled",
):
    CHROME_OPTS.add_argument(chrome_flag)
CHROME_OPTS.add_experimental_option("excludeSwitches", ["enable-automation"])
CHROME_OPTS.add_experimental_option("useAutomationExtension", False)
# Optional headless:
# CHROME_OPTS.add_argument("--headless")

# URL -> (title, content) for articles that were already fetched.
_scraped_cache = {}

# ─── HELPERS ──────────────────────────────────────────────────────────────
@lru_cache(maxsize=128)
def _cleanup_reasoning(raw: str) -> str:
    return re.sub(
        r'^(?:\s*(?:Okay|Alright|First|Next|Finally|Let me|I need)[^\.\!?]*[\.\!?])+',
        '', raw, flags=re.IGNORECASE
    ).strip()

# ─── ARTICLE FETCHER WITH JS/ADBLOCK OVERRIDE ─────────────────────────────────
def _fresh_chrome_options():
    """Build a new ChromeOptions carrying the same settings as CHROME_OPTS.

    A new object is created per browser launch because, as far as I can tell,
    undetected_chromedriver refuses an options object that was already used
    to start a driver; CHROME_OPTS itself is therefore only used as a template.
    """
    opts = uc.ChromeOptions()
    for arg in CHROME_OPTS.arguments:
        opts.add_argument(arg)
    for key, value in CHROME_OPTS.experimental_options.items():
        opts.add_experimental_option(key, value)
    return opts


def _fetch_with_browser(url: str, timeout: int) -> tuple[str, str]:
    """Load *url* in Chrome and return (title, text of <article> or <main>)."""
    # Local import: the file's import block does not bring this name in.
    from selenium.common.exceptions import NoSuchElementException

    driver = uc.Chrome(options=_fresh_chrome_options())
    try:
        # Hide navigator.webdriver before any page script runs.
        driver.execute_cdp_cmd(
            'Page.addScriptToEvaluateOnNewDocument',
            {'source': "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"}
        )
        driver.get(url)
        WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'article, main'))
        )
        # Drop overlays, modals, ad-block warnings and cookie banners.
        driver.execute_script(
            '''
            document.querySelectorAll(
              '.overlay, .modal, .adblock-warning, body > div[style*="position: fixed"], .cookie-consent'
            ).forEach(el => el.remove());
            '''
        )
        try:
            container = driver.find_element(By.TAG_NAME, 'article')
        except NoSuchElementException:
            container = driver.find_element(By.TAG_NAME, 'main')
        html = container.get_attribute('outerHTML')
        title = driver.title or "No Title"
    finally:
        # Always shut the browser down, including on timeouts and lookup
        # failures, so no Chrome process is left behind.
        driver.quit()
    content = BeautifulSoup(html, 'html.parser').get_text("\n", strip=True)
    return title, content


def _fetch_with_requests(url: str, timeout: int) -> tuple[str, str]:
    """Download *url* with SESSION and return (title, text of all <p> tags)."""
    resp = SESSION.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=timeout)
    soup = BeautifulSoup(resp.content, "html.parser")
    title = soup.title.string.strip() if soup.title and soup.title.string else "No Title"
    content = "\n".join(p.get_text(strip=True) for p in soup.find_all("p"))
    return title, content


def fetch_article(url: str, timeout: int = 15) -> tuple[str, str]:
    """Return ``(title, content)`` for the article at *url*.

    Results are memoised in ``_scraped_cache``, keyed by URL. On a cache miss
    the page is first loaded in a Chrome browser; if anything in that path
    raises, the page is fetched with a plain HTTP GET instead.

    Args:
        url: Address of the article.
        timeout: Seconds to wait for an ``<article>``/``<main>`` element in
            the browser, and the request timeout of the HTTP fallback.

    Returns:
        The page title (``"No Title"`` when none is found) and the extracted
        text, with pieces separated by newlines.

    Raises:
        Whatever the HTTP fallback raises (for example a ``requests``
        connection error) when both strategies fail.
    """
    if url in _scraped_cache:
        return _scraped_cache[url]
    try:
        title, content = _fetch_with_browser(url, timeout)
    except Exception:
        # Deliberate best-effort: any browser-side failure falls back to HTTP.
        title, content = _fetch_with_requests(url, timeout)
    _scraped_cache[url] = (title, content)
    return title, content

# ─── LLM CHAT WRAPPER ──────────────────────────────────────────────────────
def llm_chat(model_id, messages, temperature, top_p, max_tokens):
    """Send *messages* to an Ollama model and return the reply text.

    Args:
        model_id: Model identifier passed to ``ollama.chat``.
        messages: List of ``{"role": ..., "content": ...}`` dicts. The list
            is not modified.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The ``["message"]["content"]`` of the response, whitespace-stripped.
    """
    if SOFT_PROMPT:
        # Prepend the soft prompt as an extra system message; build a new
        # list so the caller's list stays untouched.
        messages = [{"role": "system", "content": SOFT_PROMPT}] + messages
    options = {
        "temperature": temperature,
        "top_p": top_p,
        # Ollama names its output-length limit `num_predict`; a `max_tokens`
        # key is not an Ollama option and would not limit the output.
        "num_predict": max_tokens,
    }
    response = ollama.chat(model=model_id, messages=messages, options=options)
    return response["message"]["content"].strip()

# ─── SUMMARIZATION & REFINEMENT ─────────────────────────────────────────────
def summarize(text: str, label: str, temperature: float, top_p: float, max_tokens: int) -> str:
    """Ask the model shown as *label* for a 5–7 sentence summary of *text*.

    Only the first 2000 characters of *text* are sent. The reply is passed
    through ``_cleanup_reasoning`` before being returned.

    Raises:
        KeyError: If *label* is not a key of ``MODEL_ALIASES``.
    """
    editor_prompt = (
        "You’re a senior news editor at a major outlet—produce a natural-sounding, balanced summary of the provided text in 5–7 sentences. "
        "Use whatever content is available; do not ask for more text or apologize if it’s brief. Output ONLY the summary."
    )
    excerpt = text[:2000]
    conversation = [
        {"role": "system", "content": editor_prompt},
        {"role": "user", "content": f"Article (first 2000 chars):\n{excerpt}"},
    ]
    raw_summary = llm_chat(MODEL_ALIASES[label], conversation, temperature, top_p, max_tokens)
    return _cleanup_reasoning(raw_summary)

# ─── BUILD INSTAGRAM POST MERGED ───────────────────────────────────────────
def build_instagram_post(summary: str, label: str, temperature: float, top_p: float, max_tokens: int) -> str:
    """Ask the model shown as *label* for a 2–3 sentence Instagram caption.

    The request goes through ``llm_chat`` so captions get the same handling
    as every other chat call (the optional soft prompt and the generation
    options are applied in one place).

    Args:
        summary: Text the caption is based on; sent in full.
        label: Display name of the model, a key of ``MODEL_ALIASES``.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling threshold.
        max_tokens: Upper bound on the number of generated tokens.

    Returns:
        The caption text, whitespace-stripped.

    Raises:
        KeyError: If *label* is not a key of ``MODEL_ALIASES``.
    """
    model_id = MODEL_ALIASES[label]
    system = (
        "You are a professional social media strategist—create a natural, engaging Instagram caption of 2–3 sentences, incorporating emojis and relevant hashtags. "
        "Use the provided summary directly; output ONLY the caption."
    )
    messages = [
        {"role": "system", "content": system},
        {"role": "user", "content": f"Summary:\n{summary}"},
    ]
    return llm_chat(model_id, messages, temperature, top_p, max_tokens)

# ─── PARALLEL PROCESSING WITH STREAMING & LOADING MESSAGE ─────────────────────────────────
def parallel_llm(func, text, labels, temperature, top_p, max_tokens, output_widget, title):
    """Run *func* once per model label in worker threads and show each result.

    Everything is rendered inside *output_widget*: a loading message, a
    progress bar, one Markdown section per model as soon as its result is
    ready, and a completion message. Content already present in the widget
    (such as a heading rendered by the caller) is kept; callers clear the
    widget themselves when they want a fresh view.

    Args:
        func: Callable invoked as
            ``func(text, label, temperature, top_p, max_tokens)``.
        text: Input text forwarded to *func*.
        labels: Model display names, one task per label.
        temperature: Forwarded to *func*.
        top_p: Forwarded to *func*.
        max_tokens: Forwarded to *func*.
        output_widget: ``ipywidgets.Output`` that receives all rendering.
        title: Progress-bar description; its lower-cased form appears in the
            status messages.
    """
    progress = widgets.IntProgress(min=0, max=len(labels), description=title)
    with output_widget:
        display(Markdown(f"⏳ Generating {title.lower()}..."))
        display(progress)
        # One bounded pool for all labels; the `with` block shuts it down.
        with ThreadPoolExecutor(max_workers=MAX_WORKERS) as pool:
            futures = {
                pool.submit(func, text, label, temperature, top_p, max_tokens): label
                for label in labels
            }
            for fut in as_completed(futures):
                label = futures[fut]
                try:
                    res = fut.result()
                except Exception as exc:
                    # Show the cause instead of an anonymous failure marker.
                    res = f"❌ Error: {exc}"
                display(Markdown(f"**Model: {label}**  \n---\n{res}"))
                progress.value += 1
        progress.close()
        display(Markdown(f"✅ Completed generating {title.lower()}!"))

# ─── UI SETUP ──────────────────────────────────────────────────────────────
def _action_button(caption, style):
    """Create a 150px-wide button with the given caption and button style."""
    return widgets.Button(
        description=caption,
        button_style=style,
        layout=widgets.Layout(width="150px"),
    )


HEADER = HTML("<h2 style='margin-bottom:10px; color:#333;'>📰 News to Instagram</h2>")

url_input = widgets.Text(
    layout=widgets.Layout(width="60%", margin="0 10px 0 0"),
    placeholder="Paste article URL…",
)

# Generation controls. The callbacks read them by position from
# params_box.children: 0 = temperature, 1 = top-p, 2 = max tokens.
_temperature_slider = widgets.FloatSlider(value=0.3, min=0.0, max=1.0, step=0.05, description='Temp')
_top_p_slider = widgets.FloatSlider(value=0.9, min=0.0, max=1.0, step=0.05, description='Top-p')
_max_tokens_slider = widgets.IntSlider(value=400, min=100, max=1000, step=50, description='Max-tokens')
params_box = widgets.HBox(
    [_temperature_slider, _top_p_slider, _max_tokens_slider],
    layout=widgets.Layout(margin="10px 0", gap="20px"),
)

# One pre-ticked checkbox per discovered model.
_model_checkboxes = [widgets.Checkbox(description=name, value=True) for name in MODELS]
checkboxes = widgets.VBox(_model_checkboxes, layout=widgets.Layout(margin="10px 0"))

# Action buttons; handlers are attached by position (summarize, post, clear).
buttons = widgets.HBox(
    [
        _action_button("🧠 Summarize", "primary"),
        _action_button("📸 Generate Post", "info"),
        _action_button("🧹 Clear", "warning"),
    ],
    layout=widgets.Layout(margin="10px 0", gap="20px"),
)

# Scrollable, bordered area that receives all results and messages.
output = widgets.Output(
    layout=widgets.Layout(
        border="1px solid #ccc",
        padding="10px",
        height="500px",
        overflow_y="auto",
    )
)

# CALLBACKS ───────────────────────────────────────────────────────────────
def on_summarize(_):
    """Handle a click on the Summarize button.

    Validates the URL and the model selection, fetches the article, and then
    renders one summary per ticked model into ``output``. The temperature,
    top-p and max-token values are read from the three sliders in
    ``params_box``. Problems (missing URL, no model ticked, fetch failure,
    no extractable text) are reported in ``output`` instead of raising.

    Args:
        _: The clicked button (unused).
    """
    def show(markdown_text):
        # Replace whatever the output area shows with a single message.
        with output:
            clear_output()
            display(Markdown(markdown_text))

    url = url_input.value.strip()
    if not url:
        show("⚠️ URL required")
        return
    # Check the cheap precondition before the slow browser-based fetch.
    labels = [cb.description for cb in checkboxes.children if cb.value]
    if not labels:
        show("⚠️ Select model(s)")
        return
    try:
        title, text = fetch_article(url)
    except Exception as exc:
        show(f"❌ Could not fetch article: {exc}")
        return
    if not text.strip():
        # Nothing to summarize; avoid prompting the models with empty input.
        show("⚠️ No article text could be extracted from this URL")
        return
    show(f"## 🧠 Summaries for: **{title}**")
    temperature, top_p, max_tokens = (slider.value for slider in params_box.children[:3])
    parallel_llm(
        summarize, text, labels,
        temperature, top_p, max_tokens,
        output, title="Summaries"
    )

def on_instagram(_):
    """Handle a click on the Generate Post button.

    Validates the URL and the model selection, fetches the article, and then
    renders one Instagram caption per ticked model into ``output``. The
    temperature, top-p and max-token values are read from the three sliders
    in ``params_box``. Problems (missing URL, no model ticked, fetch failure,
    no extractable text) are reported in ``output`` instead of raising.

    NOTE(review): the fetched article text itself is passed as the
    ``summary`` argument of ``build_instagram_post``; confirm that skipping a
    separate summarization step is intended.

    Args:
        _: The clicked button (unused).
    """
    def show(markdown_text):
        # Replace whatever the output area shows with a single message.
        with output:
            clear_output()
            display(Markdown(markdown_text))

    url = url_input.value.strip()
    if not url:
        show("⚠️ URL required")
        return
    # Check the cheap precondition before the slow browser-based fetch.
    labels = [cb.description for cb in checkboxes.children if cb.value]
    if not labels:
        show("⚠️ Select model(s)")
        return
    try:
        title, text = fetch_article(url)
    except Exception as exc:
        show(f"❌ Could not fetch article: {exc}")
        return
    if not text.strip():
        # Nothing to write about; avoid prompting the models with empty input.
        show("⚠️ No article text could be extracted from this URL")
        return
    show(f"## 📸 Instagram Posts for: **{title}**")
    temperature, top_p, max_tokens = (slider.value for slider in params_box.children[:3])
    parallel_llm(
        build_instagram_post, text, labels,
        temperature, top_p, max_tokens,
        output, title="Instagram"
    )

def on_clear(_):
    """Reset the form: empty the output area, re-tick every model, blank the URL.

    Args:
        _: The clicked button (unused).
    """
    output.clear_output()
    for model_box in checkboxes.children:
        model_box.value = True
    url_input.value = ""

# Hook up buttons
# Attach handlers by button position: summarize, generate post, clear.
for _button, _handler in zip(buttons.children, (on_summarize, on_instagram, on_clear)):
    _button.on_click(_handler)

# Show the whole interface, top to bottom.
display(HEADER, url_input, params_box, checkboxes, buttons, output)



Text(value='', layout=Layout(margin='0 10px 0 0', width='60%'), placeholder='Paste article URL…')

HBox(children=(FloatSlider(value=0.3, description='Temp', max=1.0, step=0.05), FloatSlider(value=0.9, descript…

VBox(children=(Checkbox(value=True, description='deepseek-r1:8b-gpu'), Checkbox(value=True, description='gemma…

HBox(children=(Button(button_style='primary', description='🧠 Summarize', layout=Layout(width='150px'), style=B…

Output(layout=Layout(border_bottom='1px solid #ccc', border_left='1px solid #ccc', border_right='1px solid #cc…