In [1]:
import requests
import time
import re
import random
import ollama
import ipywidgets as widgets
from IPython.display import display, Markdown, clear_output
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from datetime import datetime
from threading import Thread


# Shared notebook state, mutated by the button callbacks defined further down.

# Text and source URL of the most recent summary (empty strings until one exists).
latest_summary = dict(text="", url="")
# Summary format currently in effect; read when the user prompt is built.
summary_style = dict(mode="Short")
# URL -> scraped page object, so a given address is only fetched once.
scraped_cache = dict()

# Ollama model tags intended for side-by-side comparison.
COMPARE_MODELS = [
    "llama3.2",
    "gemma3",
    "deepseek-r1:8b",
]



In [2]:
def get_models():
    """Return the model names reported by ``ollama.list()``.

    For each entry the "name" field is used, falling back to "model" and
    finally to the literal "Unnamed". If anything goes wrong the function
    does not raise; it returns the single placeholder entry
    'Error loading models'.
    """
    try:
        listing = ollama.list()
        # Use the "models" field when present, otherwise iterate the reply itself.
        entries = listing.get("models", listing)
        names = []
        for entry in entries:
            fallback = entry.get("model", "Unnamed")
            names.append(entry.get("name", fallback))
        return names
    except Exception:
        return ['Error loading models']

In [3]:
# Text box where the user pastes the article address.
url_input = widgets.Text(
    placeholder='Paste article link here...',
    description='URL:',
    layout=widgets.Layout(width='90%'),
    style={'description_width': 'initial'},
)

# Summary format picker; 'Short' is preselected.
format_toggle = widgets.ToggleButtons(
    description='Format:',
    options=['Short', 'Bullet Points', 'Detailed'],
    value='Short',
    style={'description_width': 'initial'},
)


# Models offered as checkboxes. The list is taken from COMPARE_MODELS (defined
# in the first cell) instead of repeating the literals, so the two cannot drift.
model_options = list(COMPARE_MODELS)
# One checkbox per model; only llama3.2 and gemma3 start ticked.
model_checkboxes = [
    widgets.Checkbox(value=(m in ("llama3.2", "gemma3")), description=m)
    for m in model_options
]
model_selector_box = widgets.VBox(model_checkboxes)

# Action buttons; their click handlers are attached in a later cell.
summarize_button = widgets.Button(
    button_style='primary',
    description='Summarize',
)
instagram_button = widgets.Button(
    button_style='info',
    description='Instagram Post',
)
download_button = widgets.Button(
    button_style='',
    description='⬇️Summary',
    tooltip='Download last summary',
)
clear_button = widgets.Button(
    button_style='warning',
    description='🧹 Clear Output',
    tooltip='Clear displayed output',
)

# Single output area that every callback renders into.
output = widgets.Output()


In [4]:
class Website:
    """Title and text of a web page, fetched when the object is constructed.

    The page is first loaded in an undetected-chromedriver browser; if that
    raises for any reason, a plain ``requests`` download is used instead.

    Attributes:
        url: Address that was requested.
        wait_time: Seconds to wait for a ``<main>`` element in the browser.
        settle_time: Seconds to sleep after navigation before that wait starts.
        title: Page title, or "No Title Found".
        text: Extracted page text.
    """

    def __init__(self, url, wait_time=20, settle_time=8):
        """Fetch ``url`` immediately.

        Args:
            url: Page address to load.
            wait_time: Timeout in seconds for the ``<main>`` element to appear.
            settle_time: Fixed pause in seconds after navigation (previously a
                hard-coded 8); pass 0 to skip it.

        Raises:
            Exception: Whatever the ``requests`` fallback raises when the
                browser attempt has also failed.
        """
        self.url = url
        self.wait_time = wait_time
        self.settle_time = settle_time
        self.title, self.text = self._load_page()

    def _load_page(self):
        """Return ``(title, text)``, preferring the browser and falling back to requests."""
        try:
            return self._load_with_selenium()
        except Exception as e:
            print(f"⚠️ Selenium failed: {e}. Falling back to requests.")
            return self._load_with_requests()

    def _load_with_selenium(self):
        """Load the page in Chrome and return ``(title, text of <main>)``.

        Raises whatever the driver raises, including the wait timing out when
        the page has no ``<main>`` element; the browser is always closed.
        """
        options = uc.ChromeOptions()
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-blink-features=AutomationControlled")
        options.add_argument("start-maximized")
        options.add_argument("user-agent=Mozilla/5.0")

        driver = uc.Chrome(options=options)
        try:
            driver.get(self.url)
            if self.settle_time > 0:
                time.sleep(self.settle_time)
            # until() hands back the located element, so no second lookup is needed.
            main = WebDriverWait(driver, self.wait_time).until(
                EC.presence_of_element_located((By.TAG_NAME, "main"))
            )
            html = main.get_attribute("outerHTML")
            soup = BeautifulSoup(html, "html.parser")
            return driver.title or "No Title Found", soup.get_text(separator="\n", strip=True)
        finally:
            driver.quit()

    def _load_with_requests(self):
        """Download the page with ``requests`` and return ``(title, joined <p> text)``.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        headers = {"User-Agent": "Mozilla/5.0"}
        response = requests.get(self.url, headers=headers, timeout=10)
        # Without this an error page (404, 403, ...) would be parsed and
        # summarised as though it were the article.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")
        title = soup.title.get_text(strip=True) if soup.title else "No Title Found"
        paragraphs = soup.find_all("p")
        text = "\n".join([p.get_text() for p in paragraphs])
        return title, text


In [5]:
def user_prompt_for(website):
    """Build the user prompt for summarising ``website``.

    The prompt contains the page title, the first 2000 characters of the page
    text, and an instruction chosen from ``summary_style["mode"]``: "Short" and
    "Bullet Points" have dedicated wording, every other mode gets the detailed
    instruction.
    """
    instructions_by_mode = {
        "Short": "Summarize this article briefly in 3–4 sentences.",
        "Bullet Points": "Summarize this article in 5 bullet points.",
    }
    instruction = instructions_by_mode.get(
        summary_style["mode"],
        "Write a detailed summary with key highlights.",
    )

    lines = [
        f"Website Title: {website.title}",
        "Content:",
        website.text[:2000],
        "",
        instruction,
    ]
    return "\n".join(lines)

def messages_for(website):
    """Return the two-message chat payload (system, then user) for ``website``."""
    system_message = dict(
        role="system",
        content="You are a helpful assistant that summarizes website content in Markdown, ignoring navigation-related text.",
    )
    user_message = dict(role="user", content=user_prompt_for(website))
    return [system_message, user_message]


In [6]:
def extract_headline(summary):
    """Return the first sentence of ``summary``.

    The trimmed text is cut at the first whitespace run that follows '.', '!'
    or '?'. When there is no such break the whole trimmed text is returned,
    and an empty or blank summary gives an empty string.
    """
    trimmed = summary.strip()
    # Splitting always produces at least one piece, so the first piece is
    # always available and no separate fallback is required.
    first_sentence = re.split(r'(?<=[.!?])\s+', trimmed, maxsplit=1)[0]
    return first_sentence

def extract_relevant_emojis(summary):
    """Return the emojis whose keyword occurs in ``summary``, space-separated.

    Matching is case-insensitive and on whole keywords only. Emojis appear in
    the order their keywords are listed in the mapping, so the result is the
    same on every run. When no keyword matches, "📰" is returned.
    """
    mapping = {
        "tariff": "💸", "renewable": "🌱", "solar": "☀️", "energy": "⚡",
        "judge": "⚖️", "climate": "🌡️", "policy": "📜", "trade": "🌐",
        "u.s.": "🇺🇸", "election": "🗳️", "technology": "🔬", "innovation": "🚀",
        "investment": "💰", "job": "👷", "covid": "😷", "health": "🏥",
        "market": "📈", "music": "🎶", "sport": "🏅", "entertainment": "🎬"
    }
    text = summary.lower()
    found = []
    for word, emoji in mapping.items():
        # Escape the keyword ("u.s." contains regex wildcards) and use
        # lookarounds instead of \b: \b after a trailing "." only matches when
        # a letter follows, so "U.S. " never matched while "ussr" did.
        pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
        if re.search(pattern, text) and emoji not in found:
            found.append(emoji)
    return " ".join(found) or "📰"

def extract_relevant_hashtags(summary):
    """Return space-separated hashtags for ``summary``.

    Each mapping keyword found in the text (case-insensitive, whole keyword)
    contributes its tag, in mapping order. If none is found, up to three
    distinct words longer than four characters are picked at random and
    capitalised instead; the result is an empty string when there are no such
    words.
    """
    mapping = {
        "tariff": "TariffWatch", "renewable": "Renewable", "solar": "SolarPower", "energy": "EnergyNews",
        "judge": "Justice", "climate": "ClimateChange", "policy": "PolicyUpdate", "trade": "GlobalTrade",
        "u.s.": "USA", "election": "Election2024", "technology": "TechTrends", "innovation": "Innovation",
        "investment": "Investment", "job": "Jobs", "covid": "COVID19", "health": "HealthNews",
        "market": "MarketWatch", "music": "MusicNews", "sport": "SportsUpdate", "entertainment": "Entertainment"
    }
    text = summary.lower()
    # Escape each keyword and use lookarounds rather than \b so that "u.s."
    # is matched literally, including when whitespace follows the final dot.
    hashtags = [
        "#" + tag
        for word, tag in mapping.items()
        if re.search(r'(?<!\w)' + re.escape(word) + r'(?!\w)', text)
    ]
    if not hashtags:
        stopwords = {
            "the", "this", "with", "that", "from", "will", "have", "been", "were", "their", "which"
        }
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the candidate list (and hence a seeded random pick) is reproducible.
        candidates = list(dict.fromkeys(
            w for w in re.findall(r'\w+', text) if len(w) > 4 and w not in stopwords
        ))
        picked = random.sample(candidates, min(3, len(candidates)))
        hashtags = ["#" + w.capitalize() for w in picked]
    return " ".join(hashtags)

def build_dynamic_template(summary):
    """Assemble a draft post from the summary's headline, emojis and hashtags.

    Summaries of at most 80 words give a compact two-part draft. Longer ones
    also include a teaser made of the first 30 words followed by "..." and a
    closing call for comments.
    """
    headline = extract_headline(summary)
    emojis = extract_relevant_emojis(summary)
    hashtags = extract_relevant_hashtags(summary)

    tokens = summary.split()
    if len(tokens) <= 80:
        return f"{emojis} {headline}\n\n{hashtags}"

    # More than 80 words means the 30-word teaser is always a truncation.
    teaser = " ".join(tokens[:30]) + "..."
    return f"{emojis} {headline}\n\nTake a closer look: {teaser}\n\nYour thoughts? {emojis} {hashtags}"

def generate_instagram_post(summary, model):
    """Turn ``summary`` into Instagram post text using the Ollama model ``model``.

    A draft is built with ``build_dynamic_template`` and sent to the model for
    polishing. The reply is then cleaned: ``<think>...</think>`` blocks,
    a few known "thinking out loud" sentences, and prefaces such as "Note:" or
    "Final Version:" are removed.

    Args:
        summary: Summary text the post is based on.
        model: Model tag passed to ``ollama.chat``.

    Returns:
        The cleaned post text with surrounding whitespace removed.

    Raises:
        Exception: Whatever ``ollama.chat`` raises; nothing is caught here.
    """
    draft = build_dynamic_template(summary)
    prompt = (
        "Refine the following draft into a polished Instagram post. "
        "Output ONLY the final post text. "
        "Do NOT include any headers, explanations, commentary, or labels like 'Final Version'. "
        "If you must think, do it internally — only output the final Instagram post directly.\n\n"
        f"Draft:\n{draft}"
    )

    messages = [
        {"role": "system", "content": "You are a creative assistant who ONLY outputs the final Instagram post without any step-by-step reasoning, commentary, or explanation."},
        {"role": "user", "content": prompt}
    ]

    response = ollama.chat(model=model, messages=messages)
    raw_output = response["message"]["content"].strip()

    # Step 0: some reasoning-oriented models wrap their chain of thought in
    # <think>...</think>; remove those blocks entirely before anything else.
    raw_output = re.sub(r"<think>.*?</think>", "", raw_output, flags=re.DOTALL | re.IGNORECASE)

    # Step 1: remove known "thought process" sentences. Each pattern is lazy
    # and, with DOTALL, may span lines from its opening word to its closing phrase.
    cleanup_patterns = [
        r"(?i)(alright.*?step by step\.?)",     # remove "Alright, let's break this down step by step."
        r"(?i)(first,.*?provided\.?)",           # remove "First, I should identify the key elements..."
        r"(?i)(next,.*?instagram users\.?)",     # remove "Next, I need to condense..."
        r"(?i)(finally,.*?instagram's style\.?)" # remove "Finally, I'll craft it..."
    ]

    for pattern in cleanup_patterns:
        raw_output = re.sub(pattern, "", raw_output, flags=re.DOTALL)

    # Step 2: remove common unwanted prefaces like "Note:" or "Final Version:"
    raw_output = re.sub(r"(?i)(note:|final version:|here is your.*?:)", "", raw_output).strip()

    return raw_output.strip()

    
def fetch_summary_parallel(model, website, results_dict):
    """Summarise ``website`` with ``model`` and store the text in ``results_dict[model]``.

    Meant to be used as a thread target: it never raises. On failure the stored
    value is an error string starting with "❌ Error: ".
    """
    try:
        chat_messages = messages_for(website)
        reply = ollama.chat(model=model, messages=chat_messages)
        outcome = reply["message"]["content"]
    except Exception as exc:
        outcome = f"❌ Error: {str(exc)}"
    results_dict[model] = outcome



In [7]:
def _show_text(message):
    """Replace the contents of the output area with a plain-text message."""
    with output:
        clear_output()
        print(message)


def _show_markdown(*blocks):
    """Replace the contents of the output area with the given Markdown blocks."""
    with output:
        clear_output()
        for block in blocks:
            display(Markdown(block))


def _validated_inputs():
    """Return ``(url, selected_models)``, or None after showing a warning."""
    url = url_input.value.strip()
    selected_models = [cb.description for cb in model_checkboxes if cb.value]
    if not selected_models:
        _show_text("⚠️ Please select at least one model.")
        return None
    if not url:
        # Without this check an empty field still launched a browser before failing.
        _show_text("⚠️ Please enter an article URL.")
        return None
    return url, selected_models


def _get_website(url):
    """Return the cached Website for ``url``, scraping and caching it on first use."""
    website = scraped_cache.get(url)
    if website is None:
        website = Website(url)
        scraped_cache[url] = website
    return website


def _summarize_with_models(website, models):
    """Summarise ``website`` with every model, one thread each; return {model: text}."""
    summaries = {}
    threads = [
        Thread(target=fetch_summary_parallel, args=(model, website, summaries))
        for model in models
    ]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return summaries


def _show_columns(models, render, heading=None):
    """Show one Markdown column per model side by side, under an optional heading."""
    with output:
        clear_output()
        if heading:
            display(Markdown(heading))
        boxes = []
        for model in models:
            box = widgets.Output()
            with box:
                display(Markdown(render(model)))
            boxes.append(box)
        display(widgets.HBox(boxes))


def on_summarize_click(b):
    """Button callback: summarise the entered URL with every ticked model.

    The first selected model's summary and the URL are stored in
    ``latest_summary``. Any exception is reported in the output area
    instead of propagating.
    """
    _show_markdown("⏳ **Scraping and processing article...**")

    try:
        summary_style["mode"] = format_toggle.value
        inputs = _validated_inputs()
        if inputs is None:
            return
        url, selected_models = inputs

        website = _get_website(url)
        _show_markdown(
            f"## 🧠 Summary Comparison: **{website.title}**",
            "🔄 Generating summary from selected models...",
        )

        summaries = _summarize_with_models(website, selected_models)

        latest_summary["text"] = summaries.get(selected_models[0], "")
        latest_summary["url"] = url

        _show_columns(
            selected_models,
            lambda model: f"### 🧩 Model: `{model}`\n\n{summaries.get(model, '')}",
        )

    except Exception as e:
        _show_text(f"Error: {str(e)}")


def on_instagram_click(b):
    """Button callback: build an Instagram post per ticked model for the entered URL.

    Each model first summarises the page and then polishes a post from its own
    summary. A model whose summary or post generation failed shows its error
    text in its column; the other models are still displayed.
    """
    _show_markdown("⏳ **Scraping and generating posts...**")

    try:
        inputs = _validated_inputs()
        if inputs is None:
            return
        url, selected_models = inputs

        website = _get_website(url)
        summaries = _summarize_with_models(website, selected_models)

        posts = {}
        for model in selected_models:
            summary = summaries.get(model, "")
            if summary.startswith("❌ Error:"):
                # fetch_summary_parallel reports failures with this prefix;
                # an error message must not be rewritten into a post.
                posts[model] = summary
                continue
            try:
                posts[model] = generate_instagram_post(summary, model)
            except Exception as e:
                posts[model] = f"❌ Error: {str(e)}"

        _show_columns(
            selected_models,
            lambda model: f"### ✍️ Model: `{model}`\n\n{posts[model]}",
            heading=f"## 📸 Instagram Post Comparison: **{website.title}**",
        )

    except Exception as e:
        _show_text(f"Error: {str(e)}")

def on_download_click(b):
    """Button callback: write the latest summary to a timestamped .txt file.

    The file is created in the current working directory and holds the source
    URL followed by the summary text. If no summary has been stored yet, only
    a warning is printed.
    """
    with output:
        clear_output()
        if not latest_summary["text"]:
            print("⚠️ No summary available to download.")
            return

        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f"summary_{stamp}.txt"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(f"URL: {latest_summary['url']}\n\n{latest_summary['text']}")
        print(f"✅ Summary saved as `{filename}` in your working directory.")


def on_clear_click(b):
    """Button callback: reset the UI to a blank state.

    Empties the output area and the URL field, forgets the stored summary and
    unticks every model checkbox.
    """
    with output:
        clear_output()

    url_input.value = ""
    for key in ("text", "url"):
        latest_summary[key] = ""

    # Optional: the model selection is cleared as well.
    for box in model_checkboxes:
        box.value = False


In [8]:
# Attach each button to its callback.
for button, handler in (
    (summarize_button, on_summarize_click),
    (instagram_button, on_instagram_click),
    (download_button, on_download_click),
    (clear_button, on_clear_click),
):
    button.on_click(handler)

# Render the interface: URL field, format picker, model list, button row, output area.
button_row = widgets.HBox([summarize_button, instagram_button, download_button, clear_button])
display(
    url_input,
    format_toggle,
    widgets.Label("Models:"),
    model_selector_box,
    button_row,
    output,
)


Text(value='', description='URL:', layout=Layout(width='90%'), placeholder='Paste article link here...', style…

ToggleButtons(description='Format:', options=('Short', 'Bullet Points', 'Detailed'), style=ToggleButtonsStyle(…

Label(value='Models:')

VBox(children=(Checkbox(value=True, description='llama3.2'), Checkbox(value=True, description='gemma3'), Check…

HBox(children=(Button(button_style='primary', description='Summarize', style=ButtonStyle()), Button(button_sty…

Output()