# 🌐 WebPage Summarizer

**Overview**  
This project is a WebPage Summarizer tool that extracts and condenses key information from any given web page. It utilizes cutting-edge open-source large language models to deliver accurate, concise, and context-aware summaries.

---

## 🔍 Models Used

### 1. **LLaMA 3.2**  
- A powerful and efficient large language model capable of understanding complex web content and producing high-quality natural language summaries.

### 2. **DeepSeek R1:1.5B**  
- A lightweight yet effective open-source LLM, optimized for faster inference and resource-constrained environments, while still providing meaningful summarization.

---

## ✨ Features
- Extracts visible text from web pages.
- Cleans and preprocesses content for LLM input.
- Generates concise summaries of articles, blogs, or documentation.


In [None]:
# %pip install selenium webdriver-manager 

In [1]:
# ===========================
# System & Environment
# ===========================
import os

# ===========================
# Web Scraping
# ===========================
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# ===========================
# AI-related
# ===========================
import ipywidgets as widgets
from IPython.display import display, clear_output
from openai import OpenAI
import ollama

In [2]:
# Ollama model tags for the two summarization models this notebook offers.
# NOTE(review): neither name is referenced by the code visible in this
# notebook (the UI dropdown repeats the same tags as literals) — confirm
# whether other cells rely on them before renaming or removing.
model_OLLama="llama3.2"
model_deepseek="deepseek-r1:1.5b"

# 🌐 Web Scraping Infrastructure

In [12]:
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager
import ollama

import ipywidgets as widgets
from IPython.display import display, clear_output

# --------------------------------
# Define the DynamicWebScraper class
# --------------------------------
class DynamicWebScraper:
    """Scrape a web page with headless Chrome and summarize it via Ollama.

    Constructing an instance runs the whole pipeline immediately:
    ``scrape()``, then ``generate_summary()``, then ``shutdown_model()``.
    Neither scraping nor summarizing lets an exception escape; failures are
    printed and recorded as placeholder values on the instance.

    Attributes:
        url: Address of the page to load.
        model: Ollama model tag, e.g. 'llama3.2' or 'deepseek-r1:1.5b'.
        title: Page title, or "Error" when scraping failed.
        text: Cleaned visible text, or a placeholder when scraping failed.
        summary: Model-generated summary, or a placeholder message.
    """

    # Tunables. The values are the ones previously hard-coded in the methods.
    PAGE_LOAD_TIMEOUT = 30   # seconds before driver.get() gives up
    RENDER_WAIT = 5          # seconds to let client-side scripts render
    MAX_LINES = 200          # non-empty text lines kept from the page
    MAX_PROMPT_CHARS = 3000  # characters of page text sent to the model
    MIN_TEXT_CHARS = 100     # below this the page is not summarized
    STRIPPED_TAGS = (
        "script", "style", "img", "nav", "footer", "header", "input", "button",
    )

    def __init__(self, url, model_name):
        """Store the inputs and run scrape -> summarize -> shutdown.

        Args:
            url: Address of the page to load.
            model_name: Ollama model tag used for summarization.
        """
        self.url = url
        self.model = model_name  # 'llama3.2' or 'deepseek-r1:1.5b'
        self.title = ""
        self.text = ""
        self.summary = ""
        self.scrape()
        self.generate_summary()
        self.shutdown_model()

    def scrape(self):
        """Load ``self.url`` and populate ``self.title`` and ``self.text``.

        Any exception is caught and printed; ``title`` is then set to
        "Error" and ``text`` to "Could not extract content.".
        """
        try:
            html = self._load_page()
            print(f"✅ Page Loaded: {self.title}")

            self.text = self._extract_text(html)
            print(f"📄 Extracted {len(self.text)} characters")

        except Exception as e:
            # Best-effort boundary: report the problem and leave placeholders
            # so the rest of the pipeline can still run.
            print(f"❌ Error: {e}")
            self.title = "Error"
            self.text = "Could not extract content."

    def _load_page(self):
        """Open the page in headless Chrome, set ``self.title``, return HTML."""
        chrome_options = Options()
        for flag in (
            "--headless",
            "--no-sandbox",
            "--disable-dev-shm-usage",
            "--disable-gpu",
            "--window-size=1920,1080",
            "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0",
        ):
            chrome_options.add_argument(flag)

        driver = webdriver.Chrome(
            service=Service(ChromeDriverManager().install()), options=chrome_options
        )
        try:
            driver.set_page_load_timeout(self.PAGE_LOAD_TIMEOUT)

            print(f"🔍 Loading: {self.url}")
            driver.get(self.url)
            # Fixed pause so dynamically rendered content has time to appear.
            time.sleep(self.RENDER_WAIT)

            self.title = driver.title
            return driver.page_source
        finally:
            # Always close the browser, including when navigation fails or
            # times out; otherwise the Chrome process is left running.
            driver.quit()

    def _extract_text(self, html):
        """Return the page's visible text, one non-empty line per row."""
        soup = BeautifulSoup(html, "html.parser")
        for tag in soup(list(self.STRIPPED_TAGS)):
            tag.decompose()

        # Prefer the semantic content container when the page has one.
        main = soup.find("main") or soup.find("article") or soup.body
        raw_text = main.get_text(separator="\n", strip=True) if main else soup.get_text()

        lines = [line.strip() for line in raw_text.split("\n") if line.strip()]
        return "\n".join(lines[:self.MAX_LINES])

    def generate_summary(self):
        """Ask the configured Ollama model to summarize ``self.text``.

        Sets ``self.summary`` to the model's reply, to "Not enough content to
        summarize." when the text is shorter than ``MIN_TEXT_CHARS``, or to
        "Summary generation failed." when the request raises.
        """
        try:
            if len(self.text) < self.MIN_TEXT_CHARS:
                self.summary = "Not enough content to summarize."
                return

            # Only the first MAX_PROMPT_CHARS characters are sent to the model.
            prompt = (
                "Summarize the following webpage content:\n\n"
                f"{self.text[:self.MAX_PROMPT_CHARS]}"
            )

            print(f"🤖 Summarizing using {self.model}...")
            response = ollama.chat(model=self.model, messages=[{"role": "user", "content": prompt}])
            self.summary = response.get("message", {}).get("content", "Summary not found.")
            print("📌 Summary generated successfully.")

        except Exception as e:
            print(f"❌ Failed to summarize: {e}")
            self.summary = "Summary generation failed."

    def shutdown_model(self):
        """Print a completion notice; performs no actual cleanup.

        Per the original author's note, Ollama is expected to unload idle
        models on its own, so nothing is released here.
        """
        print(f"ℹ️ Model {self.model} session completed. Ollama will manage cleanup automatically.")

# --------------------------------
# UI Widgets
# --------------------------------
# Free-text field for the page address, pre-filled with a sample site and
# stretched to the full width of the cell.
url_input = widgets.Text(
    description="🌐 URL:",
    value="https://www.nytimes.com/",
    placeholder="Enter website URL",
    layout=widgets.Layout(width="100%"),
    style={"description_width": "initial"},
)

# Each option is a (label shown to the user, model tag) pair; the tag is the
# value handed to the scraper.
model_selector = widgets.Dropdown(
    description="🤖 Model:",
    options=[
        ("LLaMA 3.2", "llama3.2"),
        ("DeepSeek R1 (1.5B)", "deepseek-r1:1.5b"),
    ],
    value="llama3.2",
    style={"description_width": "initial"},
)

# Button that starts a scrape-and-summarize run once a click handler is attached.
run_button = widgets.Button(
    button_style="success",
    description="🚀 Run Scraper & Summarizer",
)

# Area that receives everything printed during a run.
output = widgets.Output()

# --------------------------------
# Callback Function
# --------------------------------
def run_scraper(button):
    """Button callback: scrape the entered URL and print title and summary.

    Reads the URL and model from the ``url_input`` and ``model_selector``
    widgets and writes all output into the ``output`` widget, clearing
    whatever a previous run left there.

    Args:
        button: The clicked widget, as passed by ``on_click``; unused.
    """
    import re  # local import so this cell does not depend on another cell's imports

    with output:
        clear_output()

        url = url_input.value.strip()
        model = model_selector.value.strip()

        if not url:
            print("❌ Please enter a valid URL.")
            return

        # WebDriver navigation rejects addresses without a scheme (for example
        # "www.nytimes.com"), so default those to HTTPS. Anything that already
        # starts with a scheme-like prefix ("http:", "file:", "about:") is
        # passed through untouched.
        if not re.match(r"[A-Za-z][A-Za-z0-9+.-]*:", url):
            url = f"https://{url}"

        scraper = DynamicWebScraper(url, model)
        print("\n📰 Title:", scraper.title)
        print("🧠 Summary:\n", scraper.summary)

# --------------------------------
# Wire the Button
# --------------------------------
# Register run_scraper as the click handler of the run button.
run_button.on_click(run_scraper)

# --------------------------------
# Display UI
# --------------------------------
# Render the inputs, the button and the output area in that order.
display(url_input, model_selector, run_button, output)

Text(value='https://www.nytimes.com/', description='🌐 URL:', layout=Layout(width='100%'), placeholder='Enter w…

Dropdown(description='🤖 Model:', options=(('LLaMA 3.2', 'llama3.2'), ('DeepSeek R1 (1.5B)', 'deepseek-r1:1.5b'…

Button(button_style='success', description='🚀 Run Scraper & Summarizer', style=ButtonStyle())

Output()