## Load Libraries

In [None]:
import os # For interacting with the operating system
import requests # For making HTTP requests
import subprocess # For running external commands
import textwrap
from dotenv import load_dotenv # For loading environment variables
from bs4 import BeautifulSoup # For parsing and extracting data from HTML or XML documents
from IPython.display import Markdown, display  # For displaying formatted Markdown in Jupyter notebooks
from openai import OpenAI # For interacting with the OpenAI API

from selenium import webdriver
from selenium.webdriver.chrome.options import Options # For configuring Selenium Chrome options

## Load Environment Key

In [30]:
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()

# Go up one level to the project folder
project_dir = os.path.dirname(script_dir)
env_path = os.path.join(project_dir, "env_keys", ".env")

# Access the variable
load_dotenv(dotenv_path=env_path)
api_key = os.getenv("OPENAI_API_KEY")
print("API Key loaded:", api_key is not None)

API Key loaded: True


## Ollma Initialize

In [None]:
subprocess.Popen("ollama serve", shell=True)

<Popen: returncode: None args: 'ollama serve'>

In [42]:
requests.get("http://localhost:11434").content

b'Ollama is running'

In [None]:
result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
print(result.stdout)

NAME               ID              SIZE      MODIFIED    
llama3.2:latest    a80c4f17acd5    2.0 GB    5 weeks ago    



In [45]:
!ollama pull llama3.2

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling dde5aa3fc5ff: 100% ▕██████████████████▏ 2.0 GB                         [K
pulling 966de95ca8a6: 100% ▕██████████████████▏ 1.4 KB                         [K
pulling fcc5a6bec9da: 100% ▕██████████████████▏ 7.7 KB                         [K
pulling a70ff7e570d9: 100% ▕██████████████████▏ 6.0 KB          

In [47]:
OLLAMA_API_URL = "http://localhost:11434/v1"

## Initialize OpenAI Client for Ollma

In [48]:
openai = OpenAI(base_url=OLLAMA_API_URL, api_key=api_key)

## Selenium Support Static And Dynamic Webpage Filter

In [51]:
class Website:
    """
    A class to fetch, parse, and extract readable content from web pages.

    It supports both static (HTML-only) and dynamic (JavaScript-rendered) websites.
    """

    def __init__(self, url: str, use_selenium: bool = False):
        """
        Initializes the Website object and extracts the webpage content.

        Args:
            url (str): The target webpage URL.
            use_selenium (bool): Whether to use Selenium for JS-rendered pages.
                                 Default is False (use simple requests).
        """
        self.url = url
        self.title = None
        self.content = None

        if use_selenium:
            # --- For dynamic or JavaScript-rendered pages ---

            # Configure Chrome to run in headless mode (no visible browser window)
            options = Options()
            options.add_argument("--headless")         # Run Chrome without a GUI
            options.add_argument("--disable-gpu")      # Disable GPU acceleration
            options.add_argument("--no-sandbox")       # Required for some environments

            # Initialize the Chrome WebDriver with the specified options
            driver = webdriver.Chrome(options=options)
            # Load the webpage
            driver.get(url)
            # Get the fully rendered page source
            rendered_html = driver.page_source
            # Close the WebDriver 
            driver.quit()
            # Parse the HTML using BeautifulSoup
            soup = BeautifulSoup(rendered_html, 'html.parser')
        
        else:
            # --- For static HTML-only pages ---   

            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'html.parser')
        
        self.title = soup.title.string if soup.title else 'No Title'
        for irrelevant in soup(['script', 'style', 'header', 'footer', 'nav', 'aside']):
            irrelevant.decompose()
        
        self.content = soup.body.get_text(separator="/n", strip=True)

## Extract And Display Webpage Content

In [52]:
nationalgeographic = Website("https://www.nationalgeographic.com/science/article/green-comet-lemmon-2025", use_selenium=True)
nationalgeographic_title = nationalgeographic.title

snippet  = nationalgeographic.content[:]
wrapped_snippet = textwrap.fill(snippet, width=80)

print(f" =={nationalgeographic_title}== \n")
print(wrapped_snippet[:520])

 ==Here‘s how to see a rare green comet this week | National Geographic== 

SCIENCE/nNIGHT SKY GUIDE/nThis rare green comet won’t return for a thousand
years. Here’s how to see it this week./nComet C/2025 A6 (Lemmon) is this year’s
brightest comet visible from Earth—an icy relic from the solar system’s origins
that won’t return until the next millennium./nComet C/2025 A6 (Lemmon), captured
from the Dark Sky Alqueva Observatory in Portugal on October 2, 2025, glows
green as it nears its closest approach to Earth. Here’s how to see this rare
green comet before it disappears for a millennium.


## OpenAI Message Helpers

In [53]:
def system_message() -> str:
    """System-level instruction for OpenAI."""
    return "You are a helpful assistant that summarizes webpage content in Markdown."


def build_user_message(website: Website) -> str:
    """Create the user message including page title and snippet."""
    snippet = textwrap.fill(website.content, width=80)
    return (
        f"You are looking at content from the webpage titled: **{website.title}**\n\n"
        f"Below is the extracted content. Please provide a concise Markdown summary "
        f"that captures the main ideas clearly and accurately:\n\n"
        f"{snippet}"
    )

def build_messages(website: Website):
    """Combine system and user messages into one list."""
    return [
        {"role": "system", "content": system_message()},
        {"role": "user", "content": build_user_message(website)},
    ]

## Summarization

In [54]:
def summarize_website(url: str) -> str:
    """Fetch and summarize the given website using OpenAI."""
    website = Website(url)
    messages = build_messages(website)

    response = openai.chat.completions.create(
        # model="gpt-4o-mini",
        model="llama3.2",
        messages=messages,
        temperature=0.7
    )

    summary = response.choices[0].message.content
    return summary


def display_summary(url: str):
    """Render the summary nicely in Markdown."""
    summary = summarize_website(url)
    display(Markdown(summary))


In [55]:
display_summary(nationalgeographic.url)

**Rare Green Comet: How to See it this Week**
=============================================

A rare green comet, Comet C/2025 A6 (Lemmon), is visible from Earth for the first time in over a thousand years. Here's how to see it:

### What is Comet Lemmon?

Comet Lemmon is an icy relic from the solar system's origins that will not return until the next millennium.

### How to Spot it

*   Date: October 21
*   Location: Near Scorpius or Libra, low on the western horizon shortly after sunset.
*   Time: Use stargazing apps or NASA's Sky Events calendar to check exact rise and set times for your location.
*   Binoculars and cameras can enhance your viewing experience.

### Fun Facts

*   Comet Lemmon glows green due to the presence of diatomic carbon (C2).
*   The comet has a dynamic shape and brightness that can change within hours as sunlight and heat reshape its surface.
*   Comets are treasure troves of the original building blocks of our solar system, with much of their dust and ice remaining unchanged over billions of years.

### Additional Events

*   Peak of the Orionid meteor shower (October 21)
*   Rare double feature in the night sky
*   Opportunities for sample return missions to study comets up close