## Load Libraries

In [1]:
import os # For interacting with the operating system
import requests # For making HTTP requests
import subprocess # For running external commands
import textwrap
from dotenv import load_dotenv # For loading environment variables
from bs4 import BeautifulSoup # For parsing and extracting data from HTML or XML documents
from IPython.display import Markdown, display  # For displaying formatted Markdown in Jupyter notebooks
from openai import OpenAI # For interacting with the OpenAI API

from selenium import webdriver
from selenium.webdriver.chrome.options import Options # For configuring Selenium Chrome options

## Load Environment Key

In [2]:
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()

# Go up one level to the project folder
project_dir = os.path.dirname(script_dir)
env_path = os.path.join(project_dir, "env_keys", ".env")

# Access the variable
load_dotenv(dotenv_path=env_path)
api_key = os.getenv("OPENAI_API_KEY")
print("API Key loaded:", api_key is not None)

API Key loaded: False


## Ollma Initialize

In [6]:
subprocess.Popen("ollama serve", shell=True)

<Popen: returncode: None args: 'ollama serve'>

time=2026-02-09T14:38:40.179+05:30 level=INFO source=routes.go:1636 msg="server config" env="map[HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_KEEP_ALIVE:5m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/Users/kumudithasilva/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false http_proxy: https_proxy: no_proxy:]"
time=2026-02-09T14:38:40.180+05:30 level=INFO so

In [7]:
requests.get("http://localhost:11434").content

[GIN] 2026/02/09 - 14:38:44 | 200 |      1.0375ms |       127.0.0.1 | GET      "/"


b'Ollama is running'

In [8]:
result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
print(result.stdout)

[GIN] 2026/02/09 - 14:38:49 | 200 |      21.375µs |       127.0.0.1 | HEAD     "/"
[GIN] 2026/02/09 - 14:38:49 | 200 |    3.823208ms |       127.0.0.1 | GET      "/api/tags"
NAME    ID    SIZE    MODIFIED 



In [11]:
# !ollama pull llama3.2

In [12]:
OLLAMA_API_URL = "http://localhost:11434/v1"

## Initialize OpenAI Client for Ollma

In [None]:
# Initialize the OpenAI client with the base URL and API key, commenting out the API key since we are using a 
# local instance of Ollama that does not require authentication
openai = OpenAI(base_url=OLLAMA_API_URL, api_key="api_key")

## Selenium Support Static And Dynamic Webpage Filter

In [16]:
class Website:
    """
    A class to fetch, parse, and extract readable content from web pages.

    It supports both static (HTML-only) and dynamic (JavaScript-rendered) websites.
    """

    def __init__(self, url: str, use_selenium: bool = False):
        """
        Initializes the Website object and extracts the webpage content.

        Args:
            url (str): The target webpage URL.
            use_selenium (bool): Whether to use Selenium for JS-rendered pages.
                                 Default is False (use simple requests).
        """
        self.url = url
        self.title = None
        self.content = None

        if use_selenium:
            # --- For dynamic or JavaScript-rendered pages ---

            # Configure Chrome to run in headless mode (no visible browser window)
            options = Options()
            options.add_argument("--headless")         # Run Chrome without a GUI
            options.add_argument("--disable-gpu")      # Disable GPU acceleration
            options.add_argument("--no-sandbox")       # Required for some environments

            # Initialize the Chrome WebDriver with the specified options
            driver = webdriver.Chrome(options=options)
            # Load the webpage
            driver.get(url)
            # Get the fully rendered page source
            rendered_html = driver.page_source
            # Close the WebDriver 
            driver.quit()
            # Parse the HTML using BeautifulSoup
            soup = BeautifulSoup(rendered_html, 'html.parser')
        
        else:
            # --- For static HTML-only pages ---   

            response = requests.get(url)
            soup = BeautifulSoup(response.text, 'html.parser')
        
        self.title = soup.title.string if soup.title else 'No Title'
        for irrelevant in soup(['script', 'style', 'header', 'footer', 'nav', 'aside']):
            irrelevant.decompose()
        
        self.content = soup.body.get_text(separator="/n", strip=True)

## Extract And Display Webpage Content

In [17]:
nationalgeographic = Website("https://www.nationalgeographic.com/science/article/green-comet-lemmon-2025", use_selenium=True)
nationalgeographic_title = nationalgeographic.title

snippet  = nationalgeographic.content[:]
wrapped_snippet = textwrap.fill(snippet, width=80)

print(f" =={nationalgeographic_title}== \n")
print(wrapped_snippet[:520])

 ==Here‘s how to see a rare green comet this week | National Geographic== 

SCIENCE/nNIGHT SKY GUIDE/nThis rare green comet won’t return for a thousand
years. Here’s how to see it this week./nComet C/2025 A6 (Lemmon) is this year’s
brightest comet visible from Earth—an icy relic from the solar system’s origins
that won’t return until the next millennium./nComet C/2025 A6 (Lemmon), captured
from the Dark Sky Alqueva Observatory in Portugal on October 2, 2025, glows
green as it nears its closest approach to Earth. Here’s how to see this rare
green comet before it disappears for a millennium.


## OpenAI Message Helpers

In [18]:
def system_message() -> str:
    """System-level instruction for OpenAI."""
    return "You are a helpful assistant that summarizes webpage content in Markdown."


def build_user_message(website: Website) -> str:
    """Create the user message including page title and snippet."""
    snippet = textwrap.fill(website.content, width=80)
    return (
        f"You are looking at content from the webpage titled: **{website.title}**\n\n"
        f"Below is the extracted content. Please provide a concise Markdown summary "
        f"that captures the main ideas clearly and accurately:\n\n"
        f"{snippet}"
    )

def build_messages(website: Website):
    """Combine system and user messages into one list."""
    return [
        {"role": "system", "content": system_message()},
        {"role": "user", "content": build_user_message(website)},
    ]

## Summarization

In [20]:
def summarize_website(url: str) -> str:
    """Fetch and summarize the given website using OpenAI."""
    website = Website(url)
    messages = build_messages(website)

    response = openai.chat.completions.create(
        # model="gpt-4o-mini",
        model="llama3.2",
        messages=messages,
        temperature=0.7
    )

    summary = response.choices[0].message.content
    return summary


def display_summary(url: str):
    """Render the summary nicely in Markdown."""
    summary = summarize_website(url)
    display(Markdown(summary))


In [22]:
display_summary(nationalgeographic.url)

[GIN] 2026/02/09 - 14:49:01 | 200 | 12.185504458s |       127.0.0.1 | POST     "/v1/chat/completions"


**Rare Green Comet to Pass Closest to Earth this Week**
===========================================================

A rare and ancient comet, known as Comet C/2025 A6 (Lemmon), is passing closest to Earth on October 21, offering a unique opportunity for astronomers and stargazers to witness its spectacle. Here's what you need to know:

**What is Comet Lemmon?**
-------------------------

Comet C/2025 A6 (Lemmon) is an icy relic from the solar system's origins that won't return until the next millennium. It was discovered on January 3, 2025, and has been named after the Catalina Sky Survey observatory.

**How to See Comet Lemmon**
---------------------------

Comet Lemmon will be visible in the night sky as a soft green glow near the constellations Scorpius or Libra, low on the western horizon shortly after sunset. The comet should remain visible from mid-October through early November, fading gradually as it moves away from Earth.

**Why does Comet Lemmon Glow Green?**
-------------------------------------

The emerald hue of Comet Lemmon is due to the presence of diatomic carbon (C2), a molecule that gets broken down by solar radiation and emits green light.

**Additional Tips for Viewing Comet Lemmon**
--------------------------------------------

* Use binoculars or a good phone or digital camera to see more of the comet's coma.
* Check exact rise and set times using stargazing apps or NASA's Sky Events calendar.
* The peak of the Orionid meteor shower will coincide with the comet's passage, offering a rare double feature in the night sky.

**Rare Opportunity to See Comet Lemmon**
------------------------------------------

Comet C/2025 A6 (Lemmon) is one of the solar system's oldest relics and won't return for over 1,000 years. Don't miss this chance to witness its spectacular appearance in the night sky!