## This is the import cell, also called a setup cell or bootstrap cell in notebooks.

- Loads external libraries
- Loads helper modules you wrote (`scraper`)
- Prepares your environment for the rest of the notebook


In [4]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [5]:
# Load settings from the .env file; override=True lets values from .env replace
# variables that are already set in the process environment.
load_dotenv(override=True)

# Either variable name is accepted for the Gemini key.
api_key = os.getenv('GEMINI_API_KEY') or os.getenv('GOOGLE_API_KEY')

if not api_key:
    print("No Gemini API key found. Please create one at https://aistudio.google.com/app/apikey.")
elif api_key.strip() != api_key:
    # Checked before the prefix test: a key with leading whitespace would fail
    # startswith("AI") and be misreported as an invalid key instead of a padded one.
    print("Your Gemini key has extra spaces — remove them in your .env file.")
elif not api_key.startswith("AI"):
    print("A key was found, but it doesn’t look like a valid Gemini key (should start with 'AI').")
else:
    print("Gemini API key found and looks good ✅")

# Reuse the value that was just checked instead of reading the environment again.
gemini_key = api_key

# Create client pointed at Gemini's OpenAI-compatible endpoint
gemini = OpenAI(
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
    api_key=gemini_key,
)

# Model name used by every chat-completion call in this notebook.
MODEL = "gemini-2.0-flash"


Gemini API key found and looks good ✅


In [12]:
# Quick check of the scraper helper on a sample site; the result is kept in `links`.
links = fetch_website_links("https://edwarddonner.com")


In [6]:
# System prompt for the link-selection request: tells the model to pick the links
# worth including in a company brochure and shows the JSON layout to reply with
# (a "links" list whose entries carry a "type" and a "url").
link_system_prompt = """
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:

{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
def get_links_user_prompt(url):
    """
    Build the user message for the link-selection request.

    The message is a fixed block of instructions that mentions `url`, followed
    by the items returned by `fetch_website_links(url)`, one per line.

    Args:
        url: Address of the website whose links should be listed.

    Returns:
        The complete prompt text as a single string.
    """
    instructions = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format. but dont just return 2 entries in json.
return as many entries as possible
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    # One link per line, appended directly after the instructions.
    link_lines = "\n".join(fetch_website_links(url))
    return instructions + link_lines

#print(get_links_user_prompt("https://edwarddonner.com"))

In [9]:
def select_relevant_links(url):
    """
    Ask the model which links on `url` are relevant for a brochure.

    Sends `link_system_prompt` plus the user prompt built by
    `get_links_user_prompt(url)` through the `gemini` client, requesting a
    JSON object as the response format, and parses the reply.

    Args:
        url: Address of the website to analyse.

    Returns:
        The reply decoded with `json.loads`. The system prompt asks for a
        dict with a "links" list, but the shape is not validated here.
    """
    completion = gemini.chat.completions.create(
        model=MODEL,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)},
        ],
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)
    

In [13]:
# Try the selector on a live site; the parsed result is displayed as the cell output.
select_relevant_links("https://www.apple.com/")

{'links': [{'type': 'mac', 'url': 'https://www.apple.com/mac/'},
  {'type': 'ipad', 'url': 'https://www.apple.com/ipad/'},
  {'type': 'iphone', 'url': 'https://www.apple.com/iphone/'},
  {'type': 'watch', 'url': 'https://www.apple.com/watch/'},
  {'type': 'apple vision pro',
   'url': 'https://www.apple.com/apple-vision-pro/'},
  {'type': 'airpods', 'url': 'https://www.apple.com/airpods/'},
  {'type': 'tv-home', 'url': 'https://www.apple.com/tv-home/'},
  {'type': 'entertainment', 'url': 'https://www.apple.com/entertainment/'},
  {'type': 'airtag', 'url': 'https://www.apple.com/airtag/'},
  {'type': 'wallet', 'url': 'https://www.apple.com/wallet/'},
  {'type': 'apple card', 'url': 'https://www.apple.com/apple-card/'},
  {'type': 'apple pay', 'url': 'https://www.apple.com/apple-pay/'},
  {'type': 'apple cash', 'url': 'https://www.apple.com/apple-cash/'},
  {'type': 'iCloud', 'url': 'https://www.icloud.com'},
  {'type': 'apple one', 'url': 'https://www.apple.com/apple-one/'},
  {'type': 

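Here is another, more descriptive way of doing the previous step: this cell redefines `select_relevant_links` (replacing the earlier definition once it is run) and adds progress messages.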

In [None]:
def select_relevant_links(url):
    """
    Ask the model which links on `url` are relevant for a brochure, with progress output.

    Prints a line before the request and another with the number of entries
    under the "links" key of the parsed reply.

    Args:
        url: Address of the website to analyse.

    Returns:
        The reply decoded with `json.loads`. It must contain a "links" key,
        because the progress message reads `len(...)` of it.
    """
    print(f"Selecting relevant links for {url} by calling {MODEL}")
    completion = gemini.chat.completions.create(
        model=MODEL,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)},
        ],
    )
    reply_text = completion.choices[0].message.content
    selection = json.loads(reply_text)
    print(f"Found {len(selection['links'])} relevant links")
    return selection

# select_relevant_links("https://www.apple.com/")

Selecting relevant links for https://www.apple.com/ by calling gemini-2.0-flash
Found 42 relevant links


{'links': [{'type': 'mac', 'url': 'https://www.apple.com/mac/'},
  {'type': 'ipad', 'url': 'https://www.apple.com/ipad/'},
  {'type': 'iphone', 'url': 'https://www.apple.com/iphone/'},
  {'type': 'watch', 'url': 'https://www.apple.com/watch/'},
  {'type': 'apple vision pro',
   'url': 'https://www.apple.com/apple-vision-pro/'},
  {'type': 'airpods', 'url': 'https://www.apple.com/airpods/'},
  {'type': 'tv & home', 'url': 'https://www.apple.com/tv-home/'},
  {'type': 'entertainment', 'url': 'https://www.apple.com/entertainment/'},
  {'type': 'airtag', 'url': 'https://www.apple.com/airtag/'},
  {'type': 'apple card', 'url': 'https://www.apple.com/apple-card/'},
  {'type': 'wallet', 'url': 'https://www.apple.com/wallet/'},
  {'type': 'apple pay', 'url': 'https://www.apple.com/apple-pay/'},
  {'type': 'apple cash', 'url': 'https://www.apple.com/apple-cash/'},
  {'type': 'icloud', 'url': 'https://www.icloud.com'},
  {'type': 'apple one', 'url': 'https://www.apple.com/apple-one/'},
  {'type'

In [9]:
def fetch_page_and_all_relevant_links(url):
    """
    Gather the landing page text and the text of every selected link.

    The landing page is fetched first, then `select_relevant_links(url)` is
    called, and finally each entry of its "links" list is fetched in order.

    Args:
        url: Address of the company's landing page.

    Returns:
        One string: a "Landing Page" section followed by a "Link" section
        (titled with the entry's "type") for each selected link.
    """
    landing_text = fetch_website_contents(url)
    selection = select_relevant_links(url)

    sections = [f"## Landing Page:\n\n{landing_text}\n## Relevant Links:\n"]
    for entry in selection['links']:
        # Heading is built before the page is fetched, matching the order in which
        # the entry's "type" and "url" keys are read.
        sections.append(f"\n\n### Link: {entry['type']}\n" + fetch_website_contents(entry["url"]))
    return "".join(sections)

In [17]:
# Show the combined landing-page and linked-page text that will later feed the brochure prompt.
print(fetch_page_and_all_relevant_links("https://apple.com"))

Selecting relevant links for https://apple.com by calling gemini-2.0-flash
Found 41 relevant links
## Landing Page:

Apple

Apple
Apple
Store
Mac
iPad
iPhone
Watch
Vision
AirPods
TV & Home
Entertainment
Accessories
Support
0
+
Give something special.
Find what they’ve been waiting for all year.
Shop gifts
iPhone 17 Pro
All out Pro.
Learn more
Buy
iPhone Air
The thinnest iPhone ever. With the power of pro inside.
Learn more
Buy
AirPods Pro 3
The world’s best in-ear Active Noise Cancellation.
Learn more
Buy
iPad Air
Now supercharged by the M3 chip.
Learn more
Buy
Apple Watch Series 11
The ultimate way to watch your health.
Learn more
Buy
iPad
Now with the speed of the A16 chip and double the starting storage.
Learn more
Buy
Apple Trade In
Get up to $180–$670 in credit when you trade in iPhone 13 or higher.
1
Get your estimate
Apple Card
Get up to 3% Daily Cash back with every purchase.
Learn more
Apply now
Apply now
Apple TV
FAM Gallery
Watch now
Strong and Calm Combos for Busy Days
List

In [10]:
# System prompt for the brochure request: sets the audience (customers, investors,
# recruits), asks for markdown without code blocks, and lists the topics to cover.
brochure_system_prompt = """
You are an assistant that analyzes the contents of several relevant pages from a company website
and creates a short brochure about the company for prospective customers, investors and recruits.
Respond in markdown without code blocks.
Include details of company culture, customers and careers/jobs if you have the information.
"""

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# brochure_system_prompt = """
# You are an assistant that analyzes the contents of several relevant pages from a company website
# and creates a short, humorous, entertaining, witty brochure about the company for prospective customers, investors and recruits.
# Respond in markdown without code blocks.
# Include details of company culture, customers and careers/jobs if you have the information.
# """


In [11]:
def get_brochure_user_prompt(company_name, url):
    """
    Build the user message for the brochure request.

    Args:
        company_name: Name of the company, inserted into the opening lines.
        url: Landing page address passed to `fetch_page_and_all_relevant_links`.

    Returns:
        The opening instructions followed by the gathered page text.
        No length limit is applied to the combined string.
    """
    intro = f"""
You are looking at a company called: {company_name}
Here are the contents of its landing page and other relevant pages;
use this information to build a short brochure of the company in markdown without code blocks.\n\n
"""
    # Slice the returned string (e.g. [:5_000]) if the prompt needs to be capped.
    return intro + fetch_page_and_all_relevant_links(url)

In [12]:
def create_brochure(company_name, url):
    """
    Generate a brochure for a company and render it in the notebook.

    Sends `brochure_system_prompt` and the prompt from
    `get_brochure_user_prompt(company_name, url)` through the `gemini` client,
    then displays the reply as Markdown. Nothing is returned.

    Args:
        company_name: Name of the company the brochure is about.
        url: Address of the company's landing page.
    """
    completion = gemini.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": brochure_system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
    )
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [13]:
# End-to-end run: select links, gather page text, and render the generated brochure below.
create_brochure("SAIL","https://www.sail.co.in/en")

Selecting relevant links for https://www.sail.co.in/en by calling gemini-2.0-flash
Found 46 relevant links


# SAIL: Forging a Stronger India

## About Us

Steel Authority of India Limited (SAIL) is a Maharatna Central Public Sector Enterprise (CPSE) under the Ministry of Steel, Government of India. We are India's largest steel producer, committed to providing cost-effective and superior quality products and services.

## Our Vision

To be a respected world-class corporation and the leader in Indian steel, excelling in quality, cost, productivity, and customer satisfaction.

## Products

SAIL offers a wide spectrum of steel products, including:

*   Structurals
*   SAIL TMT Bars
*   Galvanised Products
*   Wire Rods
*   Plates
*   Railway Products (Wheels and Axles)
*   Hot & Cold Rolled Products
*   Pipes
*   Electrical Steels
*   Stainless Steel Products
*   Semis
*   Pig Iron

## Our Plants

Our integrated steel plants are located at:

*   Bhilai
*   Bokaro
*   Durgapur
*   Rourkela
*   IISCO (Burnpur)

We also have special steel plants like Chandrapur Ferro-Alloy Plant.

## Marketing and Reach

SAIL makes its steel available at customers’ doorsteps through an extensive warehouse, distributor, and dealer network. You can even buy SAIL SeQR TMT Bars online.

## Investor Relations

We maintain transparency through:

*   Performance Highlights
*   Annual General Meetings
*   Financial Disclosures
*   Chairman's Address
*   Information on Shares and Dividends

## Careers at SAIL

SAIL offers diverse career opportunities across its various plants and units. We seek talented individuals in:

*   Engineering & Technology
*   Research & Development
*   Consultancy
*   Environment Management
*   Logistics
*   Management Training
*   Central Marketing
*   Raw Materials Division
*   And more!

Visit our Careers page for current job openings. SAIL provides Mediclaim and pension benefits for its employees.

## Corporate Social Responsibility

SAIL is committed to Corporate Citizenship, sustainability and environmental responsibility.

