# Use OpenAI to generate a summary of the content from a given website URL # 2025


In [2]:
# imports
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI


In [5]:
# Load environment variables from a file called .env

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# Surrounding whitespace is tested before the prefix: a key with leading
# spaces or tabs also fails the "sk-proj-" prefix test, so checking the
# prefix first would hide the real problem behind a wrong-prefix message.

if not api_key:
    print("No API key was found, please set in env")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key ")
else:
    print("API key found and looks good so far!")

API key found and looks good so far!


In [7]:
# Create the OpenAI client object that the later cells call
openai = OpenAI()

In [8]:
# Send a single user message to the chat model and print its reply
message = "let's start"
response = openai.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": message}],
)
print(response.choices[0].message.content)

Sure! What would you like to talk about or explore today?


In [9]:
# Some websites need you to use proper headers when fetching them,
# so a browser-like User-Agent is supplied with each request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A downloaded web page reduced to its title and its body text.

    Instances expose three attributes:
      - url:   the address that was requested
      - title: the string of the page's <title> tag, or "No title found"
      - text:  newline-separated text of <body> after script, style, img and
               input elements are removed; "" when the page has no <body>
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: Address of the page to download.
            timeout: Value passed to requests.get as its timeout (seconds), so
                that an unresponsive server cannot block the notebook forever.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        soup = BeautifulSoup(response.content, 'html.parser')

        # soup.title may be missing, and .string is None when <title> is empty
        # or holds nested tags; fall back to the placeholder in both cases.
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # Responses without a <body> (non-HTML payloads, fragments) have
            # nothing to strip or extract.
            self.text = ""
            return

        # Drop elements whose content is not readable page text.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

In [16]:
# Let's try parsing an India Times article about the RCB win
ed = Website("https://www.indiatimes.com/trending/rcb-vs-pbks-ipl-2025-jio-hotstar-crosses-578-crore-views-during-final-showdown-becomes-most-viewed-match-of-the-season-660265.html")
# Title first, then the extracted body text, each on its own line
print(ed.title, ed.text, sep="\n")

RCB VS PBKS, IPL 2025: Jio Hotstar crosses 57.8 crore views during final showdown, becomes most-viewed match of the season
Home
Lifestyle
Astrology
Whats Cooking
Relationships
Style
Art and Culture
Gardening
Beauty and Care
Health & Wellness
Healthy Living
Recipes
Inspire
Tips & Tricks
Buzz
Fashion
Beauty & Care
Celebrity Fashion
Style
Worth
Investment
Mutual Funds
News
Entertainment
Bollywood
Celebscoop
Hollywood
Originals
Binge
Explore
Technology
News
Apps
Science And Future
Auto
Current
Electric
Alternate
Specials
Year Ender 2024
Treks Of India
101 Abu Dhabi Dos
Create on India
Oscars 2024
Wanderlust
Ayodhya Ram Mandir
#FitIT
India On Plate
Sustainability
Give Up Plastic
The Great Indian Brain Drain
Careers
Equals
Trending
RCB VS PBKS, IPL 2025: Jio Hotstar crosses 57.8 crore views during final showdown, becomes most-viewed match of the season
RCB VS PBKS, IPL 2025: Jio Hotstar crosses 57.8 crore views during final showdown, becomes most-viewed match of the season
Jio Hotstar has wi

In [33]:
# print(ed.text)

# Illustration of the chat message format: the two dicts below are bare
# expressions (not assigned to any name), shown only as examples.

# System prompt: sets the LLM's behavior and tone
{"role": "system", "content": "You are a polite travel guide."}

# User prompt: carries the actual task
{"role": "user", "content": "Suggest 3 places to visit in Japan."}


In [14]:
# Instructions for the model: summarize the site, skip navigation text, answer in markdown
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [23]:
# A function that writes a User Prompt that asks for summaries of websites:

def user_prompt_for(website):
    """
    Build the user prompt that asks for a short markdown summary of a website.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A single string: a line naming the title, the summarization
        instructions, a blank line, and then the website's text.
    """
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
    )
    user_prompt += website.text
    return user_prompt

In [30]:
# print(user_prompt_for(ed))


In [25]:
# This function produces the system/user message list format used for chat requests
def messages_for(website):
    """
    Return the two-element chat message list for summarizing `website`.

    The first entry is the system message carrying `system_prompt`; the second
    is the user message carrying the text built by `user_prompt_for(website)`.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

In [26]:
# Inspect the message list built for the parsed article `ed`
messages_for(ed)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the contents of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled RCB VS PBKS, IPL 2025: Jio Hotstar crosses 57.8 crore views during final showdown, becomes most-viewed match of the season\nThe contents of this website is as follows; please provide a short summary in 100  words of this website in markdown. If it includes news or announcements, then summarize these too.\n\nHome\nLifestyle\nAstrology\nWhats Cooking\nRelationships\nStyle\nArt and Culture\nGardening\nBeauty and Care\nHealth & Wellness\nHealthy Living\nRecipes\nInspire\nTips & Tricks\nBuzz\nFashion\nBeauty & Care\nCelebrity Fashion\nStyle\nWorth\nInvestment\nMutual Funds\nNews\nEntertainment\nBollywood\nCelebscoop\nHollywood\nOriginals\nBinge\nExplore\nTechnology\nNews\nApps\nScience And Future\nAuto\nCurrent\nElectric\nAlternat

In [27]:
# Finally, use all of the functions above to summarize the content at a URL

def summarize(url):
    """
    Fetch the page at `url` and return the model's summary text.

    Wraps the url in a Website, sends `messages_for(...)` for it to the
    "gpt-4o-mini" chat completions endpoint, and returns the message content
    of the first choice in the response.
    """
    page = Website(url)  # Website class defined earlier in the notebook
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages_for(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [29]:
# Summarize the India Times article; the cell displays the returned string
summarize("https://www.indiatimes.com/trending/rcb-vs-pbks-ipl-2025-jio-hotstar-crosses-578-crore-views-during-final-showdown-becomes-most-viewed-match-of-the-season-660265.html")

"## Summary\n\nThe website reports on the monumental viewership achieved by Jio Hotstar during the IPL 2025 finals where Royal Challengers Bengaluru (RCB) defeated Punjab Kings (PBKS), marking RCB's first win in 18 years. Over 57.8 crore views were recorded for this match, making it the most-watched of the season. The IPL 2025 overall saw a 35% year-on-year viewership increase, with teams attracting millions of viewers. Jio Hotstar aims to generate substantial ad revenue, suggesting the tournament is on track to break previous advertising records amidst a surge in franchise valuations."

In [31]:
# Render the summary as formatted markdown so it is easier to read
def display_summary(url):
    """Summarize the page at `url` and display the result as rendered Markdown."""
    display(Markdown(summarize(url)))

In [32]:
# Show the rendered markdown summary of the India Times article
display_summary("https://www.indiatimes.com/trending/rcb-vs-pbks-ipl-2025-jio-hotstar-crosses-578-crore-views-during-final-showdown-becomes-most-viewed-match-of-the-season-660265.html")

# Summary of RCB VS PBKS, IPL 2025

The final match of IPL 2025, where Royal Challengers Bengaluru (RCB) triumphed over Punjab Kings (PBKS), has become the most-viewed game of the season, achieving over 57.8 crore views on Jio Hotstar. This marks RCB's first championship win in 18 years, showcasing increased viewership driven by fierce rivalries and engaging team dynamics. Jio Hotstar also reported a 35% year-on-year increase in viewership, and the tournament’s overall popularity is reflected in the projected ad revenues of approximately ₹4,500 crore. IPL's franchise values have reached all-time highs, further emphasizing its significance in the sports landscape.