In [2]:
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [5]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key found")
else:
    print("No API found")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key found


In [17]:
class Website:

    def __init__(self, url):
        """
        Create this Website object from the given url using the BeautifulSoup library
        """
        self.url = url
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.content, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        for irrelevant in soup.body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = soup.body.get_text(separator="\n", strip=True)

In [35]:
url = "https://www.skysports.com/la-liga-table"
web = Website(url)
print(web.title)

Spanish La Liga Table


In [36]:
system_prompt = "You are an assistant that analyzes the table of football league of a website \
and provides a short summary, ignoring text that might be navigation related. \
Respond in markdown."

In [47]:
# User Prompt that asks for summaries of websites

def user_prompt_for(website):
    user_prompt = f"You are looking at a website titled {website.title}"
    user_prompt += "\nThe contents of this website is as follows; \
please provide a short summary of the table in markdown. \
Also provide whuch team are top four and which teams are in danger zone for relegation\
Which team scored the most, and which team score the least\
Which team conceded most goal, and conceded least goal\
After that analyze key insight of these data from given page\\n"
    user_prompt += website.text
    return user_prompt

In [48]:
def messages_for(website):
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [49]:
messages_for(web)

[{'role': 'system',
  'content': 'You are an assistant that analyzes the table of football league of a website and provides a short summary, ignoring text that might be navigation related. Respond in markdown.'},
 {'role': 'user',
  'content': "You are looking at a website titled Spanish La Liga Table\nThe contents of this website is as follows; please provide a short summary of the table in markdown. Also provide whuch team are top four and which teams are in danger zone for relegationWhich team scored the most, and which team score the leastWhich team conceded most goal, and conceded least goalAfter that analyze key insight of these data from given page\\nSkip to content\nHome\nSports\nFootball\nF1\nCricket\nRugby Union\nRugby League\nGolf\nBoxing\nNFL\nTennis\nRacing\nDarts\nNetball\nMMA\nMore Sports\nScores\nWatch\nSky Bet\nShop\nMore\nPodcasts\nLive on Sky\nGet Sky Sports\nSky Sports App\nNOW\nKick It Out\nBlack Lives Matter\nBritish South Asians in Football\nWatch Sky Sports\nFoo

In [50]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = "gpt-4o-mini",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

# A function to display this nicely in the Jupyter output, using markdown
# Call summarize(url)
def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [51]:
display_summary(url)

# Spanish La Liga Table Summary

**Top Four Teams:**
1. Real Madrid
2. Atletico Madrid
3. Barcelona
4. Athletic Bilbao

**Relegation Zone Teams:**
18. Espanyol
19. Valencia
20. Real Valladolid

**Goals Analysis:**
- **Most Goals Scored:** Barcelona (59 Goals)
- **Least Goals Scored:** Real Valladolid (14 Goals)

**Goals Conceded Analysis:**
- **Most Goals Conceded:** Real Valladolid (42 Goals)
- **Least Goals Conceded:** Atletico Madrid (14 Goals)

## Key Insights:
- Real Madrid leads the table with a strong goal difference of +30, showcasing their offensive strength alongside a solid defense.
- Barcelona is the top-scoring team, indicating a potent attack, although they have conceded a relatively high number of goals (24).
- The relegation zone includes traditional teams like Valencia, who are struggling significantly with only 16 points and a goal difference of -16.
- Real Valladolid appears to be in dire straits, both in scoring and defense, making it difficult for them to escape relegation without drastic improvements.