# LLM Powered Brochure Generator
This notebook scrapes a company's landing page, asks an OpenAI GPT model to pick the links most relevant to a brochure, fetches those linked pages, and then has the model write a short markdown brochure about the company from the combined page contents.


In [112]:
# Importing the necessary libraries
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [65]:
# Load variables from a local .env file; override=True lets values in the
# file replace ones already present in the process environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Hand the key to the client explicitly so the value read above is the one
# actually used (it was previously fetched but never referenced).
openai = OpenAI(api_key=api_key)

In [104]:
class Website:
    """
    Fetch a web page and expose its title, readable text, and anchor links.

    Attributes:
        url: The URL that was requested.
        body: Raw response bytes returned by ``requests``.
        title: Text of the ``<title>`` tag, or "Title Not Found" when the
            page has no usable title.
        text: Text of ``<body>`` with script/style/img/input elements
            removed, fragments separated by newlines. A single space when
            the page has no ``<body>``.
        links: Every non-empty ``href`` found on ``<a>`` tags, exactly as
            written in the page (so entries may be relative URLs).
    """

    # Elements that are removed from <body> before its text is extracted.
    IRRELEVANT_TAGS = ["script", "style", "img", "input"]

    # Seconds to wait for the server; without a timeout requests can block
    # indefinitely on an unresponsive host.
    REQUEST_TIMEOUT = 30

    def __init__(self, url):
        """
        Download and parse the page at ``url``.

        :param url: Address of the page to fetch.
        """
        self.url = url
        webresponse = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        self.body = webresponse.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # .string is None when <title> is empty or wraps nested tags, so
        # fall back to the placeholder in that case as well.
        page_title = soup.title.string if soup.title else None
        self.title = page_title if page_title else "Title Not Found"

        self.text = " "
        if soup.body:
            for irrelevant in soup.body(self.IRRELEVANT_TAGS):
                irrelevant.decompose()
            # Extract once, after the loop: doing it inside the loop left
            # the text empty on pages without any of the removed tags and
            # repeated the extraction for every removed element.
            self.text = soup.body.get_text(separator="\n", strip=True)

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the page title and text as one labelled block of text."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"
        

In [67]:
# System prompt for the link-selection request: describes the task and shows
# the JSON shape the reply should follow.
link_system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
link_system_prompt += "You should respond in JSON as in this example:"
# The example has to be valid JSON itself; the second entry previously had a
# colon where the comma between the "type" and "url" pairs belongs.
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [107]:
def get_links_user_prompt(website):
    """
    Constructs a user prompt to guide the model in identifying relevant links for the brochure.

    The page's own URL is included in the prompt: the link list may contain
    relative paths, and the model is asked to answer with full https URLs,
    which it can only do if it knows the address the links came from.

    :param website: The Website object; its ``url`` and ``links`` attributes are read.
    :return: A string that prompts the model to choose relevant links.
    """
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [109]:
def good_links(url):
    """
    Ask the model which links on a page are worth using in a brochure.

    The page is fetched, its links are placed in a user prompt, and the
    model is called in JSON mode together with the link system prompt.

    :param url: Address of the page whose links should be assessed.
    :return: The model's JSON reply parsed into a Python object. The system
        prompt asks for a dictionary holding a "links" list of
        {"type", "url"} entries.
    """
    page = Website(url)
    system_message = {'role': 'system', 'content': link_system_prompt}
    user_message = {'role': 'user', 'content': get_links_user_prompt(page)}
    completion = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=[system_message, user_message],
        # Request a JSON object so the reply can be handed to json.loads.
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)
    

In [77]:
# Example run: fetches the page and calls the OpenAI API; the dictionary shown
# below is the result captured from one such run.
good_links('https://www.anthropic.com/')

{'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'},
  {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'},
  {'type': 'team page', 'url': 'https://www.anthropic.com/team'},
  {'type': 'enterprise page', 'url': 'https://www.anthropic.com/enterprise'},
  {'type': 'research page', 'url': 'https://www.anthropic.com/research'},
  {'type': 'pricing page', 'url': 'https://www.anthropic.com/pricing'},
  {'type': 'news page', 'url': 'https://www.anthropic.com/news'}]}

In [110]:
def get_full_details(url):
    """
    Retrieves the full details of the company by combining the main page's contents and relevant links.

    The landing page comes first, followed by one section per link chosen
    by ``good_links``; each section is headed by the link's "type" label.
    The chosen links are also printed for inspection.

    :param url: The URL of the company's landing page.
    :return: A single string with the labelled contents of every page.
    """
    from urllib.parse import urljoin

    sections = ["Landing page:\n", Website(url).get_contents()]
    links = good_links(url)
    print("Found links:", links)
    # A reply without a "links" key yields just the landing page rather
    # than a KeyError.
    for link in links.get("links", []):
        sections.append(f"\n\n{link['type']}\n")
        # The page's links may be relative and the model can echo them back
        # unchanged; urljoin resolves those against the landing page and
        # leaves absolute URLs as they are.
        sections.append(Website(urljoin(url, link["url"])).get_contents())
    return "".join(sections)
    

In [79]:
# System prompt for the brochure-writing request. Built from adjacent string
# literals so every sentence break keeps its space (the earlier backslash
# continuation fused "markdown." and "Include" together).
system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user prompt that asks the model to write the brochure.

    The prompt names the company, states the task, and appends the page
    contents gathered by ``get_full_details``.

    :param company_name: The name of the company.
    :param url: The URL of the company's landing page.
    :param max_chars: Maximum length of the returned prompt in characters.
        The cut applies to the whole prompt, introductory lines included.
        Pass ``None`` to disable truncation.
    :return: The prompt string, truncated to ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_full_details(url)
    # Keep the request small; a slice bound of None returns the full string.
    return user_prompt[:max_chars]

In [111]:
def create_brochure(company_name, url):
    """
    Generate a company brochure with the OpenAI API and render it as markdown.

    Nothing is returned; the brochure is shown through IPython's display.

    :param company_name: The name of the company.
    :param url: The URL of the company's landing page.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=conversation,
    )
    # Only the first choice of the reply is used.
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))


In [103]:
# Example run: gathers the landing page and the model-selected links, then
# renders the generated brochure; the output below was captured from one run.
create_brochure('ESPN', 'https://www.espncricinfo.com/')

Found links: {'links': [{'type': 'about page', 'url': 'https://www.espncricinfo.com/records'}, {'type': 'careers page', 'url': 'https://www.espncricinfo.com/auction/ipl-2025-auction-1460972'}, {'type': 'company page', 'url': 'https://www.espncricinfo.com/cricket-videos/genre/cricinformed-299'}, {'type': 'company page', 'url': 'http://www.espn.in/'}, {'type': 'news page', 'url': 'https://www.espncricinfo.com/cricket-news'}, {'type': 'features page', 'url': 'https://www.espncricinfo.com/cricket-features'}, {'type': 'social media page', 'url': 'https://www.instagram.com/espncricinfo/'}, {'type': 'social media page', 'url': 'https://twitter.com/espncricinfo'}, {'type': 'social media page', 'url': 'https://www.facebook.com/Cricinfo/'}, {'type': 'social media page', 'url': 'https://www.youtube.com/espncricinfo'}]}


# ESPN Company Brochure

## About ESPN
ESPN, a global leader in sports broadcasting and digital media, delivers unparalleled coverage of sporting events across various disciplines. From cricket to football, basketball to tennis, ESPN is dedicated to providing comprehensive insights, statistics, and real-time updates to millions of sports fans worldwide.

---

## Company Culture
At ESPN, our culture is defined by a passion for sports and a commitment to excellence. We foster an inclusive environment that values diversity, creativity, and collaboration. Our teams work together across multiple platforms to innovate the way fans engage with sports, ensuring that we are at the forefront of sports media. We believe in empowering our employees to think outside the box, embrace challenges, and contribute to content that resonates with our audience.

---

## Our Customers
Our diverse audience includes sports enthusiasts, dedicated fans, players, teams, and leagues globally. We cater to millions who seek live scores, match updates, in-depth analysis, and exclusive features that enhance their sporting experience. Whether it's providing comprehensive coverage of leagues like the BPL, ILT20, and Women's T20 leagues or in-depth features on tournaments such as the U-19 T20 World Cup, ESPN thrives on meeting the varied needs of its customers.

---

## Careers at ESPN
ESPN actively seeks talent that shares our passion for sports and innovation. We offer a wide range of career opportunities across different sectors including journalism, technology, marketing, and production. Our commitment to employee growth and development is evident in our collaborative work environment and the various training programs we provide. 

If you're looking to join a dynamic team that inspires and entertains, explore the exciting career opportunities available at ESPN!

---

## Join Us
Join ESPN in shaping the future of sports media. Whether as a viewer, an employee, or an investor, be a part of the journey as we continue to redefine what it means to be a sports fan in the digital age.

---

**Contact Us:**  
For more information on our services, career opportunities, or partnership inquiries, please visit our website at [ESPN.com](https://www.espn.com).

### Follow Us on Social Media
Stay connected with ESPN for the latest updates and insights!  
- Twitter: [@ESPN](https://twitter.com/espn)
- Facebook: [ESPN](https://www.facebook.com/espn)
- Instagram: [@espn](https://www.instagram.com/espn)

---

*Your passion for sports starts here. Dive into the world of ESPN today!*