In [2]:
import os
import requests
from openai import OpenAI
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from typing import List
from dotenv import load_dotenv
import json

In [3]:
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    

headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

API key looks good so far


In [4]:
MODEL = 'gpt-4o-mini'
openai = OpenAI()

In [5]:
class Website:
    text: str
    links: List
    title: str
    url: str
    body: str


    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ' '
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
    
    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [7]:
url = 'https://edwarddonner.com'
web = Website(url)
print(web.get_contents())

ConnectionError: HTTPSConnectionPool(host='edwarddonner.com', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x107f50cd0>: Failed to resolve 'edwarddonner.com' ([Errno 8] nodename nor servname provided, or not known)"))

In [6]:
system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
system_prompt += "You should respond in JSON as in this example:"
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""


In [7]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
    Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [8]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': get_links_user_prompt(website)}
        ],
        response_format= {'type': 'json_object'}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [9]:
print(get_links(url))

{'links': [{'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}, {'type': 'linkedin profile', 'url': 'https://www.linkedin.com/in/eddonner/'}, {'type': 'twitter profile', 'url': 'https://twitter.com/edwarddonner'}, {'type': 'facebook profile', 'url': 'https://www.facebook.com/edward.donner.52'}]}


In [10]:
def get_all_details(url):
    result = f'Landing Page:\n'
    result += Website(url).get_contents()
    links = get_links(url)
    for link in links['links']:
        result += f"\n\n{link['type']}\n"
        result += f"{Website(link['url']).get_contents()}\n"
    return result


In [11]:
print(get_all_details(url))

SSLError: HTTPSConnectionPool(host='twitter.com', port=443): Max retries exceeded with url: /edwarddonner (Caused by SSLError(SSLEOFError(8, 'EOF occurred in violation of protocol (_ssl.c:1129)')))

In [None]:
brochure_system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [None]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [None]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role':'system', 'content':brochure_system_prompt},
            {'role':'user', 'content':get_brochure_user_prompt(company_name, url)}
        ]
    )
    result = response.choices[0].message.content
    return display(Markdown(result))

In [None]:
print(create_brochure('Wikipedia', 'https://www.wikipedia.org/'))

# Wikipedia Brochure

## Welcome to Wikipedia
**The Free Encyclopedia**

Wikipedia is the world's largest and most diverse online encyclopedia, offering more than 6,974,000 articles in English alone, with millions more in over 300 languages. Our mission is to provide free and accessible knowledge to everyone, everywhere.

---

## Our Commitment

At Wikipedia, we are dedicated to:

- **Free Access to Information**: All our articles are accessible for free, ensuring that knowledge is available to all, regardless of geography or status.
  
- **Diversity**: Our expansive content covers a wide range of topics and languages, promoting inclusivity and understanding across cultures. 

- **Community-Driven**: Wikipedia is powered by a global community of volunteer editors who create and maintain content, making it a dynamic platform that grows and evolves continuously.

---

## Company Culture
At Wikipedia, our culture is rooted in the principles of openness, collaboration, and respect. We thrive on:

- **Volunteering and Contribution**: Our content is created by an ever-growing community of volunteers who contribute their expertise and time to build a comprehensive repository of knowledge.

- **Diversity and Inclusivity**: We recognize and celebrate the diversity of our contributors, aiming to represent a multitude of voices and perspectives through our articles.

- **Empowering Curiosity**: We foster an environment where curiosity and learning are encouraged and supported through collaborative efforts and shared knowledge.

---

## Our Customers
Wikipedia serves a wide array of users globally, including:

- **Students and Researchers**: A reliable source of information for anyone pursuing academic work or personal interests.
  
- **Professionals and Enthusiasts**: Individuals working in various fields benefit from our exhaustive content to enhance their knowledge and skills.

- **Educators**: Teachers and professors utilize Wikipedia as a teaching resource to engage students and encourage deeper research.

---

## Careers at Wikipedia
Joining the Wikipedia community means becoming part of a larger mission to democratize knowledge. W e are always seeking passionate individuals who:

- **Share Our Vision**: Are committed to creating a world where information is free and available to all.
  
- **Value Collaboration**: Enjoy working in teams and value the insights gained from diverse perspectives.

- **Have a Growth Mindset**: Are eager to learn and grow in a dynamic, forward-thinking environment.

If you are interested in contributing to our mission, explore our opportunities [here](https://wikimediafoundation.org).

---

**Join Us in Our Mission**
By supporting Wikipedia, you become an integral part of a larger movement to make knowledge accessible to everyone. Donate today and help us continue to grow and improve! 

*Download Wikipedia available on Android and iOS. Customize your reading experience and save articles for offline reading.*

[Wikipedia](https://www.wikipedia.org)  
*Your source for free knowledge.*

None


In [None]:
def streaming_create_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role':'system', 'content':brochure_system_prompt},
            {'role':'user', 'content':get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ' '
    display_handle = display(Markdown(' '), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ' '
        response = response.replace("```",' ').replace("markdown", ' ')
        update_display(Markdown(response), display_id=display_handle.display_id)


# streaming without markdown 
# for chunk in stream:
#   print(chunk.choices[0].delta.content or ' ', end=' ')

In [None]:
print(streaming_create_brochure('Wikipedia', 'https://www.wikipedia.org/'))

  # Wikipedia Brochure

---

## About Us 
**Wikipedia** is the world's largest and most comprehensive online encyclopedia, providing free access to a vast collection of knowledge. With over **6.9 million** articles in English and millions in multiple languages, it's a platform powered by volunteers from around the globe, driven by the mission of sharing knowledge freely.

---

## Company Culture
At Wikipedia, inclusivity and collaboration are at the core of our culture. We operate in a dynamic, open, and community-driven environment where everyone can contribute to and improve the encyclopedia. Our ethos embraces diversity, with volunteers from innumerable backgrounds working together towards a common goal of accessible information for all. 

---

## Our Users
Wikipedia serves a broad audience of users ranging from students and researchers to casual learners and educators. With content available in over **300 languages**, we are committed to making information accessible to everyone, everywhere. 

---

## How Can You Contribute?
### Join Our Community!
Wikipedia is not just an encyclopedia; it's a community composed of contributors from all walks of life. Whether you're a seasoned editor or a beginner, you can start contributing today. Share your knowledge, improve existing articles, and help curate the world's knowledge base.

---

## Careers at Wikipedia
**Work with Us!**  
As part of the **Wikimedia Foundation**, we offer a range of career opportunities that allow you to engage with innovative projects and contribute to a global mission. We seek passionate individuals who are committed to free knowledge and are eager to make a difference through technology, outreach, and community engagement. 

Explore roles in:
- Technology and Software Development
- Community Engagement and Support
- Operations and Project Management

### Learn More
Visit our careers page to view current openings and apply to join our diverse team dedicated to knowledge equity.

---

## Get Involved
Support our mission through donations or by participating to edit and curate content. Every effort counts in keeping Wikipedia free for all.

---

## Connect With Us
Visit us at [Wikipedia](https://www.wikipedia.org/) and be a part of this incredible journey towards making knowledge accessible to everyone, everywhere. 

None
