## Import important libraries

In [1]:
import os 
import requests
from openai import OpenAI
from IPython.display import update_display, Markdown, display
import json
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv



In [2]:
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    

headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

API key looks good so far


In [3]:
class Website:
    text:str
    links: List
    body: str
    title: str
    url: str

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else 'No title Found'
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ' '
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]
    
    def get_content(self):
        return f" Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
system_prompt += "You should respond in JSON as in this example:"
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""


def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
    Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [5]:
MODEL = 'gpt-4o-mini'
openai = OpenAI()

In [6]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model = MODEL,
        messages = [
            {'role':'system', 'content':system_prompt},
            {'role':'user', 'content': get_links_user_prompt(website)}
        ],
        response_format= {'type': 'json_object'}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [7]:
url = 'https://www.wikipedia.org/'
web = Website(url)

In [8]:
get_links(url)

{'links': [{'type': 'about page',
   'url': 'https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use'},
  {'type': 'donate page',
   'url': 'https://donate.wikimedia.org/?wmf_medium=portal&wmf_campaign=portalFooter&wmf_source=portalFooter'}]}

In [9]:
def get_all_details(url):
    result = f"Landing Page:\n"
    result += Website(url).get_content()
    links = get_links(url)
    for link in links['links']:
        result += f"\n\n{link['type']}\n"
        result += f"{Website(link['url']).get_content()}\n"
    return result

In [10]:
brochure_system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [11]:
print(get_all_details(url))

Landing Page:
 Webpage Title:
Wikipedia
Webpage Contents:
Wikipedia
The Free Encyclopedia
English
6,974,000+
articles
日本語
1,457,000+
記事
Русский
2 036 000+
статей
Deutsch
3.001.000+
Artikel
Español
2.021.000+
artículos
Français
2 674 000+
articles
中文
1,470,000+
条目 / 條目
Italiano
1.910.000+
voci
Português
1.146.000+
artigos
Polski
1 652 000+
haseł
Search Wikipedia
Afrikaans
العربية
Asturianu
Azərbaycanca
Български
閩南語 / Bân-lâm-gú
বাংলা
Беларуская
Català
Čeština
Cymraeg
Dansk
Deutsch
Eesti
Ελληνικά
English
Español
Esperanto
Euskara
فارسی
Français
Galego
한국어
Հայերեն
हिन्दी
Hrvatski
Bahasa Indonesia
Italiano
עברית
ქართული
Ladin
Latina
Latviešu
Lietuvių
Magyar
Македонски
مصرى
Bahasa Melayu
Bahaso Minangkabau
မြန်မာဘာသာ
Nederlands
日本語
Norsk (bokmål)
Norsk (nynorsk)
Нохчийн
Oʻzbekcha / Ўзбекча
Polski
Português
Қазақша / Qazaqşa / قازاقشا
Română
Shqip
Simple English
Sinugboanong Binisaya
Slovenčina
Slovenščina
Српски / Srpski
Srpskohrvatski / Српскохрватски
Suomi
Svenska
தமிழ்
Татарча / Tatarça

In [12]:
def create_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role': 'system', 'content': brochure_system_prompt},
            {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ''
    display_handle = display(Markdown(' '), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ' '
        response = response.replace("```",' ').replace("markdown", ' ')
        update_display(Markdown(response), display_id=display_handle.display_id)


# streaming without markdown 
# for chunk in stream:
#   print(chunk.choices[0].delta.content or ' ', end=' ')

# Brochure without streaming
# def create_brochure(company_name, url):
#     response = openai.chat.completions.create(
#         model=MODEL,
#         messages=[
#             {'role':'system', 'content':brochure_system_prompt},
#             {'role':'user', 'content':get_brochure_user_prompt(company_name, url)}
#         ]
#     )
#     result = response.choices[0].message.content
#     return display(Markdown(result))

In [13]:
print(create_brochure('Wikipedia', url))

 # Wikipedia Company Brochure

## About Wikipedia

**Wikipedia** is the world's largest free encyclopedia, offering over 6.9 million articles in English alone, and supporting numerous other languages. With articles extending across a multitude of fields, Wikipedia is not just a treasure trove of knowledge, but a testament to collaborative information sharing. It operates under the non-profit **Wikimedia Foundation**, which strives to ensure that every person can freely access the sum of all human knowledge.

## Key Features

- **Multilingual Support**: Wikipedia is available in over 300 languages, with more than 6,974,000 articles in English, 2,021,000 in Spanish, and a sizable number in many other languages.
- **User-Generated Content**: Anyone can create or edit entries, making it a truly user-driven platform.
- **Free Access**: All users can access Wikipedia's content without charge, ensuring that information is available to everyone, regardless of their economic status.

## Company Culture

At Wikipedia, we champion a vibrant and inclusive company culture. Our organizational values center around:

- **Collaboration**: Emphasizing teamwork and shared goals, we encourage contributors worldwide to join our mission of spreading knowledge.
- **Transparency**: Wikipedia fosters an open environment where decisions are made collectively and transparently, reflecting our commitment to the community.
- **Diversity and Inclusion**: We strive for a diverse representative of cultures, languages, and ideas to enrich content and experiences.

## Our Community and Customers

Wikipedia serves millions of users globally—from students and educators to researchers and casual readers. Our contributors range from subject-matter experts to everyday individuals, all adding their valuable insights and knowledge. Users rely on Wikipedia as an essential resource for information, making it a vital part of their learning processes and research endeavors.

## Careers at Wikipedia

We are always looking for passionate individuals to join our mission. Careers at Wikipedia are not just about jobs; they are about cultivation and learning. We offer:

- **Flexible Work Environment**: We embrace remote and hybrid work models that promote work-life balance.
- **Commitment to Learning**: Continuous education and opportunity to grow within the organization are encouraged.
- **Impactful Work**: Employees contribute to a meaningful cause that has a real-world impact on knowledge dissemination and access.

### Join Us

If you're interested in helping create a world where anyone can freely share and access knowledge, check our careers page for open positions and become part of our mission!

---

For more information, visit [Wikipedia](https://www.wikipedia.org) and join us as we continue to build the sum of all human knowledge. 

None


In [18]:
def get_brochure_text(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role':'system', 'content':brochure_system_prompt},
            {'role':'user', 'content':get_brochure_user_prompt(company_name, url)}
        ]
    )
    return response.choices[0].message.content
    

In [19]:
convert_lang_system_prompt = "You are a professional translator. You take brochures written in English and \
convert them into Spanish. Return only the Spanish translation in **Markdown** format."


def get_converted_lang_user_prompt(company_name, brochure):
    user_prompt = f"""You are looking at the brochure of a company called: {company_name}.\n
    Here is the brochure:\n
    {brochure}.\n
    Convert the entire brochure into only Spanish. Do not use any English and return only the Spanish version in **Markdown**.
    """
    return user_prompt



In [20]:
def get_spanish_text(company_name, url):
    english_brochure = get_brochure_text(company_name, url)
    response = openai.chat.completions.create(
        model = MODEL,
        messages = [
            {'role':'system', 'content':convert_lang_system_prompt},
            {'role':'user', 'content':get_converted_lang_user_prompt(company_name, english_brochure)}
        ]
    )
    result = response.choices[0].message.content
    return display(Markdown(result))


In [21]:
print(get_spanish_text('Wikipedia', url))

# Folleto de la Empresa Wikipedia

---

## **Acerca de Wikipedia**

Wikipedia es la enciclopedia en línea más grande y completa del mundo, que ofrece acceso a más de **6,974,000** artículos solo en inglés, y millones más en más de **300** idiomas. Sirve como un recurso valioso para estudiantes, investigadores y mentes curiosas de todo el mundo. Apoyada por la **Fundación Wikimedia**, Wikipedia es orgullosamente sin fines de lucro, confiando en donaciones para mantener sus operaciones y seguir ampliando sus ofertas.

---

## **Misión y Visión**

En Wikipedia, nuestra misión es proporcionar **acceso libre al conocimiento** para todos, en cualquier lugar. Creemos que **compartir conocimiento enriquece el mundo** y empodera a las personas para aprender, crecer y participar de manera significativa con la información. Nuestra visión es un futuro donde **todos** tengan acceso a la suma de todo el conocimiento humano.

---

## **Cultura de la Empresa**

### **Inclusividad**

Wikipedia promueve una cultura de inclusión y diversidad, acogiendo contribuciones de personas de todos los ámbitos de la vida. Este enfoque fomenta un entorno colaborativo donde se comparten ideas y perspectivas, mejorando la calidad del contenido.

### **Colaboración**

Nuestro modelo impulsado por la comunidad fomenta la colaboración entre voluntarios de todo el mundo. Apoyamos a nuestros contribuyentes a través de recursos y herramientas que facilitan la edición efectiva y el intercambio de información.

### **Transparencia**

Mantenemos la transparencia en nuestras operaciones, decisiones y en cómo gestionamos los fondos. Cada donación es contabilizada, y la comunidad es actualizada regularmente con informes de progreso sobre proyectos e iniciativas.

---

## **A Quién Servimos**

Wikipedia sirve a una vasta audiencia global, desde estudiantes y educadores hasta profesionales y entusiastas que buscan profundizar su comprensión de varios temas. Nuestra base de usuarios abarca múltiples demografías y culturas, impulsada por el deseo de información accesible.

---

## **Carreras en Wikipedia**

### **¿Por qué Unirse a Nosotros?**

- **Trabajo Impactante**: Sé parte de una misión que empodera a las personas a través del conocimiento.
- **Entorno Diverso**: Trabaja junto a personas apasionadas de diferentes orígenes y culturas.
- **Oportunidades Flexibles**: Con varios roles disponibles, desde técnicos hasta editoriales, hay un lugar para todos.

### **Oportunidades Actuales**

Siempre estamos en busca de individuos talentosos que estén apasionados por el conocimiento y el trabajo en equipo. Explora las oportunidades laborales en nuestro [sitio web oficial](https://www.wikimedia.org) para ver cómo puedes contribuir a nuestra misión.

---

## **Involúcrate**

Apóyanos convirtiéndote en donante, voluntario o defensor del conocimiento libre. Únete a la comunidad de Wikipedia y ayuda a hacer la diferencia.

### **Descargar Wikipedia**

Accede a Wikipedia mientras te desplazas descargando nuestra aplicación para **Android** o **iOS**. Disfruta de la lectura sin conexión y una interfaz personalizable para una experiencia personalizada.

--- 

### **Contáctanos**

Para más información sobre Wikipedia, cómo involucrarte, o consultas de medios, visita el [sitio oficial de Wikimedia](https://www.wikimedia.org). 

---

¡Juntos, podemos asegurar que el conocimiento siga siendo libre y accesible para todos!

None
