## Import important libraries

In [24]:
import os 
import requests
from openai import OpenAI
from IPython.display import update_display, Markdown, display
import json
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv
import gradio as gr
import anthropic

In [None]:
# Load variables from a local .env file, then read the OpenAI key from the environment.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Shape-only sanity check on the key (prefix and length); nothing is sent to the API here.
if not (api_key and api_key.startswith('sk-proj-') and len(api_key) > 10):
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# Browser-like User-Agent header that Website passes to requests.get.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    )
}

API key looks good so far


In [4]:
class Website:
    """Fetch one web page and expose its title, visible text and links.

    All attributes are filled in by ``__init__``:
      url   -- the address that was requested
      body  -- raw response payload (``response.content``, i.e. bytes)
      title -- text of the <title> tag, or 'No title Found'
      text  -- newline-separated <body> text with script/style/img/input removed
      links -- every non-empty ``href`` found on an <a> tag, exactly as written
               in the page (may be relative)
    """
    text: str
    links: List
    body: bytes
    title: str
    url: str

    def __init__(self, url, timeout=10):
        """Download and parse ``url``.

        Args:
            url: address of the page to fetch.
            timeout: seconds to wait for the server before giving up; without
                a timeout ``requests.get`` can block indefinitely.

        Raises:
            requests.RequestException: if the request fails or times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # .string is None when <title> is empty or contains nested tags; fall
        # back to the placeholder instead of leaking "None" into the prompt.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else 'No title Found'
        if soup.body:
            # Remove elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ' '
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_content(self):
        """Return the title and text formatted as a prompt fragment."""
        return f" Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [5]:
# System prompt for the link-selection call. The example at the end must be
# valid JSON, because the model is asked to answer in exactly this shape.
system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
system_prompt += "You should respond in JSON as in this example:"
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""


def get_links_user_prompt(website):
    """Compose the user message asking the model to pick brochure-worthy links.

    Args:
        website: object exposing ``url`` and ``links`` (an iterable of strings).

    Returns:
        The instruction text followed by the page's links, one per line.
    """
    sections = [
        f"Here is the list of links on the website of {website.url} - ",
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. "
        "    Do not include Terms of Service, Privacy, email links.\n",
        "Links (some might be relative links):\n",
        "\n".join(website.links),
    ]
    return "".join(sections)

In [6]:
# Model name passed as `model=` to the OpenAI chat-completion calls in this notebook.
MODEL = 'gpt-4o-mini'
# Shared OpenAI client, constructed with no explicit arguments.
# NOTE(review): presumably it picks up OPENAI_API_KEY from the environment - confirm against the SDK docs.
openai = OpenAI()

In [7]:
def get_links(url):
    """Ask the model which links found on ``url`` belong in a company brochure.

    The page is fetched with ``Website`` and its links are sent to the model
    together with ``system_prompt``; the reply is requested as a JSON object.

    Args:
        url: address of the landing page to inspect.

    Returns:
        A dict that always contains a ``"links"`` key holding a list (empty
        when the model returned nothing usable), so callers can iterate over
        ``result["links"]`` without further checks.

    Raises:
        json.JSONDecodeError: if the model's reply is not valid JSON.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': get_links_user_prompt(website)},
        ],
        response_format={'type': 'json_object'},
    )
    raw = response.choices[0].message.content
    # The message content can be None/empty; treat that as "no links" rather
    # than letting json.loads raise a TypeError.
    parsed = json.loads(raw) if raw else {}
    if not isinstance(parsed, dict):
        parsed = {}
    # Guarantee the key the rest of the notebook indexes unconditionally.
    if not isinstance(parsed.get('links'), list):
        parsed['links'] = []
    return parsed

In [8]:
# Landing page used by the demonstration cells that follow.
url = 'https://www.wikipedia.org/'
# Constructing Website performs the HTTP request and parsing immediately.
# NOTE(review): `web` is not referenced again in the visible cells.
web = Website(url)

In [9]:
# Ask the model to pick brochure-relevant links; the returned dict is shown as the cell output.
get_links(url)

{'links': [{'type': 'about page',
   'url': 'https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Terms_of_Use'},
  {'type': 'about page',
   'url': 'https://foundation.wikimedia.org/wiki/Special:MyLanguage/Policy:Privacy_policy'},
  {'type': 'donation page',
   'url': 'https://donate.wikimedia.org/?wmf_medium=portal&wmf_campaign=portalFooter&wmf_source=portalFooter'},
  {'type': 'mobile app page',
   'url': 'https://en.wikipedia.org/wiki/List_of_Wikipedia_mobile_applications'},
  {'type': 'Wikimedia Foundation',
   'url': 'https://meta.wikimedia.org/wiki/Special:MyLanguage/List_of_Wikipedias'}]}

In [10]:
def get_all_details(url):
    """Collect the text of the landing page and of every model-selected link.

    Args:
        url: address of the company's landing page.

    Returns:
        One string: the landing page content followed, for each selected
        link, by the link's type and the content of the linked page.

    Raises:
        requests.RequestException: if any of the pages cannot be fetched.
    """
    # Local import keeps this cell self-contained without touching the imports cell.
    from urllib.parse import urljoin

    result = "Landing Page:\n"
    result += Website(url).get_content()
    links = get_links(url)
    for link in links.get('links', []):
        # The link prompt warns that links may be relative; resolve them
        # against the landing page so requests always gets an absolute URL.
        # Absolute URLs pass through urljoin unchanged.
        page_url = urljoin(url, link['url'])
        result += f"\n\n{link['type']}\n"
        result += f"{Website(page_url).get_content()}\n"
    return result

In [None]:
# System prompt for brochure generation. Each continued line ends with a
# space before the backslash so that sentences are not glued together.
brochure_system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

def get_brochure_user_prompt(company_name, url, max_chars=5000):
    """Build the user message for brochure generation.

    Args:
        company_name: name inserted into the first line of the prompt.
        url: landing page; its content and the content of the selected links
            are appended via ``get_all_details``.
        max_chars: maximum length of the returned prompt in characters.
            Defaults to 5,000; pass ``None`` to disable truncation.

    Returns:
        The prompt text, cut to at most ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Keep the prompt bounded; slicing with None leaves the text untouched.
    return user_prompt[:max_chars]

In [12]:
# Print the combined landing-page and linked-page text returned by get_all_details.
print(get_all_details(url))

Landing Page:
 Webpage Title:
Wikipedia
Webpage Contents:
Wikipedia
The Free Encyclopedia
English
6,974,000+
articles
日本語
1,457,000+
記事
Русский
2 036 000+
статей
Deutsch
3.001.000+
Artikel
Español
2.021.000+
artículos
Français
2 674 000+
articles
中文
1,470,000+
条目 / 條目
Italiano
1.910.000+
voci
Português
1.146.000+
artigos
Polski
1 652 000+
haseł
Search Wikipedia
Afrikaans
العربية
Asturianu
Azərbaycanca
Български
閩南語 / Bân-lâm-gú
বাংলা
Беларуская
Català
Čeština
Cymraeg
Dansk
Deutsch
Eesti
Ελληνικά
English
Español
Esperanto
Euskara
فارسی
Français
Galego
한국어
Հայերեն
हिन्दी
Hrvatski
Bahasa Indonesia
Italiano
עברית
ქართული
Ladin
Latina
Latviešu
Lietuvių
Magyar
Македонски
مصرى
Bahasa Melayu
Bahaso Minangkabau
မြန်မာဘာသာ
Nederlands
日本語
Norsk (bokmål)
Norsk (nynorsk)
Нохчийн
Oʻzbekcha / Ўзбекча
Polski
Português
Қазақша / Qazaqşa / قازاقشا
Română
Shqip
Simple English
Sinugboanong Binisaya
Slovenčina
Slovenščina
Српски / Srpski
Srpskohrvatski / Српскохрватски
Suomi
Svenska
தமிழ்
Татарча / Tatarça

In [None]:
def create_brochure(company_name, url):
    """Stream a brochure for the company and render it live as Markdown.

    The text is rendered incrementally in the cell output through a display
    handle; nothing is returned.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page used to gather the brochure's source material.

    Returns:
        None.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role': 'system', 'content': brochure_system_prompt},
            {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ''
    display_handle = display(Markdown(' '), display_id=True)
    for chunk in stream:
        # A chunk without text contributes nothing; appending a space here
        # would scatter stray blanks through the brochure.
        response += chunk.choices[0].delta.content or ''
        # Strip only code-fence markers. Cleaning a copy of the full text
        # also catches a fence split across chunks, and leaves the ordinary
        # word "markdown" intact inside the brochure.
        rendered = response.replace("```markdown", "").replace("```", "")
        update_display(Markdown(rendered), display_id=display_handle.display_id)
        


# streaming without markdown 
# for chunk in stream:
#   print(chunk.choices[0].delta.content or ' ', end=' ')

# Brochure without streaming
# def create_brochure(company_name, url):
#     response = openai.chat.completions.create(
#         model=MODEL,
#         messages=[
#             {'role':'system', 'content':brochure_system_prompt},
#             {'role':'user', 'content':get_brochure_user_prompt(company_name, url)}
#         ]
#     )
#     result = response.choices[0].message.content
#     return display(Markdown(result))

In [14]:
# create_brochure renders the brochure itself and returns None, so it is
# called directly; wrapping it in print() would only add a stray "None".
create_brochure('Wikipedia', url)

 # Wikipedia Brochure

## About Us
Wikipedia is the world's largest and most popular free encyclopedia, comprising over **6.9 million articles in English** alone! As a project of the **Wikimedia Foundation**, we are a non-profit organization dedicated to providing free knowledge to people everywhere. With articles available in over **300 languages**, we ensure that information is accessible and informative for everyone.

## Our Mission
Our mission is to empower the world to share and access knowledge freely. We strive to create a world where every person can not only read and learn but can also contribute to expanding this wealth of information.

## Company Culture
At Wikipedia, we embrace a vibrant and collaborative culture that values diversity, openness, and community. Our contributors, also known as "Wikipedians", come from countless backgrounds and share a passion for free knowledge. Our work environment is inclusive, and we encourage a global community of contributors to engage in content creation and improvement.

## Who We Serve
We cater to a diverse audience ranging from curious learners to researchers and educators worldwide. Our users rely on Wikipedia for accurate information across various subjects, contributing to a global culture of knowledge-sharing.

## Get Involved
Want to make a difference? Join us on this journey! Wikipedia thrives on the contributions of volunteers who edit and add information. Whether you are a seasoned expert or an eager learner, your insights can help improve the quality of content. 

### Careers at Wikipedia
The Wikimedia Foundation offers exciting career opportunities in various fields including software development, community engagement, content moderation, and more. We seek passionate and innovative individuals to join our cause and help us realize our mission of disseminating free knowledge.

## Support Us
You can help us continue our work by supporting Wikipedia through donations. Your contributions enable us to maintain our platform and keep it accessible for everyone around the globe.

## Connect with Us
To learn more about our initiatives, join the Wikipedia community, or explore job opportunities, visit our website at [Wikipedia.org](https://www.wikipedia.org) or follow us on our social media channels!

---

Together, let's keep the spirit of knowledge-sharing alive! 

None


In [15]:
def get_brochure_text(company_name, url):
    """Generate the brochure in a single, non-streaming request.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page used to gather the brochure's source material.

    Returns:
        The content of the first choice in the model's reply.
    """
    conversation = [
        {'role': 'system', 'content': brochure_system_prompt},
        {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content
    

In [16]:
# System prompt for the English-to-Spanish translation request.
convert_lang_system_prompt = (
    "You are a professional translator. "
    "You take brochures written in English and convert them into Spanish. "
    "Return only the Spanish translation in **Markdown** format."
)


def get_converted_lang_user_prompt(company_name, brochure):
    """Build the user message asking for a Spanish version of ``brochure``.

    Args:
        company_name: name of the company the brochure describes.
        brochure: the English brochure text to translate.

    Returns:
        The prompt text: an introduction, the brochure, then the instruction.
    """
    segments = (
        f"You are looking at the brochure of a company called: {company_name}.\n\n",
        "    Here is the brochure:\n\n",
        f"    {brochure}.\n\n",
        "    Convert the entire brochure into only Spanish. Do not use any English and return only the Spanish version in **Markdown**.\n",
        "    ",
    )
    return "".join(segments)



In [17]:
def get_spanish_text(company_name, url):
    """Create the English brochure, translate it to Spanish and render it.

    Args:
        company_name: name of the company the brochure describes.
        url: landing page used to gather the brochure's source material.

    Returns:
        Whatever ``display`` returns for the rendered Markdown (the captured
        notebook output shows ``None``).
    """
    source_brochure = get_brochure_text(company_name, url)
    translation_messages = [
        {'role': 'system', 'content': convert_lang_system_prompt},
        {'role': 'user', 'content': get_converted_lang_user_prompt(company_name, source_brochure)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=translation_messages,
    )
    spanish_markdown = completion.choices[0].message.content
    return display(Markdown(spanish_markdown))


In [19]:
# get_spanish_text renders the translation itself and returns the result of
# display(); wrapping it in print() would only add a stray "None".
get_spanish_text('Wikipedia', url)

# Folleto de Wikipedia

### La Enciclopedia Gratuita

---

#### Acerca de Nosotros

Wikipedia es una enciclopedia colaborativa en línea, que proporciona acceso a una gran cantidad de conocimiento en una multitud de idiomas. Con más de **6,974,000** artículos en inglés y más de **55 millones** de artículos disponibles en varios idiomas, nuestra misión es hacer el conocimiento accesible para todos, en cualquier lugar y de manera gratuita.

**Idiomas Ofrecidos**:  
- **Idiomas Principales**: Inglés, Español, Deutsch, Français, Русский  
- **Ofertas Únicas**: Soporte para más de **300 idiomas** incluyendo dialectos menos conocidos como Avar, Chichewa y Cherokee.

---

#### Cultura de la Empresa

En Wikipedia, valoramos los principios de **colaboración, transparencia y comunidad**. Nuestros colaboradores voluntarios provienen de diversos antecedentes, motivados por una pasión compartida por el conocimiento y la educación. Nuestro entorno inclusivo fomenta la creatividad y la innovación, empoderando a las personas para compartir su experiencia y enriquecer la comprensión del mundo.

- **Impulsado por Voluntarios**: Wikipedia prospera gracias a las contribuciones de voluntarios de todo el mundo que escriben y editan artículos.
- **Organización Sin Fines de Lucro**: Como parte de la Fundación Wikimedia, estamos comprometidos a mantener nuestra plataforma libre de anuncios y impulsada únicamente por donaciones y la buena voluntad de nuestros simpatizantes.
  
---

#### Clientes

Nuestra base de usuarios abarca el mundo, compuesta por estudiantes, educadores, investigadores y cualquier persona con curiosidad por aprender. Wikipedia sirve como un recurso confiable para información sobre innumerables temas, desde eventos históricos hasta avances contemporáneos.

- **Instituciones Académicas**: Ampliamente utilizada por estudiantes y profesores para un acceso rápido y fácil a la información.
- **Comunidades Globales**: Las versiones en diferentes idiomas atienden a un amplio público, garantizando que el conocimiento sea accesible para hablantes de todos los idiomas.

---

#### Carreras y Empleos

Siempre estamos buscando personas talentosas que compartan nuestra visión de un mundo donde todos puedan compartir libremente la suma de todo el conocimiento. Si bien muchas contribuciones provienen de voluntarios, la Fundación Wikimedia ofrece oportunidades de carrera en varios campos, incluyendo:

- **Tecnología y Desarrollo**: Contribuye al desarrollo de nuestro software y plataforma.
- **Compromiso Comunitario**: Trabaja directamente con nuestra diversa comunidad de editores y colaboradores.
- **Roles Administrativos**: Apoya nuestra misión sin fines de lucro a través de una efectiva gestión organizacional.

**¡Únete a Nosotros!**  
Si te apasiona el conocimiento abierto y deseas hacer un impacto significativo, explora nuestras oportunidades laborales y conviértete en parte de nuestro equipo.

---

### Apóyanos

¿Quieres ayudar a mantener a Wikipedia en crecimiento? Tus donaciones van directamente a mantener y hacer crecer este recurso invaluable. Considera apoyar nuestra misión de proporcionar conocimiento gratuito a todos.

Visita nuestro sitio web para aprender más y mostrar tu apoyo:  
[Donar a Wikipedia](https://donate.wikimedia.org)

**Descarga la Aplicación de Wikipedia**  
¡Accede al conocimiento sobre la marcha! Descarga la aplicación de Wikipedia en [Android](https://play.google.com/store/apps/details?id=org.wikipedia&hl=es) o [iOS](https://apps.apple.com/app/id324715238) y disfruta de capacidades de lectura offline.

---

Juntos, continuemos empoderando mentes alrededor del mundo con acceso gratuito al conocimiento. ¡Gracias por ser parte de la comunidad de Wikipedia!

None


In [27]:

# Read the Anthropic key from the environment.
# NOTE(review): claude_api_key is not referenced again in the visible cells.
claude_api_key = os.getenv('ANTHROPIC_API_KEY')
# Shared Anthropic client, constructed with no explicit arguments.
claude = anthropic.Anthropic()

In [28]:
def get_stream_response_from_gpt(company_name, url):
    """Stream a brochure from the OpenAI model as a generator.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page used to gather the brochure's source material.

    Yields:
        The text accumulated so far, once per received chunk.
    """
    prompt_messages = [
        {'role': 'system', 'content': brochure_system_prompt},
        {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)},
    ]
    chunk_stream = openai.chat.completions.create(
        model='gpt-4o-mini',
        messages=prompt_messages,
        temperature=0.4,
        stream=True,
    )
    accumulated = ''
    for piece in chunk_stream:
        delta_text = piece.choices[0].delta.content
        if delta_text:
            accumulated += delta_text
        # Yield on every chunk, including ones that added no text.
        yield accumulated

In [29]:
def get_stream_response_from_claude(company_name, url):
    """Stream a brochure from an Anthropic Claude model as a generator.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page used to gather the brochure's source material.

    Yields:
        The text accumulated so far, once per received text chunk.
    """
    MESSAGES = [
        {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)}
    ]
    response = claude.messages.stream(
        # Must be an Anthropic model id; the OpenAI name 'gpt-4o-mini' is
        # not valid for this client.
        model='claude-3-5-haiku-latest',
        # max_tokens is a required argument of the Anthropic Messages API.
        max_tokens=2048,
        messages=MESSAGES,
        temperature=0.4,
        system=brochure_system_prompt
    )
    result = ''
    with response as stream:
        for chunk in stream.text_stream:
            result += chunk or ''
            yield result
           
         

In [30]:
def response_from_streaming_model(company_name, url, model):
    """Dispatch brochure streaming to the selected model backend.

    Args:
        company_name: name of the company, inserted into the user prompt.
        url: landing page used to gather the brochure's source material.
        model: 'GPT' or 'Claude'; matched case-insensitively so the
            dropdown value 'CLAUDE' is accepted as well.

    Yields:
        The accumulated brochure text produced by the chosen backend.

    Raises:
        ValueError: if ``model`` is empty or names an unknown backend.
    """
    # Normalise so 'Claude', 'CLAUDE' and 'claude' all select the same
    # backend; None (nothing selected) falls through to the ValueError.
    choice = str(model or '').strip().upper()
    if choice == 'GPT':
        response = get_stream_response_from_gpt(company_name, url)
    elif choice == 'CLAUDE':
        response = get_stream_response_from_claude(company_name, url)
    else:
        raise ValueError('Invalid Model')
    yield from response

In [35]:
# Gradio UI: two text boxes (company name, URL) and a model dropdown feed
# response_from_streaming_model; its output is rendered in a Markdown component.
view = gr.Interface(
    fn = response_from_streaming_model,
    inputs = [gr.Textbox(label = 'Website Name:'), gr.Textbox(label = 'URL:'), 
            gr.Dropdown(['GPT', 'CLAUDE'], label = 'Select Model:')],
    outputs = [gr.Markdown(label = 'Response:')],
    allow_flagging = 'never'
)
# share=True: the captured output shows a public gradio.live URL in addition to the local one.
view.launch(share=True)

Running on local URL:  http://127.0.0.1:7873
Running on public URL: https://0fd40d060b6368ddfb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


