## Import important libraries

In [None]:
import os 
import requests
from openai import OpenAI
from IPython.display import update_display, Markdown, display
import json
from bs4 import BeautifulSoup
from typing import List
from dotenv import load_dotenv

In [None]:
# Load variables from a .env file (if any) and read the OpenAI key from the environment.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

# Cheap sanity check on the key's shape only; nothing is sent to the API here.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")


# Browser-like User-Agent sent with every page fetch made through `requests`.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
}

In [None]:
class Website:
    """A fetched web page reduced to its title, visible text and outgoing links.

    Constructing an instance performs an HTTP GET of ``url`` using the
    module-level ``headers`` and parses the response with BeautifulSoup.
    """

    text: str    # visible body text, one fragment per line (' ' when the page has no <body>)
    links: List  # non-empty href values of every <a> tag, exactly as written in the page
    body: bytes  # raw response payload (``response.content``)
    title: str   # page title, or 'No title Found' when missing or empty
    url: str     # the URL that was requested

    def __init__(self, url, timeout=30):
        """Download and parse ``url``.

        Args:
            url: Address of the page to fetch.
            timeout: Seconds to wait for the server before ``requests``
                gives up. Without a timeout a stalled server would block
                the caller indefinitely.

        Raises:
            requests.exceptions.RequestException: If the request fails or
                times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # `.string` is None when <title> is empty or contains nested tags,
        # so fall back to the placeholder in that case as well.
        page_title = soup.title.string if soup.title else None
        self.title = page_title or 'No title Found'

        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
        else:
            self.text = ' '

        # Keep only anchors that actually carry an href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_content(self):
        """Return the page title and text formatted as one prompt section."""
        return f" Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [None]:
# System prompt for the link-selection call. The embedded example must be
# valid JSON, because the model is asked to answer in exactly this shape.
system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
system_prompt += "You should respond in JSON as in this example:"
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""


def get_links_user_prompt(website):
    """Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: Object exposing ``url`` (str) and ``links`` (iterable of str).

    Returns:
        The instruction text followed by the page's links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "    Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return intro + instructions + heading + link_lines

In [None]:
# Chat model name passed to every completion request in this notebook.
MODEL = 'gpt-4o-mini'
# Shared API client, built with no explicit arguments.
# NOTE(review): presumably picks up OPENAI_API_KEY from the environment - confirm.
openai = OpenAI()

In [None]:
def get_links(url):
    """Ask the model which links on the page at ``url`` belong in a brochure.

    Fetches the page, sends its links to the chat model with a JSON-object
    response format, and returns the decoded reply.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The model's reply parsed from JSON.
    """
    page = Website(url)
    chat_messages = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        response_format={'type': 'json_object'},
    )
    return json.loads(completion.choices[0].message.content)

In [None]:
# Sample site used by the demo cells in this notebook.
url = 'https://www.wikipedia.org/'
# Fetch and parse the sample page once as a quick check that scraping works.
web = Website(url)

In [None]:
# Demo: run link selection against the sample URL.
get_links(url)

In [None]:
def get_all_details(url):
    """Collect the landing page plus every model-selected page as one text blob.

    Args:
        url: Address of the company's landing page.

    Returns:
        A string starting with the landing page content, followed by one
        section per selected link (link type, then that page's content).
    """
    # Local import keeps this function self-contained.
    from urllib.parse import urljoin

    result = "Landing Page:\n"
    result += Website(url).get_content()
    links = get_links(url)
    # The model's reply is not guaranteed to match the requested schema, so
    # tolerate a missing/null "links" list and entries without a URL.
    for link in links.get('links') or []:
        link_url = link.get('url')
        if not link_url:
            continue
        result += f"\n\n{link.get('type', 'relevant page')}\n"
        # The prompt warns that links may be relative; resolve against the
        # landing page so the fetch gets an absolute URL. Absolute URLs pass
        # through urljoin unchanged.
        result += f"{Website(urljoin(url, link_url)).get_content()}\n"
    return result

In [None]:
# System prompt for the brochure-writing call. Each fragment ends with a
# space so the joined sentences do not run together.
brochure_system_prompt = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information."
)

def get_brochure_user_prompt(company_name, url):
    """Build the user message for the brochure request.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page whose content (and selected sub-pages) is included.

    Returns:
        The prompt text, cut to at most 5,000 characters.
    """
    sections = [
        f"You are looking at a company called: {company_name}\n",
        "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n",
        get_all_details(url),
    ]
    # Cap the prompt length; anything beyond 5,000 characters is dropped.
    return "".join(sections)[:5_000]

In [None]:
# Demo: print the combined landing-page and sub-page text for the sample URL.
print(get_all_details(url))

In [None]:
def create_brochure(company_name, url):
    """Stream a markdown brochure for the company, rendering it as it arrives.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page used as the source material.

    Returns:
        The full brochure text with code-fence markers removed (an empty
        string if the stream produced no content).
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role': 'system', 'content': brochure_system_prompt},
            {'role': 'user', 'content': get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    response = ''
    # Bound up front so the return below is safe even if the stream is empty.
    response_cleaned = ''
    display_handle = display(Markdown(' '), display_id=True)
    for chunk in stream:
        # NOTE(review): some chunks may arrive without any choices - skip them
        # rather than index into an empty list; confirm against the API docs.
        if not chunk.choices:
            continue
        # Chunks with no content contribute nothing; appending a space here
        # would scatter stray blanks through the brochure.
        response += chunk.choices[0].delta.content or ''
        # Remove only the fence markers. Re-cleaning the whole accumulated
        # text each time also handles a fence split across two chunks, and
        # leaves the ordinary word "markdown" in the brochure body intact.
        response_cleaned = response.replace("```markdown", '').replace("```", '')
        update_display(Markdown(response_cleaned), display_id=display_handle.display_id)

    return response_cleaned


# streaming without markdown 
# for chunk in stream:
#   print(chunk.choices[0].delta.content or ' ', end=' ')

# Brochure without streaming
# def create_brochure(company_name, url):
#     response = openai.chat.completions.create(
#         model=MODEL,
#         messages=[
#             {'role':'system', 'content':brochure_system_prompt},
#             {'role':'user', 'content':get_brochure_user_prompt(company_name, url)}
#         ]
#     )
#     result = response.choices[0].message.content
#     return display(Markdown(result))

In [None]:
# Demo: stream a brochure for the sample site, then print the returned text.
print(create_brochure('Wikipedia', url))

In [None]:
# System prompt for the English-to-Spanish translation call.
convert_lang_system_prompt = (
    "You are a professional translator. You take brochures written in English and "
    "convert them into Spanish. Return only the Spanish translation in **Markdown** format."
)


def get_converted_lang_user_prompt(company_name, brochure):
    """Build the user message asking for a Spanish translation of a brochure.

    Args:
        company_name: Name of the company the brochure describes.
        brochure: The English brochure text to translate.

    Returns:
        The prompt text embedding the company name and the brochure.
    """
    return (
        f"You are looking at the brochure of a company called: {company_name}.\n\n"
        "    Here is the brochure:\n\n"
        f"    {brochure}.\n\n"
        "    Convert the entire brochure into Spanish. Return only the Spanish version in **Markdown**.\n"
        "    "
    )



In [None]:
def get_spanish_text(company_name, url):
    """Create the English brochure, translate it to Spanish, and show the result.

    The English brochure is generated (and rendered) by ``create_brochure``;
    its text is then sent to the model for translation.

    Args:
        company_name: Name of the company the brochure is about.
        url: Landing page used as the source material.

    Returns:
        The Spanish brochure as markdown text.
    """
    english_brochure = create_brochure(company_name, url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {'role': 'system', 'content': convert_lang_system_prompt},
            {'role': 'user', 'content': get_converted_lang_user_prompt(company_name, english_brochure)}
        ]
    )
    result = response.choices[0].message.content or ''
    # Render for the notebook, but hand the text back to the caller:
    # display() without display_id yields None, so returning its result
    # made callers that print the return value show "None".
    display(Markdown(result))
    return result


In [None]:
# Demo: build the brochure for the sample site, translate it, and print what the call returns.
print(get_spanish_text('Wikipedia', url))