# Generate Company's Brochure with Multi-shot Prompting

In [None]:
# importing necessary libraries
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [None]:
# Setup: environment variables, API key sanity check, shared constants

# override=True asks python-dotenv to let values from the .env file replace
# variables that are already set in the process environment.
load_dotenv(override=True)
api_key = os.environ.get('OPENAI_API_KEY')

# Shape check only (presence, prefix, minimum length); no request is made here.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# Model name sent by the OpenAI-backed helper functions in this notebook.
MODEL = 'gpt-4o-mini'
# The key is not passed explicitly; presumably the client picks it up from
# the environment -- confirm against the openai package documentation.
openai = OpenAI()

In [None]:
# A class to Parse webpage

# Browser-like request headers: some sites only respond properly when the
# request carries a realistic User-Agent.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A scraped web page: its title, visible body text and outgoing links.

    Attributes (all assigned in ``__init__``):
        url: The address that was requested.
        body: Raw bytes of the HTTP response.
        title: Text of the ``<title>`` tag, or "No title found".
        text: Body text with script/style/img/input elements removed, or ""
            when the document has no ``<body>``.
        links: Non-empty ``href`` values of every ``<a>`` tag, unmodified
            (so they may be relative).
    """

    def __init__(self, url, timeout=30):
        """
        Download ``url`` and parse it with BeautifulSoup.

        Args:
            url: Page to fetch.
            timeout: Seconds handed to ``requests.get``. Without a timeout a
                stalled server would block the call indefinitely.

        Raises:
            requests.exceptions.RequestException: if the request fails or
                times out.
        """
        self.url = url
        # NOTE: the HTTP status is not checked, so an error page is parsed
        # like any other page.
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # ``.string`` is None for an empty <title> or one with nested markup;
        # fall back to the placeholder instead of storing None.
        title_tag = soup.title
        if title_tag and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        if soup.body:
            # Drop elements that contribute no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        # Keep only anchors that actually carry a non-empty href.
        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and body text formatted as a prompt section."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"


In [None]:
# Scrape the Continental Automotive landing page (performs a live HTTP request).
conti = Website("https://www.continental-automotive.com/en.html")
#conti.links

# Let's use an LLM to find relevant links using multi-shot prompting

In [None]:
# System prompt for the link-selection call. It is multi-shot: two JSON
# examples show the model the response shape that is expected back.
system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
system_prompt += "You should respond in JSON as in this example:"
# The examples must themselves be valid JSON (comma between the "type" and
# "url" members, no trailing comma), otherwise the model is shown a
# malformed target format.
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}\n
"""
system_prompt += (
    "You should definitely include at least one link relating to media websites like Youtube/LinkedIn if available. "
    "You should respond in JSON as in this example:"
)
system_prompt += """
{
    "links": [
        {"type": "social media", "url": "https://full.url/goes/here/about/media"}
    ]
}
"""

In [None]:
# Show the assembled system prompt so the examples can be checked by eye.
print(system_prompt)

In [None]:
def get_links_user_prompt(website):
    """
    Build the user message asking the model to pick brochure-worthy links.

    Args:
        website: Object exposing ``url`` and ``links`` (an iterable of
            strings, one per link).

    Returns:
        The instruction text followed by the links, one per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    links_header = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([intro, instructions, links_header, link_lines])

In [None]:
# Preview the user prompt built from the links scraped into `conti`.
print(get_links_user_prompt(conti))

# Calling the API

In [None]:
def get_links(url):
    """
    Ask the model which links found on ``url`` belong in a company brochure.

    The page is scraped with ``Website`` and its links are sent together with
    ``system_prompt``; the model is asked for a JSON object in reply.

    Args:
        url: Address of the page whose links should be classified.

    Returns:
        The model's reply parsed with ``json.loads``. The prompt asks for
        ``{"links": [{"type": ..., "url": ...}, ...]}`` but the shape is not
        validated here.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model=MODEL,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [None]:
# Call: scrapes the page and sends its links to the model (live HTTP + API request)
get_links("https://www.continental-automotive.com/en.html")

# Making the second API call
We will now use the result of the first call (the relevant links) to make a second call that generates the brochure.

In [None]:
def get_all_details(url):
    """
    Collect the text of the landing page and of every model-selected link.

    Args:
        url: Landing page of the company website.

    Returns:
        One string: a "Landing page:" section followed by one section per
        selected link, each headed by the link's ``type``.

    Note:
        The landing page is downloaded twice: once here and once more inside
        ``get_links``.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    selected = get_links(url)
    print("Found links:", selected)
    for entry in selected["links"]:
        sections.append(f"\n\n{entry['type']}\n")
        sections.append(Website(entry["url"]).get_contents())
    return "".join(sections)

In [None]:
#print(get_all_details("https://www.continental-automotive.com/en.html"))

In [None]:
# System prompt for the brochure-writing call. Built from adjacent string
# literals so that every sentence is followed by an explicit space
# (backslash line continuations glue "markdown." directly onto "Include").
system_prompt_brochure = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information. "
    "If you have information about company's social media, emphasize it in the brochure with emojis of that social media platform."
)

In [None]:
def get_brochure_user_prompt(company_name, url, max_chars=20_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: Name of the company, inserted into the prompt.
        url: Landing page passed on to ``get_all_details``.
        max_chars: Maximum length of the returned prompt in characters
            (default 20,000). ``None`` disables truncation.

    Returns:
        The prompt text, cut to at most ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    # Truncate to keep the request size bounded; the scraped pages can be long.
    return user_prompt[:max_chars]

In [None]:
def create_brochure(company_name, url):
    """
    Generate the brochure in a single (non-streamed) call and render it.

    Args:
        company_name: Name of the company, used in the user prompt.
        url: Landing page of the company website.

    Returns:
        None. The model's reply is shown via ``display(Markdown(...))``.
    """
    conversation = [
        {"role": "system", "content": system_prompt_brochure},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(model=MODEL, messages=conversation)
    brochure = completion.choices[0].message.content
    display(Markdown(brochure))

In [None]:
# Generate and render the brochure in one non-streamed call (live HTTP + API requests).
create_brochure("Continental Automotive", "https://www.continental-automotive.com/en.html")

# Streaming the response instead of receiving it as a single block

In [None]:
# Streaming the contents
def stream_brochure(company_name, url):
    """
    Generate the brochure and render it incrementally as chunks arrive.

    Args:
        company_name: Name of the company, used in the user prompt.
        url: Landing page of the company website.

    Returns:
        None. The output cell is updated in place after every chunk.
    """
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt_brochure},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
        ],
        stream=True,
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A chunk without choices carries no text; skip it instead of
        # failing on choices[0].
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        # Remove only code-fence markers (with their optional "markdown"
        # language tag). The word "markdown" elsewhere in the brochure text
        # is left intact. The raw text is kept unmodified so a fence that is
        # split across chunks is still recognised once it is complete.
        visible = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(visible), display_id=display_handle.display_id)

In [None]:
# Same brochure as above, but rendered incrementally while the model responds.
stream_brochure("Continental Automotive", "https://www.continental-automotive.com/en.html")

# Using Ollama for the Brochure
We use llama3.2 here. We observed that it could not produce satisfactory results for some complex websites.

In [None]:
#Constants
# NOTE(review): OLLAMA_API and HEADERS are not referenced by any other code
# shown in this notebook -- confirm whether they are still needed.
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
# NOTE(review): this rebinds the module-level MODEL that get_links,
# create_brochure and stream_brochure read at call time, so calling those
# functions after this cell has run sends "llama3.2" to the OpenAI client.
MODEL = "llama3.2"

In [None]:
# OpenAI client pointed at a local endpoint on port 11434 (presumably Ollama's
# OpenAI-compatible API -- confirm); the api_key looks like a placeholder value.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

# Keeping system prompt single shot for llama3.2 capabilities --> 1st Call
# NOTE(review): this rebinds the module-level system_prompt, which get_links
# also reads at call time.
system_prompt = (
    "You are provided with a list of links found on a webpage. "
    "You are able to decide which of the links would be most relevant to include in a brochure about the company, "
    "such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
)
system_prompt += "You should respond in JSON as in this example:"
# The example must itself be valid JSON: a comma (not a colon) separates the
# "type" and "url" members.
system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""
# 2nd Call
# Built from adjacent string literals so that every sentence is followed by
# an explicit space (backslash line continuations glue "markdown." directly
# onto "Include").
system_prompt_brochure = (
    "You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information. "
    "If you have information about company's social media, emphasize it in the brochure with emojis of that social media platform."
)

#Helper functions
def get_links_ollama(url, model="llama3.2"):
    """
    Ask a model served through ``ollama_via_openai`` which links to keep.

    Args:
        url: Address of the page whose links should be classified.
        model: Model name sent with the request.

    Returns:
        The model's reply parsed with ``json.loads``. The prompt asks for
        ``{"links": [{"type": ..., "url": ...}, ...]}`` but the shape is not
        validated here.
    """
    page = Website(url)
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = ollama_via_openai.chat.completions.create(
        model=model,
        messages=conversation,
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)


def get_all_details_ollama(url, model="llama3.2"):
    """
    Collect the text of the landing page and of every model-selected link.

    Args:
        url: Landing page of the company website.
        model: Model name forwarded to ``get_links_ollama``.

    Returns:
        One string: a "Landing page:" section followed by one section per
        selected link, each headed by the link's ``type``.

    Note:
        The landing page is downloaded twice: once here and once more inside
        ``get_links_ollama``.
    """
    sections = ["Landing page:\n", Website(url).get_contents()]
    selected = get_links_ollama(url, model)
    print("Found links:", selected)
    for entry in selected["links"]:
        sections.append(f"\n\n{entry['type']}\n")
        sections.append(Website(entry["url"]).get_contents())
    return "".join(sections)

def get_brochure_user_prompt_ollama(company_name, url, model="llama3.2", max_chars=20_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: Name of the company, inserted into the prompt.
        url: Landing page passed on to ``get_all_details_ollama``.
        model: Model name forwarded to ``get_all_details_ollama``.
        max_chars: Maximum length of the returned prompt in characters
            (default 20,000). ``None`` disables truncation.

    Returns:
        The prompt text, cut to at most ``max_chars`` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details_ollama(url, model)
    # Truncate to keep the request size bounded; the scraped pages can be long.
    return user_prompt[:max_chars]

def create_brochure_ollama(company_name, url, model="llama3.2"):
    """
    Generate the brochure in a single (non-streamed) call and render it.

    Args:
        company_name: Name of the company, used in the user prompt.
        url: Landing page of the company website.
        model: Model name used for both the link-selection and the brochure
            requests.

    Returns:
        None. The model's reply is shown via ``display(Markdown(...))``.
    """
    conversation = [
        {"role": "system", "content": system_prompt_brochure},
        {"role": "user", "content": get_brochure_user_prompt_ollama(company_name, url, model)},
    ]
    completion = ollama_via_openai.chat.completions.create(model=model, messages=conversation)
    brochure = completion.choices[0].message.content
    display(Markdown(brochure))

# Streaming the contents
def stream_brochure_ollama(company_name, url, model="llama3.2"):
    """
    Generate the brochure and render it incrementally as chunks arrive.

    Args:
        company_name: Name of the company, used in the user prompt.
        url: Landing page of the company website.
        model: Model name used for both the link-selection and the brochure
            requests.

    Returns:
        None. The output cell is updated in place after every chunk.
    """
    stream = ollama_via_openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt_brochure},
            {"role": "user", "content": get_brochure_user_prompt_ollama(company_name, url, model)},
        ],
        stream=True,
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # A chunk without choices carries no text; skip it instead of
        # failing on choices[0].
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        # Remove only code-fence markers (with their optional "markdown"
        # language tag). The word "markdown" elsewhere in the brochure text
        # is left intact. The raw text is kept unmodified so a fence that is
        # split across chunks is still recognised once it is complete.
        visible = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(visible), display_id=display_handle.display_id)

In [None]:
# Pull the llama3.2 model by running the `ollama` command through an IPython
# shell escape (this line is not plain Python).
!ollama pull llama3.2

In [None]:
# Fetch the Vercel home page and show its parsed title as the cell output.
med = Website("https://vercel.com/")
med.title

In [None]:
# Build the brochure user prompt for Vercel with the default model ("llama3.2");
# the returned string is echoed as the cell output.
get_brochure_user_prompt_ollama("Vercel", "https://vercel.com/")

In [None]:
# Generate and render the Vercel brochure in one non-streamed call.
create_brochure_ollama("Vercel", "https://vercel.com/", "llama3.2")

In [None]:
# Same Vercel brochure, rendered incrementally while the model responds.
stream_brochure_ollama("Vercel", "https://vercel.com/", "llama3.2")