In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')
MODEL = 'gpt-4o-mini'
openai = OpenAI()

In [3]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
ed = Website("https://and-element.com/")
ed.links

['/',
 '/about',
 '/projects',
 '/blog',
 '/contact-us',
 '#services',
 '/services/website-development',
 '/services/ux-design',
 '/services/app-development',
 '/services/digital-marketing',
 '/services/ai-development',
 '/contact-us',
 '/projects/anglian-truck-tyres',
 '/projects/arma-karma',
 '/projects/le-sankey-arts',
 '/projects/live-you',
 '/projects/retreat-east',
 '/projects/samsung',
 '/projects/skipper-my-boat',
 '/projects/university-of-suffolk',
 '/projects',
 '/contact-us',
 'https://calendly.com/luke-and-element/30min',
 'tel:01206 259355',
 'https://www.instagram.com/elementsoftworks/?hl=en',
 'https://m.facebook.com/andelementagency',
 'https://www.linkedin.com/company/and-element/mycompany/',
 '/cdn-cgi/l/email-protection#e591808488a5848b81c880898088808b91cb868a88',
 'tel:01206 259355',
 '/',
 '/about',
 '/projects',
 '/services',
 '/contact-us',
 '/services/website-development',
 '/services/ux-design',
 '/services/app-development',
 '/services/digital-marketing',
 '/s

## First step: Have GPT-4o-mini figure out which links are relevant

### Use a call to gpt-4o-mini to read the links on a webpage, and respond in structured JSON.  
It should decide which links are relevant, and replace relative links such as "/about" with "https://company.com/about".  
We will use "one shot prompting" in which we provide an example of how it should respond in the prompt.

This is an excellent use case for an LLM, because it requires nuanced understanding. Imagine trying to code this without LLMs by parsing and analyzing the webpage - it would be very hard!

Sidenote: there is a more advanced technique called "Structured Outputs" in which we require the model to respond according to a spec. We cover this technique in Week 8 during our autonomous Agentic AI project.

In [5]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages and projects pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [6]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages and projects pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [7]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [8]:
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://and-element.com/ - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
/
/about
/projects
/blog
/contact-us
#services
/services/website-development
/services/ux-design
/services/app-development
/services/digital-marketing
/services/ai-development
/contact-us
/projects/anglian-truck-tyres
/projects/arma-karma
/projects/le-sankey-arts
/projects/live-you
/projects/retreat-east
/projects/samsung
/projects/skipper-my-boat
/projects/university-of-suffolk
/projects
/contact-us
https://calendly.com/luke-and-element/30min
tel:01206 259355
https://www.instagram.com/elementsoftworks/?hl=en
https://m.facebook.com/andelementagency
https://www.linkedin.com/company/and-element/mycompany/
/cdn-cgi/l/email-protection#e591808488a5848b81c880898088808b91cb868a88
tel:01206 259355
/
/

In [9]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [10]:
# Anthropic has made their site harder to scrape, so I'm using HuggingFace..

ed = Website("https://and-element.com/")
ed.links

['/',
 '/about',
 '/projects',
 '/blog',
 '/contact-us',
 '#services',
 '/services/website-development',
 '/services/ux-design',
 '/services/app-development',
 '/services/digital-marketing',
 '/services/ai-development',
 '/contact-us',
 '/projects/anglian-truck-tyres',
 '/projects/arma-karma',
 '/projects/le-sankey-arts',
 '/projects/live-you',
 '/projects/retreat-east',
 '/projects/samsung',
 '/projects/skipper-my-boat',
 '/projects/university-of-suffolk',
 '/projects',
 '/contact-us',
 'https://calendly.com/luke-and-element/30min',
 'tel:01206 259355',
 'https://www.instagram.com/elementsoftworks/?hl=en',
 'https://m.facebook.com/andelementagency',
 'https://www.linkedin.com/company/and-element/mycompany/',
 '/cdn-cgi/l/email-protection#730716121e33121d175e161f161e161d075d101c1e',
 'tel:01206 259355',
 '/',
 '/about',
 '/projects',
 '/services',
 '/contact-us',
 '/services/website-development',
 '/services/ux-design',
 '/services/app-development',
 '/services/digital-marketing',
 '/s

In [11]:
get_links("https://and-element.com/")

{'links': [{'type': 'about page', 'url': 'https://and-element.com/about'},
  {'type': 'projects page', 'url': 'https://and-element.com/projects'},
  {'type': 'services page', 'url': 'https://and-element.com/services'}]}

## Second step: make the brochure!

Assemble all the details into another prompt to GPT4-o

In [12]:
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [14]:
#print(get_all_details("https://and-element.com/")

SyntaxError: incomplete input (3405127960.py, line 1)

In [15]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
# Include details of company culture, customers and careers/jobs if you have the information."


In [16]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000] # Truncate if more than 5,000 characters
    return user_prompt

In [17]:
get_brochure_user_prompt("ed", "https://and-element.com/")

Found links: {'links': [{'type': 'about page', 'url': 'https://and-element.com/about'}, {'type': 'projects page', 'url': 'https://and-element.com/projects'}, {'type': 'services page', 'url': 'https://and-element.com/services'}]}


'You are looking at a company called: ed\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nWeb Development and Digital Marketing Agency • &Element\nWebpage Contents:\n&Element\nAbout\nServices\nProjects\nInsights\nContact Us\nWe are an\nInnovative\nWeb Development Agency in Essex\nWe are\xa0&Element, an award-winning creative agency specialising in web development, artificial intelligence and app development services alongside SEO, brand strategy and UX reviews.\nDiscover &Element\nWe’ve helped hundreds of businesses digitally optimise their company and increase profits.\n9+\n9+ years of experience\nOur team is hyper-focused on helping businesses optimise their internal cloud platforms and growth their digital presence.\n230% average increase in ROI across our suite of services.\n235+ projects completed since we were founded in 2015.\n45m people reached across t

In [18]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [19]:
create_brochure("ed", "https://and-element.com/")

Found links: {'links': [{'type': 'about page', 'url': 'https://and-element.com/about'}, {'type': 'projects page', 'url': 'https://and-element.com/projects'}, {'type': 'services page', 'url': 'https://and-element.com/services'}]}


```markdown
# Welcome to &Element
**Your Premier Web Development and Digital Marketing Agency**

---

## About Us
At **&Element**, we are an award-winning creative agency located in Essex, specializing in web development, artificial intelligence, app development, and digital marketing services. Since our inception in 2015, we've dedicated ourselves to helping businesses digitally optimize their operations, leading to increased profits and enhanced digital presence.

**Key Achievements:**
- **9+ years of experience**
- **230% average increase in ROI** across our suite of services
- **235+ projects completed** since 2015
- **45 million people reached** globally through our work

---

## Our Services
We provide a comprehensive suite of services tailored to meet your business needs:

### 1. Web Development
- **Specialization:** React.js websites and dashboards
- **Outcome:** Bespoke responsive websites designed to increase sales

### 2. UX Design
- **Goal:** Create unforgettable and highly converting brand experiences
- **Benefit:** Overall brand strategy improvements

### 3. App Development
- **Scope:** Professional app development for all major platforms, including iOS, Android, Linux, Windows, and Mac
- **Specialty:** Store kiosks and Progressive Web Apps (PWAs)

### 4. Digital Marketing
- **Focus:** Navigate the digital landscape for improved online rankings
- **Methodology:** Scalable marketing strategies that deliver results

### 5. Artificial Intelligence
- **Services:** Smart applications, web scrapers, and digital assistants
- **Impact:** Save time and effort while enhancing user satisfaction

---

## Company Culture
At &Element, our culture thrives on innovation, collaboration, and a results-driven mindset. We believe in the power of teamwork and value the contributions of every team member. Our work environment is designed to foster creativity and a strong sense of community, ensuring that we grow together while achieving excellence for our clients.

---

## Our Clients Speak
Here’s what some of our valued clients have to say about working with us:

- **Ruth Patron, Centre Manager, University of Suffolk:**  
  “&Element created us an immersive brand and brand strategy for Entrepreneurs Forge. The team worked closely with us to deliver exactly what we wanted.”

- **Dr. Peter Cochrane OBE, Ex-CTO of BT:**  
  “There is nothing like a change of career for creating a tidal wave of disruption; &Element are the best I have worked with to progress this change.”

- **Christopher Luich, Head of Operations, Ballen Studios:**  
  “&Element have been fundamental to developing our AI dashboard and other machine learning solutions that have enabled us to modernize our company.”

---

## Careers at &Element
We are always on the lookout for passionate and talented individuals to join our team. If you're interested in building a career in a dynamic and innovative environment, we encourage you to check our career opportunities on our website. At &Element, you won’t just be working on projects; you will be part of a creative force that is shaping the digital landscape.

---

## Get in Touch
Want to discover how we can help you save time and money while driving your business forward? **[Contact us](#)** today!

---

**&Element - Your partners in digital excellence.**
```


## Finally - a minor improvement

With a small adjustment, we can change this so that the results stream back from OpenAI,
with the familiar typewriter animation

In [None]:
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [None]:
stream_brochure("ed", "https://and-element.com/")

In [None]:
# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:

stream_brochure("HuggingFace", "https://huggingface.co")