In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [2]:
# Constants

# URL of the local Ollama chat endpoint and a JSON Content-Type header dict.
# NOTE(review): neither OLLAMA_API nor HEADERS is referenced by the cells visible in this
# notebook (they reach Ollama through the OpenAI client instead) - confirm before removing.
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
# Model name passed as `model=` to the chat-completion calls below.
MODEL = "llama3.2"

In [3]:
# A class to represent a Webpage

# Some websites need browser-like headers when fetching them; Website passes this dict
# to requests.get on every fetch.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes:
        url: the URL that was fetched.
        body: the raw response bytes.
        title: text of the <title> tag, or "No title found" when it is missing or empty.
        text: text of <body> with script/style/img/input tags removed, one fragment
            per line; "" when the page has no <body>.
        links: every non-empty href found on <a> tags, in document order. Values are
            kept exactly as written in the page, so they may be relative or repeated.
    """

    def __init__(self, url, timeout=30):
        """
        Fetch `url` with the module-level browser `headers` and parse it.

        Args:
            url: address of the page to scrape.
            timeout: seconds to wait for the server before requests raises a
                timeout error, so a stalled site cannot hang the notebook cell.

        Raises:
            requests.exceptions.RequestException: if the request fails or times out.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        # soup.title.string is None when <title> is empty or contains nested tags;
        # fall back to the placeholder instead of letting "None" leak into prompts.
        self.title = (soup.title.string if soup.title else None) or "No title found"
        if soup.body:
            # Drop elements that carry no readable brochure content.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        # Anchors without an href (or with an empty one) are discarded.
        self.links = [link for link in links if link]

    def get_contents(self):
        """Return the page title and text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [4]:
# Scrape edwarddonner.com and display the hrefs collected from the page.
ed = Website("https://edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/',
 'https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/',
 'https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/',
 'https://edwarddonner.com/

In [1]:
# System prompt for the link-selection call: describes the task and shows the JSON
# shape the model is expected to reply with.
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
# The example has to be valid JSON itself, otherwise the model is shown a malformed
# template to imitate (each entry is "type", then a comma, then "url").
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [7]:
def get_links_user_prompt(website):
    """
    Build the user message that asks the model to pick brochure-worthy links.

    Args:
        website: object exposing `url` (str) and `links` (iterable of str).

    Returns:
        The instruction text followed by the site's links, one per line.
    """
    instructions = (
        f"Here is the list of links on the website of {website.url} - "
        "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return instructions + heading + link_lines

In [8]:
# Preview the exact user prompt that will be sent for the `ed` page.
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/
https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/
https://edwarddonner.com/2024/11/13/llm-engineering-resources/
https://edwarddonner.com/2024/11/13/ll

In [9]:
!ollama pull llama3.2

pulling manifest
pulling dde5aa3fc5ff... 100% ▕████████████████▏ 2.0 GB
pulling 966de95ca8a6... 100% ▕████████████████▏ 1.4 KB
pulling fcc5a6bec9da... 100% ▕████████████████▏ 7.7 KB
pulling a70ff7e570d9... 100% ▕████████████████▏ 6.0 KB
pulling 56bb8bd477a5

In [10]:
# NOTE(review): OpenAI is already imported in the imports cell at the top; this repeat is redundant.
from openai import OpenAI
# OpenAI client pointed at the local server's /v1 path instead of the default OpenAI endpoint.
# The api_key value is presumably a placeholder rather than a real credential - confirm.
ollama_via_openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

In [11]:
def get_links(url):
    """
    Scrape `url`, ask the model which of its links belong in a brochure, and
    return the model's JSON reply parsed into Python objects.

    Raises:
        json.JSONDecodeError: if the reply is not valid JSON.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
        # Ask the server for a JSON object so the reply can be parsed directly.
        response_format={"type": "json_object"},
    )
    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

In [12]:
# Scrape the Symphonize home page and display its raw hrefs (many are relative paths).
# NOTE(review): `Symphonize` holds an instance, yet is capitalised like a class name.
Symphonize = Website("https://www.symphonize.com/")
Symphonize.links

['/',
 '/about',
 '/services',
 'https://www.symphonize.com/case-studies',
 '/insights',
 '/careers',
 '/contact-us',
 '/contact-us',
 '/services',
 '/contact-us',
 '/services',
 '/services-page#web-mobile-development',
 '/services-page#ui-ux-design',
 '/services-page#360-cloud-services',
 '/services-page#intelligent-process-automation',
 '/services-page#web-mobile-development',
 '/services-page#ui-ux-design',
 '/services-page#360-cloud-services',
 '/services-page#intelligent-process-automation',
 '/website-ver3/services-page#ui-ux-design',
 '/website-ver3/services-page#web-mobile-development',
 '/website-ver3/services-page#360-cloud-services',
 '/website-ver3/services-page#intelligent-process-automation',
 '/contact-us',
 '/contact-us',
 '#top-hero-section',
 '/privacy-policy',
 'https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc',
 'https://www.facebook.com/SymphonizeInc/',
 'https://www.linkedin.com/company/symphonizeinc/',
 'https://twitter.com/i/flow/login?redi

In [13]:
# Ask the model to pick the brochure-relevant links for the Symphonize site.
get_links("https://www.symphonize.com/")

{'links': [{'type': 'about page', 'url': 'https://www.symphonize.com/about'},
  {'type': 'case studies page',
   'url': 'https://www.symphonize.com/case-studies'},
  {'type': 'insights page', 'url': 'https://www.symphonize.com/insights'},
  {'type': 'careers page', 'url': 'https://www.symphonize.com/careers'},
  {'type': 'contact us link', 'url': 'https://www.symphonize.com/contact-us'}]}

In [14]:
def get_all_details(url):
    """
    Gather the text of the landing page plus every page the model selected.

    Args:
        url: landing-page URL of the company site.

    Returns:
        One string: the landing page contents followed by a section per selected
        link, each headed by the link's "type".

    NOTE(review): a later cell in this notebook defines another get_all_details(url);
    once that cell runs it replaces this implementation.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import urljoin

    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        # The page's hrefs can be relative and the model may echo one back unchanged;
        # resolve against the landing page. Absolute URLs pass through untouched.
        result += Website(urljoin(url, link["url"])).get_contents()
    return result

In [15]:
# Show the combined landing-page and selected-page text for Symphonize.
print(get_all_details("https://www.symphonize.com/"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.symphonize.com/about'}, {'type': 'case studies page', 'url': 'https://www.symphonize.com/case-studies'}, {'type': 'insights page', 'url': 'https://www.symphonize.com/insights'}, {'type': 'company page', 'url': 'https://www.linkedin.com/company/symphonizeinc/'}, {'type': 'careers page', 'url': 'https://www.symphonize.com/careers'}]}
Landing page:
Webpage Title:
Symphonize | Amplify your digital transformation.
Webpage Contents:
About Us
Services
Case Studies
Insights
Careers
Contact
Contact
Amplify your digital transformation.
Our team of experts in UX, Engineering, and Delivery collaborates seamlessly to craft innovative solutions that maximize results.
Data-Driven, Cloud-Centric, & User-Focused solutions that resonate.
Symphonize is a renowned partner skilled in strategy, user experience design, and software development. Trusted globally, with expertise spanning business process automation to mobile and web-based appli

In [16]:
# System prompt for the brochure-writing call. Each continued line ends with a space
# before the backslash so sentences do not run together in the final string.
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

# Or uncomment the lines below for a more humorous brochure - this demonstrates how easy it is to incorporate 'tone':

# system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
# and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
# Include details of company culture, customers and careers/jobs if you have the information."


In [17]:
import requests

def validate_url(url):
    """
    Normalize a user-supplied URL so that it carries an http(s) scheme.

    Surrounding whitespace is stripped. The scheme check is case-insensitive, so
    "HTTPS://example.com" is recognised and not prefixed a second time.

    Args:
        url: URL or bare host/path such as "example.com".

    Returns:
        The trimmed URL, prefixed with "https://" when it had no http(s) scheme.
    """
    url = url.strip()
    if not url.lower().startswith(("http://", "https://")):
        url = "https://" + url
    return url

def get_all_details(url, timeout=30):
    """
    Fetch `url` and return the response body as text (raw HTML).

    Args:
        url: page to download.
        timeout: seconds to wait before giving up, so a stalled server cannot
            hang the notebook cell.

    Returns:
        The response text, or a fixed error message string when the request
        fails; the failure itself is printed.

    NOTE(review): this redefinition replaces the earlier get_all_details in this
    notebook, so the brochure prompt receives the landing page's raw HTML rather
    than cleaned text from several pages - confirm that this is intended.
    """
    try:
        # Send the notebook's browser-like headers, as Website does.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx and 5xx)
        return response.text  # Assuming you want the raw HTML content
    except requests.exceptions.RequestException as e:
        print(f"Error fetching details from URL: {e}")
        return "Error fetching content. Please check the URL."

def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for the brochure-writing call.

    Args:
        company_name: name shown to the model as the company being described.
        url: company site; a missing scheme is added by validate_url.
        max_chars: upper bound on how much fetched content is appended, which
            keeps the prompt within a manageable size. Defaults to 5,000.

    Returns:
        The instruction text followed by at most `max_chars` characters of
        whatever get_all_details returns for the URL.
    """
    url = validate_url(url)  # Ensure the URL is valid
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    details = get_all_details(url)
    user_prompt += details[:max_chars]  # Truncate if longer than max_chars
    return user_prompt


In [18]:
# Preview the brochure prompt; the output below shows that it carries raw HTML.
print(get_brochure_user_prompt("Symphonize", "https://www.symphonize.com/"))

You are looking at a company called: Symphonize
Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.
<!DOCTYPE html><!-- This site was created in Webflow. https://webflow.com --><!-- Last Published: Thu Dec 19 2024 15:27:44 GMT+0000 (Coordinated Universal Time) --><html data-wf-domain="www.symphonize.com" data-wf-page="65e7140156ebb28e19473bf7" data-wf-site="634690d9583ce36a5e30c703"><head><meta charset="utf-8"/><title>Symphonize | Amplify your digital transformation.</title><meta content="Our team of experts in UX, Engineering, and Delivery collaborates seamlessly to craft innovative solutions that maximize results." name="description"/><meta content="Symphonize | Amplify your digital transformation." property="og:title"/><meta content="Our team of experts in UX, Engineering, and Delivery collaborates seamlessly to craft innovative solutions that maximize results." property="og:description"/><met

In [19]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company in a single, non-streaming model call
    and render the reply as Markdown in the notebook. Returns nothing.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=chat_messages,
    )
    brochure_markdown = completion.choices[0].message.content
    display(Markdown(brochure_markdown))

In [20]:
# Generate and render the Symphonize brochure in one shot.
create_brochure("Symphonize", "https://www.symphonize.com/")

**Symphonize Brochure**
======================

[Cover Image: Symphonyze Logo](https://cdn.prod.website-files.com/634690d9583ce36a5e30c703/65701df905012326402f8256_White-Logo.png)

Welcome to SymphoniZe
---------------------

Amplify your digital transformation with our team of experts in UX, Engineering, and Delivery. We craft innovative solutions that maximize results.

**Our Story**
-------------

At SymphoniZe, we believe that technology should enhance human experience, not control it. Our team of experts comes together from diverse backgrounds to deliver cutting-edge solutions that meet the unique needs of our clients.

**Services**
----------

* UX Design
* Engineering
* Delivery

**What We Do Best**
-------------------

We help organizations transform their digital presence with innovative solutions that:

* Enhance user experience
* Drive business growth
* Deliver exceptional results

**Case Studies**
--------------

Read about the success stories of our clients who have trusted us to deliver high-quality solutions.

[Link to Case Studies Page](https://www.symphonize.com/case-studies)

**Insights & Expertise**
-----------------------

Stay ahead of the curve with our thought leadership articles, insights, and expert advice on the latest trends in digital transformation.

[Link to Insights Page](https://www.symphonize.com/insights)

**Careers & Join Us**
---------------------

Join our team of innovators and help shape the future of digital transformation. We offer exciting opportunities for talented individuals to grow with us.

[Link to Careers Page](https://www.symphonize.com/careers)

**Stay Connected**
-----------------

Follow us on social media to stay up-to-date with the latest news, trends, and insights from SymphoniZe.

* LinkedIn: [LinkedIn Link]
* Twitter: [Twitter Link]
* Facebook: [Facebook Link]

Contact Us
----------

Get in touch with our team today to learn more about how we can help you amplify your digital transformation.

[Link to Contact Us Page](https://www.symphonize.com/contact-us)

In [23]:
def stream_brochure(company_name, url):
    """
    Generate a brochure with a streaming model call, re-rendering the Markdown
    output in place as each chunk arrives. Returns nothing.

    The raw model text is accumulated separately from the decorated text that is
    displayed. Decorating the accumulated string in place would re-wrap
    "Company Highlights" inside its own <span> on every chunk, because the
    replacement text contains the search text.
    """
    stream = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    raw_response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        raw_response += chunk.choices[0].delta.content or ''

        # Strip code fences the model may wrap its answer in. Only the "markdown"
        # fence tag is removed, so the word survives in ordinary brochure prose.
        rendered = raw_response.replace("```markdown", "").replace("```", "")

        # Add colors using HTML and emojis
        rendered = rendered.replace(
            "Company Highlights",
            '<span style="color:blue;">Company Highlights 🌟</span>'
        )

        update_display(Markdown(rendered), display_id=display_handle.display_id)


In [24]:
# Stream the Symphonize brochure, updating the rendered Markdown as chunks arrive.
stream_brochure("Symphonize", "https://www.symphonize.com/")

**Symphonize Brochure**
=========================

**Overview**
------------

Symphonize is a leading digital transformation solutions provider that helps organizations amplify their business potential. With a team of experts in UX, Engineering, and Delivery, we collaborate seamlessly to craft innovative solutions that maximize results.

**Our Philosophy**
-----------------

At Symphonize, we believe that digital transformation is not just about technology, but about people and culture too. We're committed to creating solutions that are both effective and engaging, because we know that's what makes all the difference for our clients.

**Our Clients**
--------------

We've had the privilege of working with some amazing organizations across various industries. Our clients trust us to deliver innovative solutions that drive real results.

* [Your Company Name], a leading [industry] player
* [Another Client's Name], a pioneering brand in [industry]
* [Third Client's Name], a innovative start-up

**Our Services**
-----------------

We offer a range of digital transformation services, including:

* UX design and consulting
* Engineering and development
* Delivery and implementation

**Why Choose Symphonize?**
-------------------------

* Expertise: Our team has years of experience in delivering successful digital projects.
* Collaboration: We believe that collaboration is key to success. That's why we work closely with our clients to understand their needs and deliver solutions that meet their goals.
* Innovation: We're always looking for new ways to solve problems and innovate our services.

**Join Our Team**
---------------

If you're passionate about digital transformation and want to be part of a talented team, check out our careers page for current openings. [link]

**Contact Us**
--------------

Ready to discuss how we can help your organization amplify its digital transformation? Contact us today! [link]