In [53]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import json
import os
from typing import List
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [54]:
# Initialize and constants
# The OpenAI client is pointed at an OpenAI-compatible endpoint on localhost:11434
# (presumably a local Ollama server, given the placeholder api_key — confirm for your setup).

openai = OpenAI(base_url='http://localhost:11434/v1', api_key='ollama')

In [55]:
# A class to represent a Webpage

# Some websites need you to use proper headers when fetching them:
# this dict is passed to requests.get() in Website.__init__ so each request carries a browser-like User-Agent.
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links.

    Attributes:
        url:   the URL that was fetched.
        body:  raw response bytes.
        title: text of the <title> tag, or "No title found" when it is absent or empty.
        text:  visible text of <body> with script/style/img/input elements removed
               ("" when the page has no <body>).
        links: every non-empty href found on an <a> tag, in document order
               (relative links and duplicates are kept as-is).
    """

    def __init__(self, url, timeout=30):
        """
        Fetch `url` and parse it.

        Args:
            url: address of the page to download.
            timeout: seconds to wait for the server before requests gives up;
                without it a stalled server would hang the notebook indefinitely.

        Raises:
            requests.RequestException: if the page cannot be fetched.
        """
        self.url = url
        response = requests.get(url, headers=headers, timeout=timeout)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')

        # soup.title.string is None when <title> is empty or contains nested tags,
        # so fall back in that case too instead of storing None as the title.
        title_text = soup.title.string if soup.title else None
        self.title = title_text if title_text else "No title found"

        if soup.body:
            # Drop elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        self.links = [href for href in hrefs if href]

    def get_contents(self):
        """Return the title and body text formatted as one prompt-ready string."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [56]:
# Let's try one out. Change the website and add print statements to follow along.
# Fetches the page once, then prints the parsed title followed by the visible body text.

ed = Website("https://www.symphonize.com/")
print(ed.title)
print(ed.text)

Symphonize | Solutions That Resonate
About Us
Services
Case Studies
Insights
Careers
Contact
Contact
Solutions that resonate.
Our team of experts in UX, engineering, and delivery collaborates seamlessly to craft innovative solutions that connect with your customers and maximize results.
Tailored, brand-enhancing solutions that make noise.
For over 15 years, Symphonize has been a trusted partner specializing in strategy, user experience design, and software development. With global expertise spanning business process automation to mobile and web applications, we collaborate with clients across various industries, from startups to multinational enterprises.
Learn More
The challenge of conducting digital transformation.
Digital transformation presents major challenges for many organizations, often due to a lack of skilled expertise. We integrate enterprise architecture, development, strategic business insight, and thoughtful UX design. This enables us to build the right team for your digi

In [57]:
# Fetch the page again and show the href values collected from its <a> tags
# (relative links and duplicates are kept exactly as they appear in the page).
ed=Website("https://www.symphonize.com/")
ed.links

['/',
 '/about',
 '/services',
 '/case-studies',
 '/insights',
 '/careers',
 '/contact-us',
 '/contact-us',
 '/services',
 '/services',
 '/services',
 '/contact-us',
 '/',
 '/privacy-policy',
 'https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc',
 'https://www.facebook.com/SymphonizeInc/',
 'https://www.linkedin.com/company/symphonizeinc/',
 'https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc',
 'https://www.facebook.com/SymphonizeInc/',
 'https://www.linkedin.com/company/symphonizeinc/',
 '/privacy-policy']

### Asking the model to analyze the links on the site

In [59]:
# System prompt for the link-selection call: describes the task and shows the exact
# JSON shape expected back. The example must itself be valid JSON, because the model
# imitates it and the reply is parsed with json.loads().
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [60]:
# Inspect the assembled system prompt exactly as the model will receive it.
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [61]:
def get_links_user_prompt(website):
    """
    Compose the user message that asks the model to pick brochure-worthy links.

    Args:
        website: object exposing `url` (str) and `links` (list of str).

    Returns:
        The instruction text followed by one link per line.
    """
    intro = f"Here is the list of links on the website of {website.url} - "
    instructions = (
        "please decide which of these are relevant web links for a brochure about the company, "
        "respond with the full https URL in JSON format. "
        "Do not include Terms of Service, Privacy, email links.\n"
    )
    heading = "Links (some might be relative links):\n"
    link_lines = "\n".join(website.links)
    return "".join([intro, instructions, heading, link_lines])

In [62]:
# Preview the user prompt built from the links of the page fetched into `ed` above.
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://www.symphonize.com/ - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
/
/about
/services
/case-studies
/insights
/careers
/contact-us
/contact-us
/services
/services
/services
/contact-us
/
/privacy-policy
https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc
https://www.facebook.com/SymphonizeInc/
https://www.linkedin.com/company/symphonizeinc/
https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc
https://www.facebook.com/SymphonizeInc/
https://www.linkedin.com/company/symphonizeinc/
/privacy-policy


In [63]:
def get_links(url):
    """
    Scrape `url` and ask the model which of its links belong in a brochure.

    Args:
        url: address of the page whose links should be classified.

    Returns:
        The model's JSON reply parsed into Python objects.
    """
    page = Website(url)
    chat_messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": get_links_user_prompt(page)},
    ]
    completion = openai.chat.completions.create(
        model="llama3.2",
        messages=chat_messages,
        # Ask the endpoint for a JSON object so the reply can be parsed directly.
        response_format={"type": "json_object"},
    )
    return json.loads(completion.choices[0].message.content)

In [64]:
# Show the raw links scraped from the site; get_links() scrapes the same page itself
# and sends its list of links to the model.

link = Website("https://www.symphonize.com/")
link.links

['/',
 '/about',
 '/services',
 '/case-studies',
 '/insights',
 '/careers',
 '/contact-us',
 '/contact-us',
 '/services',
 '/services',
 '/services',
 '/contact-us',
 '/',
 '/privacy-policy',
 'https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc',
 'https://www.facebook.com/SymphonizeInc/',
 'https://www.linkedin.com/company/symphonizeinc/',
 'https://twitter.com/i/flow/login?redirect_after_login=%2FSymphonizeInc',
 'https://www.facebook.com/SymphonizeInc/',
 'https://www.linkedin.com/company/symphonizeinc/',
 '/privacy-policy']

In [65]:
# Ask the model which links matter; the cell output is the parsed JSON reply.
get_links("https://www.symphonize.com/")

{'links': [{'type': 'About page', 'url': 'https://www.symphonize.com/about'},
  {'type': 'Services page', 'url': 'https://www.symphonize.com/services'},
  {'type': 'Case Studies page',
   'url': 'https://www.symphonize.com/case-studies'},
  {'type': 'Insights page', 'url': 'https://www.symphonize.com/insights'},
  {'type': 'Careers/Jobs page', 'url': 'https://www.symphonize.com/careers'},
  {'type': 'Contact page', 'url': 'https://www.symphonize.com/contact-us'}]}

In [66]:
def get_all_details(url):
    """
    Collect the text of the landing page plus every page the model selected.

    Args:
        url: address of the company's landing page.

    Returns:
        One string containing the landing-page contents followed by a titled
        section for each selected link that could be fetched.

    Raises:
        requests.RequestException: if the landing page itself cannot be fetched.
    """
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)

    # The reply comes from a language model, so do not trust its shape:
    # the "links" key may be missing and entries may lack a usable URL.
    for link in links.get("links", []):
        if not isinstance(link, dict) or not link.get("url"):
            continue
        # Resolve relative links against the landing page; absolute URLs pass through unchanged.
        page_url = urljoin(url, link["url"])
        try:
            contents = Website(page_url).get_contents()
        except requests.RequestException as err:
            # One dead or invented link should not abort the whole brochure.
            print(f"Skipping {page_url}: {err}")
            continue
        result += f"\n\n{link.get('type', 'page')}\n"
        result += contents
    return result

In [67]:
# Print the combined text of the landing page and the pages chosen by the model.
print(get_all_details("https://www.symphonize.com/"))

Found links: {'links': [{'type': 'about page', 'url': 'https://www.symphonize.com/about'}, {'type': 'services page', 'url': 'https://www.symphonize.com/services'}, {'type': 'case studies page', 'url': 'https://www.symphonize.com/case-studies'}, {'type': 'insights page', 'url': 'https://www.symphonize.com/insights'}, {'type': 'careers page', 'url': 'https://www.symphonize.com/careers'}]}
Landing page:
Webpage Title:
Symphonize | Solutions That Resonate
Webpage Contents:
About Us
Services
Case Studies
Insights
Careers
Contact
Contact
Solutions that resonate.
Our team of experts in UX, engineering, and delivery collaborates seamlessly to craft innovative solutions that connect with your customers and maximize results.
Tailored, brand-enhancing solutions that make noise.
For over 15 years, Symphonize has been a trusted partner specializing in strategy, user experience design, and software development. With global expertise spanning business process automation to mobile and web applications

In [68]:
# System prompt for brochure generation. Each continued line ends with a space before
# the backslash so the sentences do not run together in the final string.
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

In [69]:
def get_brochure_user_prompt(company_name, url, max_chars=5_000):
    """
    Build the user message for brochure generation.

    Args:
        company_name: name of the company, quoted at the top of the prompt.
        url: landing-page address passed on to get_all_details().
        max_chars: maximum length of the returned prompt; longer prompts are
            cut at this many characters. Pass None to disable truncation.

    Returns:
        The prompt text, truncated to `max_chars` characters.
    """
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += "Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    return user_prompt[:max_chars]  # Truncate so the prompt stays within the model's context budget

In [70]:
# Preview the (truncated) user prompt that will be sent for brochure generation.
get_brochure_user_prompt("Symphonize","https://www.symphonize.com/")

Found links: {'links': [{'type': 'About page', 'url': 'https://www.symphonize.com/about'}, {'type': 'Services page', 'url': 'https://www.symphonize.com/services'}, {'type': 'Case studies page', 'url': 'https://www.symphonize.com/case-studies'}, {'type': 'Insights page', 'url': 'https://www.symphonize.com/insights'}, {'type': 'Careers/Jobs page', 'url': 'https://www.symphonize.com/careers'}, {'type': 'Contact us page', 'url': 'https://www.symphonize.com/contact-us'}]}


'You are looking at a company called: Symphonize\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nSymphonize | Solutions That Resonate\nWebpage Contents:\nAbout Us\nServices\nCase Studies\nInsights\nCareers\nContact\nContact\nSolutions that resonate.\nOur team of experts in UX, engineering, and delivery collaborates seamlessly to craft innovative solutions that connect with your customers and maximize results.\nTailored, brand-enhancing solutions that make noise.\nFor over 15 years, Symphonize has been a trusted partner specializing in strategy, user experience design, and software development. With global expertise spanning business process automation to mobile and web applications, we collaborate with clients across various industries, from startups to multinational enterprises.\nLearn More\nThe challenge of conducting digital transformation.\nDigital transfo

In [71]:
def create_brochure(company_name, url):
    """
    Generate a brochure for the company in one shot and render it as Markdown.

    Args:
        company_name: name of the company to write about.
        url: landing-page address used to gather the source material.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": get_brochure_user_prompt(company_name, url)},
    ]
    completion = openai.chat.completions.create(
        model="llama3.2",
        messages=chat_messages,
    )
    brochure_md = completion.choices[0].message.content
    display(Markdown(brochure_md))

In [81]:
# Generate and render the brochure in a single (non-streaming) request.
create_brochure("Symphonize","https://www.symphonize.com/")

Found links: {'links': [{'type': 'About page', 'url': 'https://www.symphonize.com/about'}, {'type': 'Case studies page', 'url': 'https://www.symphonize.com/case-studies'}, {'type': 'Insights page', 'url': 'https://www.symphonize.com/insights'}, {'type': 'Careers/Jobs page', 'url': 'https://www.symphonize.com/careers'}, {'type': 'Company (root) page', 'url': 'https://www.symphonize.com'}]}


**Brochure: Symphony to the Beat**

[Cover Image: A group of diverse individuals working together in a harmonious atmosphere]

Welcome to Symphonize, where innovative solutions come alive! For over 15 years, our team of experts has been resonating with clients across various industries, from startups to multinational enterprises.

**Our Harmony**

We believe that technology should be like music – it should resonate with your customers and amplify your transformation journey. Our expert team in UX, engineering, and delivery collaborate seamlessly to craft tailored solutions that make a lasting impact.

**Services That Symphony Together**

* **Web and Mobile Development**: With the latest technology stack and hassle-free deployment, we create digital experiences that are impactful and effective.
* **User Interface/User Experience Design**: Our design experts craft seamless customer journeys that minimize friction and provide thoughtful guidance.
* **Ongoing Support**: We provide end-to-end cloud services to ensure your business operates efficiently and securely.

**Harmonious Careers**

Join our symphony of talented individuals who share a passion for innovation and excellence. Check out our job openings at [Link]!

**What Our Partners Say**

"A team that makes it so much easier! They can see through complexity and pull together an elegant solution." - Sean Johnston, Senior Vice President - Operations and Technology

"We have been successfully partnering with Symphonize for many years... Their team functions seamlessly as an extension of our product management teams." - Terry Leahy, CEO

**Let the Harmony Begin**

Our unified team is ready to orchestrate your next digital transformation. Contact us at [Link] to learn more about how we can harmonize your business and amplify your success.

**Connect with Us**

Twitter: @[Symphonize Twitter Handle]
 LinkedIn: @[Symphonize LinkedIn Profile]
 Facebook: @[Symphonize Facebook Page]

Join the symphony and let's create a brighter future together!

### Improvements to make the brochure more appealing

In [83]:
def stream_brochure(company_name, url):
    """
    Generate a brochure and render it incrementally as the model streams tokens.

    Args:
        company_name: name of the company to write about.
        url: landing-page address used to gather the source material.
    """
    stream = openai.chat.completions.create(
        model="llama3.2",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some OpenAI-compatible servers emit chunks with no choices (e.g. usage-only chunks).
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        # Strip only code-fence markers (with or without the "markdown" tag). The raw text
        # is kept untouched so the word "markdown" inside the brochure itself is preserved,
        # and so a fence split across chunks is still recognised once complete.
        cleaned = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(cleaned), display_id=display_handle.display_id)

In [86]:
# Generate the brochure again, this time rendering it live as tokens arrive.
stream_brochure("Symphonize","https://www.symphonize.com/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.symphonize.com/about'}, {'type': 'services page', 'url': 'https://www.symphonize.com/services'}, {'type': 'case studies page', 'url': 'https://www.symphonize.com/case-studies'}, {'type': 'insights page', 'url': 'https://www.symphonize.com/insights'}, {'type': 'careers page', 'url': 'https://www.symphonize.com/careers'}, {'type': 'contact page', 'url': 'https://www.symphonize.com/contact-us'}]}


**The Symphony of Innovation**
==========================

At Symphonize, we believe that innovation is like music – it's all about harmony, resonance, and movement forward. Our team of experts in UX, engineering, and delivery collaborate to create solutions that strike a chord with your customers and maximize results.

**Our Story**
-------------

With over 15 years of experience, we've established ourselves as a trusted partner for digital transformation. We've worked with clients across various industries, from startups to multinational enterprises, to deliver tailored solutions that make noise in the market.

**What Sets Us Apart**
-------------------

* **Dance Among Expertise**: Our team combines enterprise architecture, development, strategic business insight, and thoughtful UX design to create innovative solutions.
* **Technology Made Simple**: Our skilled engineers master the latest technology stack with hassle-free deployment, ensuring your digital presence is impactful and effective.
* **Resonate with Your Customers**: Our design experts craft a seamless customer journey that provides thoughtful guidance and minimizes friction.

**Our Clients Love Us**
--------------------

* "Symphonize team is amazing! Their ideas and designs for our website are truly exceptional..." - Patricia Campbell, Chief Operating Officer at Christian Financial Credit Union
* "Symphonize makes it so much easier... We can see through the complexity to an elegant solution that makes a complex product more intuitive." - Ed Rose, CEO at Paqqets

**Join the Symphony**
------------------

Ready to harmonize your digital transformation journey?

* **Careers**: Join our team of innovative thinkers and problem-solvers. Our careers page is open for inquiries.
* **Investor Opportunities**: Explore our investment potential with Symphonize's unique approach to UX, engineering, and delivery.
* **Contact Us**: Reach out to our experts directly to discuss your next digital transformation project.

**Experience the Harmony**
----------------------

At Symphonize, we orchestrate the symphony of innovation for you. Let us guide you through the modern landscape with an optimized delivery model, unbeatable value, and unwavering integrity.

---

Contact: [contact@symphonize.com](mailto:contact@symphonize.com)

Follow us on social media:

* Twitter: @SymphonizeAI
* LinkedIn: linkedin.com/company/symphonizea
* Facebook: facebook.com/symphoniza

In [None]:
# System prompt for the Spanish brochure variant.
# NOTE: the variable name is misspelled ("prompy"); it is kept because spanish_brochure reads it.
# Each continued line ends with a space before the backslash so sentences do not run together.
spanish_system_prompy = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short humorous, entertaining, jokey brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information. But i need this brochure in spanish language only"

In [None]:
def spanish_brochure(company_name, url):
    """
    Generate a Spanish-language brochure and render it incrementally in the notebook.

    Args:
        company_name: name of the company to write about.
        url: landing-page address used to gather the source material.

    Returns:
        None. The brochure is shown through IPython's display machinery only.
        NOTE(review): because nothing is returned, using this function directly as a
        Gradio `fn` produces an empty output component.
    """
    stream = openai.chat.completions.create(
        model="llama3.2",
        messages=[
            {"role": "system", "content":  spanish_system_prompy},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )

    raw = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        # Some OpenAI-compatible servers emit chunks with no choices (e.g. usage-only chunks).
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        # Strip only code-fence markers (with or without the "markdown" tag). The raw text
        # is kept untouched so the word "markdown" inside the brochure itself is preserved,
        # and so a fence split across chunks is still recognised once complete.
        cleaned = raw.replace("```markdown", "").replace("```", "")
        update_display(Markdown(cleaned), display_id=display_handle.display_id)

In [None]:
# Stream the Spanish version of the brochure into the notebook.
spanish_brochure("Symphonize","https://www.symphonize.com/")

In [88]:
import gradio as gr

In [92]:
def stream_spanish_brochure(company_name, url):
    """
    Generator used as the Gradio callback: yields the brochure text as it grows.

    Gradio renders whatever the callback returns or yields. spanish_brochure() only
    draws through IPython's display() and returns None, so it would leave the Markdown
    output empty; this generator yields the accumulated text instead.

    Args:
        company_name: name of the company to write about.
        url: landing-page address, including the http:// or https:// scheme.

    Yields:
        The brochure generated so far, with code-fence markers removed.
    """
    stream = openai.chat.completions.create(
        model="llama3.2",
        messages=[
            {"role": "system", "content": spanish_system_prompy},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )

    raw = ""
    for chunk in stream:
        # Some OpenAI-compatible servers emit chunks with no choices (e.g. usage-only chunks).
        if not chunk.choices:
            continue
        raw += chunk.choices[0].delta.content or ''
        yield raw.replace("```markdown", "").replace("```", "")


view = gr.Interface(
    fn=stream_spanish_brochure,
    inputs=[gr.Textbox(label="company_name:"),
            gr.Textbox(label="company_url (including http:// or https://):")],
    outputs=[gr.Markdown(label="Brochure")],
    flagging_mode="never"
)
view.launch()

* Running on local URL:  http://127.0.0.1:7873

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\Mastan\anaconda3\Lib\site-packages\urllib3\connection.py", line 199, in _new_conn
    sock = connection.create_connection(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Mastan\anaconda3\Lib\site-packages\urllib3\util\connection.py", line 60, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Mastan\anaconda3\Lib\socket.py", line 976, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
socket.gaierror: [Errno 11001] getaddrinfo failed

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\Mastan\anaconda3\Lib\site-packages\urllib3\connectionpool.py", line 789, in urlopen
    response = self._make_request(
      