In [66]:
import os
from openai import OpenAI
from IPython.display import Markdown, display
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import ollama
import requests
import json

In [4]:
load_dotenv(override=True)
openai_key = os.getenv('OPENAI_API_KEY')

In [52]:
MODEL = 'gpt-4o-mini'
openai = OpenAI()

In [6]:
# Some websites need you to use proper headers when fetching them:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

In [137]:
class Website:

    def __init__(self, url):
        self.url = url
        self.body = None
        self.title = None
        self.text = None
        self.links = None


    def scrape_url(self):
        response = requests.get(self.url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.text if soup.title else "No Title"
        for not_needed in soup.body(["script", "style", "img", "input"]):
            not_needed.decompose()
        self.text = soup.body.get_text(separator='\n', strip= True)
        self.links = [link.get('href') for link in soup.find_all('a')]
        # return self.links 

    
    def get_content(self):
        self.scrape_url()
        return f"Website title: {self.title}, Website content:{self.text}"

    
    def get_link_system_prompt(self):
        link_system_prompt = "You are an agent to reform a list of link provided to you from a website \
            You are able to decide which of the links would be most relevant to include in a brochure \
            about the company, such as \
            the about page or careers page or the jobs page and so on \n" 
        link_system_prompt += "you are supposed to respond in the format as the below example"
        link_system_prompt += """
                            { "links": [
                                {"type": "about page", "url": "https://full.url/goes/here/about"},
                                {"type": "career page", "url": "https://full.url/goes/here/career"},
                                ]
                            }
                                """
        return link_system_prompt

        
    def get_link_user_prompt(self):
        self.get_content()
        link_user_prompt = f"Here is the list of links on the website of {self.url} - "
        link_user_prompt += "please decide which of these are relevant web links for a brochure about the company, \
                        respond with the full https URL in JSON format. \
                        Do not include Terms of Service, Privacy, email links.\n"
        link_user_prompt += "Links (some might be relative links):\n"
        link_user_prompt += "\n".join(self.links)
        return link_user_prompt

    
    def get_links(self):
        response = openai.chat.completions.create(
            model = MODEL,
            messages = [
                {'role':'system', 'content': self.get_link_system_prompt()},
                {'role':'user', 'content': self.get_link_user_prompt()}
            ],
            response_format={'type':'json_object'}
        )
        result = response.choices[0].message.content
        link_result = json.loads(result)
        return link_result

    
    def get_all_content(self):
        result = f"found links: {self.get_links()} \n"
        result += "Landing Page:\n"
        result += f"{self.get_content()}"
        return result

    
    def get_brochure_system_prompt(self):
        navigate_to_link_system_prompt = "You are an assistant that analyzes the contents of several \
                            relevant pages from a company website \
                            and creates a short brochure about the company for prospective customers, \
                            investors and recruits. Respond in markdown.\
                            Include details of company culture, customers and careers/jobs \
                            if you have the information."
        return navigate_to_link_system_prompt
        
        
    def get_brochure_user_prompt(self, company_name):
        navigate_to_link_user_prompt =  f"You are looking at a company called: {company_name}\n"
        navigate_to_link_user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
        navigate_to_link_user_prompt += self.get_all_content()
        # navigate_to_link_user_prompt = navigate_to_link_user_prompt[:20_000] # Truncate if more than 20,000 characters
        return navigate_to_link_user_prompt

    
    def create_brochure(self, company_name):
        response = openai.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": self.get_brochure_system_prompt()},
                {"role": "user", "content": self.get_brochure_user_prompt(company_name)}
              ],
        )
        result = response.choices[0].message.content
        display(Markdown(result))

    
    def create_brochure_with_ollama(self, company_name):
        ollama_model =  'llama3.2'
        response = ollama.chat(model=ollama_model, 
                              messages=[
                            {"role": "system", "content": self.get_brochure_system_prompt()},
                            {"role": "user", "content": self.get_brochure_user_prompt(company_name)}
                          ]
                              )
        summary = response['message']['content']
        display(Markdown(summary))


    def create_brochure_with_ollama_running_locally(self, company_name):
        OLLAMA_API = "http://localhost:11434/api/chat"
        HEADERS = {'Content-Type':'application/json'}
        MODEL = 'llama3.2'
        payload = {
            "model": MODEL,
            "messages": [
                        {"role": "system", "content": self.get_brochure_system_prompt()},
                        {"role": "user", "content": self.get_brochure_user_prompt(company_name)}
                          ],
            "stream": False
        }
        response = requests.post(OLLAMA_API, json= payload, headers=HEADERS)
        summary = response.json()['message']['content']
        display(Markdown(summary))

In [138]:
web = Website("https://cogeco.ca")

In [140]:
web.create_brochure("Cogeco")

# Cogeco Company Brochure

## Overview
Cogeco is a leading telecommunications and media company in North America, dedicated to connecting customers with high-speed internet, television, and radio services. While we encountered a temporary security block on their website, we can provide insight into Cogeco based on general knowledge of the industry.

## Company Culture
At Cogeco, the company culture is centered around innovation, collaboration, and community engagement. Employees are encouraged to bring their unique perspectives to the table, fostering an environment of creativity and problem-solving. The organization values diversity and inclusion, ensuring that every voice is heard and respected. 

### Core Values
- **Customer Focus**: Ensuring exceptional service and support to enhance user experience.
- **Innovation**: Embracing and investing in new technologies to stay ahead.
- **Integrity**: Building trust with customers and partners through transparent practices.

## Customers
Cogeco serves a wide range of customers, from residential users to businesses of all sizes. Their services help individuals and families stay connected and entertained while enabling businesses to operate efficiently and grow through robust internet solutions.

### Customer Commitment
Cogeco is dedicated to delivering quality service, with features such as:
- High-speed internet
- Reliable television programming
- Comprehensive support services

## Careers & Job Opportunities
Cogeco offers a variety of job opportunities across multiple departments including service delivery, customer support, technical operations, and marketing. The company seeks passionate and motivated individuals who are eager to contribute to a dynamic team environment.

### Employee Benefits
- Competitive salaries and bonus opportunities
- Comprehensive healthcare plans
- Opportunities for professional development
- Supportive work-life balance initiatives

## Join Us
If you are looking for a fulfilling career in a fast-paced, innovative company that values its employees and community, consider exploring opportunities at Cogeco. For job openings, visit their careers page (once access is restored) or connect with them on professional networking platforms.

## Contact Us
For inquiries about services, partnerships, or career opportunities, please reach out through their official communication channels.

---

### Note:
For the most current information about Cogeco, including their latest services and job openings, it is advised to check their website directly once access issues are resolved.

## Let's try with Ollama

### Not what I expected 💩

In [134]:
web.create_brochure_with_ollama("Cogeco")

**Cogeco Brochure**
=====================

### About Us

Cogeco is a leading provider of innovative technology solutions, protecting and enhancing the online experience for individuals and organizations.

### Our Mission

To deliver high-performance, secure, and reliable solutions that empower our customers to succeed in an ever-evolving digital landscape.

### What We Do

As a trusted partner, we offer:

*   **Cloudflare Security**: Our website utilizes Cloudflare's cutting-edge security solutions to safeguard against online threats.
*   **Performance Optimization**: We strive to ensure seamless performance and fast loading times for our users.

### Stay Connected

If you're unable to access our website due to technical issues, please reach out to us through the contact form or email address provided on our site. Our team will be happy to assist you.

### Join Our Community

We value collaboration and innovation in our community. Explore opportunities to work with us:

[**Careers & Jobs**](link to careers page)

Our company culture emphasizes a passion for technology, teamwork, and continuous learning.

*   **Collaborative Environment**: We foster open communication and empower employees to grow and succeed.
*   **Innovation Hub**: Our team of experts stays at the forefront of technological advancements to drive growth and success.
*   **Professional Development**: We invest in our employees' skills and knowledge to ensure they thrive in their roles.

### Connect with Us

Cogeco is dedicated to delivering exceptional results for our customers. Learn more about our services:

[**Services & Solutions**](link to services page)

By choosing Cogeco, you're partnering with a trusted partner committed to your online success.

---

Note: The brochure was created based on the provided content and may not fully represent the company's actual culture, values, or offerings.

## Let's try generating the brochure with Ollama running locally
### 💩

In [139]:
web.create_brochure_with_ollama_running_locally("cogeco")

# Cogeco Brochure

## About Us

Cogeco is a leading provider of performance and security solutions, protecting websites like ours from online attacks. Our website may be blocked for your security, but we're here to tell you more about our company culture, customers, and career opportunities.

## Company Culture

At Cogeco, we're committed to delivering top-notch performance and security services that keep our clients safe and secure online. We believe in fostering a collaborative and innovative work environment where our team members can thrive and grow.

## Customers

We're proud to serve a wide range of customers across various industries, including [insert industry/sectors]. Our solutions help businesses and organizations protect their online presence from threats and ensure seamless performance for their users.

## Careers & Jobs

Join our team of experts dedicated to delivering exceptional performance and security services! We offer [insert job types, e.g., engineering, sales, support] roles that allow you to work on cutting-edge projects and contribute to the growth of our company. Check out our [insert career page link] for more information.

## Get in Touch

Ready to learn more about Cogeco's performance and security solutions? Email us at [insert contact email] or visit our website at cogeco.ca (when we're not blocked, that is!)

# gpt-4o-mini WINS!!! 😎

## OpenAI showed me how I can improve my code by making these changes 🤪 :
- Added error handling for requests and OpenAI API.
- Cached scraping results to avoid redundant calls.
- Resolved relative links.
- Centralized prompt creation logic.
- Enhanced maintainability by passing dependencies (headers, model) into the class.
- With these changes, your code becomes more robust, reusable, and efficient.

In [None]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import json
import openai
from markdown import Markdown

class Website:
    def __init__(self, url, headers, model):
        self.url = url
        self.headers = headers
        self.model = model
        self.body = None
        self.title = None
        self.text = None
        self.links = None

    def scrape_url(self):
        if self.body is not None:  # Skip if already scraped
            return
        try:
            response = requests.get(self.url, headers=self.headers, timeout=10)
            response.raise_for_status()
            self.body = response.content
            soup = BeautifulSoup(self.body, 'html.parser')
            self.title = soup.title.text if soup.title else "No Title"
            for not_needed in soup.body(["script", "style", "img", "input"]):
                not_needed.decompose()
            self.text = soup.body.get_text(separator='\n', strip=True)
            self.links = [urljoin(self.url, link.get('href')) for link in soup.find_all('a')]
        except requests.exceptions.RequestException as e:
            raise ValueError(f"Error fetching URL {self.url}: {e}")

    def get_content(self):
        self.scrape_url()
        return f"Website title: {self.title}, Website content: {self.text}"

    def create_prompt(self, prompt_type, extra_content=""):
        base_prompts = {
            "link_system": "You are an agent to reform a list of links ...",
            "brochure_system": "You are an assistant that analyzes the contents ..."
        }
        return base_prompts[prompt_type] + extra_content

    def get_links(self):
        self.scrape_url()
        system_prompt = self.create_prompt("link_system")
        user_prompt = f"Here is the list of links on {self.url}:\n" + "\n".join(self.links)
        try:
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[
                    {'role': 'system', 'content': system_prompt},
                    {'role': 'user', 'content': user_prompt}
                ]
            )
            result = response.choices[0].message.content
            return json.loads(result)
        except Exception as e:
            raise ValueError(f"Error generating links: {e}")

    def create_brochure(self, company_name):
        system_prompt = self.create_prompt("brochure_system")
        user_prompt = f"You are looking at {company_name}:\n{self.get_content()}"
        try:
            response = openai.ChatCompletion.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ]
            )
            result = response.choices[0].message.content
            display(Markdown(result))
        except Exception as e:
            raise ValueError(f"Error generating brochure: {e}")


### The code looks sketchy 🤔
