In [47]:
# imports

import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

In [48]:
# Read settings from the local .env file into the process environment.
# override=True lets values from the file replace variables that are already set.

load_dotenv(override=True)
api_key = os.environ.get("OPENAI_API_KEY")

# Check the keys
def verify_api_key(key=None):
    """
    Print a one-line diagnosis of an OpenAI API key.

    Args:
        key: The key to inspect. When omitted (or None), the module-level
            `api_key` is inspected instead.

    Returns:
        None. The diagnosis is printed, not returned, and a bad key does
        not raise.
    """
    if key is None:
        key = api_key

    if not key:
        print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
    # Whitespace is checked before the prefix: a key with a leading space or tab
    # would otherwise be misreported as having the wrong prefix.
    elif key.strip() != key:
        print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
    elif not key.startswith("sk-proj-"):
        print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
    else:
        print("API key found and looks good so far!")

API key found and looks good so far!


In [49]:
# Verify we have the proper looking key (this only prints a diagnosis; it does not stop the notebook on a bad key)
verify_api_key()

# Create an OpenAI client; no key is passed explicitly here
openai = OpenAI()

In [50]:
# Request headers sent with every page fetch; some websites need a
# browser-like User-Agent before they will serve the page.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/117.0.0.0 Safari/537.36"
    ),
}

class Website:
    """
    A web page identified by its URL, together with its title and visible text.

    Construction does no network I/O; call getWebsiteData() to download and
    parse the page. Until then `text` is "no data", `title` is
    "No title found" and getSoup() returns None.
    """

    def __init__(self, url):
        """
        Record the url and set placeholder values for the parsed fields.

        Args:
            url: Address of the page that getWebsiteData() will fetch.
        """
        self.url = url
        # Initialised here so that reading them before a fetch does not raise AttributeError.
        self.soup = None
        self.title = "No title found"
        self.text = "no data"

    def getWebsiteData(self, timeout=30):
        """
        Download the page and fill in `soup`, `title` and `text`.

        Args:
            timeout: Seconds to wait on the server before requests gives up,
                so that an unresponsive site cannot hang the notebook.

        Raises:
            requests.exceptions.RequestException: if the request fails or times out.
        """
        response = requests.get(self.url, headers=headers, timeout=timeout)
        self.soup = BeautifulSoup(response.content, 'html.parser')

        # <title> may be missing, or may have no single string child (.string is None).
        title_tag = self.soup.title
        if title_tag is not None and title_tag.string:
            self.title = title_tag.string
        else:
            self.title = "No title found"

        body = self.soup.body
        if body is None:
            # No <body> element: nothing to extract, so fall back to the placeholder.
            self.text = "no data"
            return

        # Drop elements that carry no readable text before extracting it.
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

    def getSoup(self):
        """Return the parsed BeautifulSoup document, or None if nothing has been fetched yet."""
        return self.soup

In [51]:
# System prompt used when summarizing each individual web page
system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

In [52]:
def user_prompt_for(website):
    """
    Build the user message that asks for a summary of one page.

    Defined in this cell because generate_llm_messages_for() calls it and a
    fresh kernel would otherwise stop with a NameError.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        The prompt text: the page title, the request, then the page text.
    """
    return (
        f"You are looking at a website titled {website.title}\n"
        "The contents of this website are as follows; "
        "please provide a short summary of this website in markdown. "
        "If it includes news or announcements, then summarize these too.\n\n"
        f"{website.text}"
    )


def generate_llm_messages_for(website):
    """
    Build the chat messages for summarizing a single page.

    Args:
        website: Object exposing `title` and `text` attributes.

    Returns:
        A two-element list: the system message carrying the module-level
        `system_prompt`, then the user message from user_prompt_for().
    """
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_for(website)}
    ]

In [53]:
def summarizeWebsite(url):
    """
    Fetch the page at `url` and return the model's summary of it.

    Args:
        url: Address of the page to summarize.

    Returns:
        The message content of the first choice in the chat completion.
    """
    page = Website(url)
    page.getWebsiteData()

    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=generate_llm_messages_for(page),
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [55]:
# Pages to summarize, one request and one model call per entry
techSitesToVisit = [
    "https://cnn.com/business/tech",
    "https://techcrunch.com",
    "https://theverge.com",
    "https://wired.com",
    "https://engadget.com",
    "https://thenextweb.com",
    "https://arstechnica.com",
]

# One summary per site, in the same order as techSitesToVisit.
# Reset here so re-running this cell does not accumulate duplicates.
summaries = []

for techSite in techSitesToVisit:
    summary = summarizeWebsite(techSite)
    print(summary)
    summaries.append(summary)

# Summary of Tech | CNN Business

The **Tech | CNN Business** section focuses on the latest developments and trends in technology and business, featuring articles on prominent companies, innovations, and market dynamics. 

## Latest News Highlights

- **Texas Instruments** announced an investment of over **$60 billion** to expand semiconductor manufacturing in the US.
- **Meta** is reportedly offering **$100 million** to poach employees from rival companies.
- **Taiwan** has placed companies linked to China's AI ambitions on an export control list.
- **Google, Meta, and Snap** are exploring new technologies believed to be the next big thing in tech.
- **Nvidia** is excluding China from its future forecasts due to US chip export controls.
- **Disney and Universal** have initiated legal action against the AI photo generator **Midjourney**, alleging copyright infringement.

This section also addresses ongoing issues such as internet outages affecting major companies and discussions around

In [59]:
# Build the chat messages that merge the per-site summaries into one overview
def generate_final_llm_messages(summaries):
    """
    Build the chat messages asking the model to combine several summaries.

    Args:
        summaries: Iterable of summary strings. Entries that are None or
            empty are skipped, so a missing summary cannot raise a
            TypeError during string building.

    Returns:
        A two-element list: the system message, then a user message holding
        the instruction followed by each summary, separated by blank lines.
    """
    # Named differently from the module-level `system_prompt` to avoid confusing the two.
    combine_system_prompt = "You are an assistant that will receive summaries of websites.  You will combine them into an integrated summary of the websites.  Please respond in markdown."
    instruction = "Combine these summaries into a single summary of the websites."

    usable_summaries = [summary for summary in summaries if summary]
    user_prompt = "\n\n".join([instruction, *usable_summaries])

    return [
        {"role": "system", "content": combine_system_prompt},
        {"role": "user", "content": user_prompt}
    ]

In [62]:
# Ask the model to merge the collected per-site summaries into one text
response = openai.chat.completions.create(
    messages=generate_final_llm_messages(summaries),
    model="gpt-4o-mini",
)

# Keep only the text of the first choice
final_response = response.choices[0].message.content

In [64]:
# Show the combined summary as plain text (the markdown is printed raw, not rendered)
print(final_response)

# Integrated Summary of Technology Websites

This summary amalgamates insights from various respected technology news platforms, providing a comprehensive overview of current trends, announcements, and innovations within the tech industry.

## Major Developments and Trends

1. **Investment and Manufacturing Initiatives**:
   - **Texas Instruments** has pledged over **$60 billion** to bolster semiconductor manufacturing in the U.S., reflecting a significant commitment to domestic production amid ongoing supply chain challenges.
   - European VCs are investing in breakthrough technologies, such as a fusion energy project and Finland's sand battery initiative for sustainable heating.

2. **Corporate Moves**:
   - **Meta** is reportedly offering **$100 million** to attract talent from rival tech firms, showcasing competitive dynamics in the workforce.
   - **Amazon** announced layoffs linked to improvements in AI efficiency, indicating shifts in workforce structures due to technological ad