Import Statements

In [40]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

Load the Environment Variables and Our API Key

In [41]:
# Load environment variables from a file called .env
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Check the key.
# Whitespace is tested before the prefix: a key with a leading space or tab would
# otherwise fail the prefix test first and be reported with the wrong message.
if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif api_key.strip() != api_key:
    print("An API key was found, but it looks like it might have space or tab characters at the start or end - please remove them - see troubleshooting notebook")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


Beautiful Soup Web Parser Class

In [42]:
# Request headers sent with every page fetch; some websites only respond
# properly when the request carries browser-like headers.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

# Client object for the OpenAI API
openai = OpenAI()

class Website:
    """
    A web page fetched over HTTP and reduced to its title and visible text.

    Attributes:
        url: the address that was requested
        title: the text of the page's <title>, or "No title found"
        text: the text of <body> with script/style/img/input elements removed,
              fragments separated by newlines; "" when the page has no <body>
    """

    def __init__(self, url, timeout=30):
        """
        Create this Website object from the given url using the BeautifulSoup library

        Args:
            url: address of the page to download
            timeout: seconds to wait on the server before the request is abandoned

        Raises:
            requests.exceptions.RequestException: if the request fails or times out
        """
        self.url = url
        # Without a timeout, requests can wait indefinitely on an unresponsive server
        response = requests.get(url, headers=headers, timeout=timeout)

        # Beautiful soup is a package for web scraping
        soup = BeautifulSoup(response.content, 'html.parser')

        # .string is None when <title> is empty or wraps nested tags, so test the
        # extracted string as well as the tag itself
        title = soup.title.string if soup.title else None
        self.title = title if title else "No title found"

        body = soup.body
        if body is None:
            # Nothing to extract from a document without a <body> (e.g. a non-HTML response)
            self.text = ""
            return

        # Drop elements that carry no readable content before extracting the text
        for irrelevant in body(["script", "style", "img", "input"]):
            irrelevant.decompose()
        self.text = body.get_text(separator="\n", strip=True)

System Prompt

In [43]:
# The system prompt sets the scene for the frontier model:
# it describes the kind of task being performed and the tone to respond in.

system_prompt = (
    "You are an assistant that analyzes the contents of a website "
    "and provides a short summary, ignoring text that might be navigation related. "
    "Respond in markdown."
)

User Prompt

In [44]:
# Builds the user prompt that asks for a summary of a website.
# The user prompt is the conversation itself: the LLM decides how to respond to it
# in the context set by the system prompt.

def user_prompt_for(website):
    """
    Return the user prompt for the given website.

    The prompt is the page title line, then the fixed summary instructions,
    then the page text, concatenated in that order.

    Args:
        website: object exposing `title` and `text` attributes
    """
    heading = f"You are looking at a website titled {website.title}"
    instructions = (
        "\nThe contents of this website is as follows; "
        "please provide a short summary of this website in markdown. "
        "Please list off his skills, then summarize these too.\n\n"
    )
    return heading + instructions + website.text

Message Function

In [45]:
# Builds the list of role/content messages that is sent to the chat API

def messages_for(website):
    """
    Return the two-message conversation for the given website:
    the system prompt first, then the user prompt built from the website.
    """
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt_for(website)}
    return [system_message, user_message]

Calling OpenAI API

In [46]:
# And now: call the OpenAI API. You will get very familiar with this!

def summarize(url, model="gpt-4o-mini"):
    """
    Fetch the page at `url` and return the model's summary of it.

    Args:
        url: address of the page to summarize
        model: name of the chat model to call; defaults to "gpt-4o-mini"

    Returns:
        The content of the first choice in the chat completion response.
    """
    website = Website(url)
    response = openai.chat.completions.create(
        model=model,
        messages=messages_for(website),
    )
    return response.choices[0].message.content

Display Summary Function

In [47]:
# Renders the summary of a page as markdown in the Jupyter cell output

def display_summary(url):
    """Summarize the page at `url` and display the result as rendered markdown."""
    display(Markdown(summarize(url)))

In [48]:
# Run the full pipeline for one profile URL: fetch the page, request a summary, render it below.
# NOTE(review): the rendered output contains personal profile details - consider clearing it before sharing.
display_summary("https://www.linkedin.com/in/ericoliver12/")

# Summary of Eric Oliver's LinkedIn Profile

Eric Oliver is a dedicated Computer Science student at the University of Texas at Tyler, specializing in machine learning, artificial intelligence (AI), and data processing. He has recently accepted a position as an Undergraduate Research Assistant, indicating his commitment to gaining hands-on experience in his field.

## Skills

- Machine Learning
- Data Analysis
- Deep Learning
- Database Management
- Software Development
- Data Mining
- Cyber Risk Analysis
- Algorithm Analysis

## Skills Summary

### Machine Learning
Eric has experience developing and implementing machine learning models, with significant work on projects involving data classification and prediction.

### Data Analysis
He possesses strong analytical skills, enabling him to clean, process, and analyze large datasets for meaningful insights.

### Deep Learning
His involvement in deep learning projects demonstrates proficiency in using complex neural networks for advanced modeling tasks.

### Database Management
Eric has skills in designing and managing databases, with experience using SQL for data normalization and query execution.

### Software Development
He has experience in software development, showcasing his ability to build applications and systems as part of collaborative projects.

### Data Mining
He has knowledge in extracting patterns and knowledge from large datasets, contributing to the overall project efficiency.

### Cyber Risk Analysis
His studies include understanding cyber risks, which is essential in current data-driven environments.

### Algorithm Analysis
Eric is familiar with analyzing algorithms, indicating a strong foundation in computer science principles which supports his technical skill set.