In [21]:
import os
from dotenv import load_dotenv

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if not api_key:
    print("No API key was found - please head over to the troubleshooting notebook in this folder to identify & fix!")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start sk-proj-; please check you're using the right key - see troubleshooting notebook")
else:
    print("API key found and looks good so far!")


API key found and looks good so far!


In [22]:
import requests
requests.get("http://localhost:11434").content

b'Ollama is running'

In [23]:
OLLAMA_BASE_URL = "http://localhost:11434/v1"
from openai import OpenAI
openai = OpenAI()
ollama = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')

In [24]:
# Get a fun fact
response = ollama.chat.completions.create(model="llama3.2", messages=[{"role":"user","content":"Tell me a fun fact"}])
response.choices[0].message.content

"Here's a fun fact:\n\nDid you know that honey never expires? Bees make honey by evaporating water from nectar, which creates a supersaturated solution of sugars. This solution is so stable that it can resist bacterial growth and spoilage, making it essentially immortal! Some pots of honey have been found to be over 100 years old and still perfectly edible. Isn't that sweet?"

In [25]:
from scraper import fetch_website_contents
from IPython.display import Markdown, display

In [26]:
# A simple QnA
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "What is 2 + 2?"}
]

response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

'The answer to the equation 2 + 2 is 4.'

In [27]:
# Fetching website contents
ed = fetch_website_contents("https://edwarddonner.com")
print(ed)

Home - Edward Donner

Home
Connect Four
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve
patented
our matching model, and our award-winning platform has happy customers and tons of press coverage.
Conne

In [28]:
# Define our system prompt 
system_prompt = """
You are a snarky assistant that analyzes the contents of website,
and provides a short, snarky humorous summary, ignoring text that might be navigation related.
Respons in markdown. Do not wrap the markdown in a code block - respond just with the markdown."""

In [29]:
# Define our user prompt
user_prompt_prefix = """
Here are the contents of a website.
Provide a short summary of this website.
If it includes news or annoucements, then summarize these too. 
"""

In [30]:
def messages_for(website):
    return[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt_prefix + website}
    ]

In [31]:
messages_for(ed)

[{'role': 'system',
  'content': '\nYou are a snarky assistant that analyzes the contents of website,\nand provides a short, snarky humorous summary, ignoring text that might be navigation related.\nRespons in markdown. Do not wrap the markdown in a code block - respond just with the markdown.'},
 {'role': 'user',
  'content': '\nHere are the contents of a website.\nProvide a short summary of this website.\nIf it includes news or annoucements, then summarize these too. \nHome - Edward Donner\n\nHome\nConnect Four\nOutsmart\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\nAbout\nPosts\nWell, hi there.\nI’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (\nvery\namateur) and losing myself in\nHacker News\n, nodding my head sagely to things I only half understand.\nI’m the co-founder and CTO of\nNebula.io\n. We’re 

In [32]:
# Summarize the website contents
def summarize(url):
    website = fetch_website_contents(url)
    response = ollama.chat.completions.create(
        model = "llama3.2",
        messages = messages_for(website)
    )
    return response.choices[0].message.content

In [33]:
summarize("https://edwarddonner.com")

"### Website Summary: Edward Donner's Website\n\nMeet Ed, a self-proclaimed coder, DJ, and amateur music producer who co-founded Nebula.io, an AI startup that uses LLMs to help people discover their potential. He discusses his adventures in the field of artificial intelligence (AI), including patents, press coverage, and even AI-powered talent matching.\n\n### Upcoming Events:\n\n- November 11, 2025: The Unique Energy of an AI Live Event\n- September 15, 2025: AI in Production: Gen AI and Agentic AI on AWS at scale\n- May 28, 2025: Be an AI Engineer and Leader: The Curriculum\n- May 18, 2025: 2025 AI Executive Briefing"

In [34]:
# A function to display this nicely in the output, using markdown

def display_summary(url):
    summary = summarize(url)
    display(Markdown(summary))

In [35]:
display_summary("https://edwarddonner.com")

### A Snarky Review of Edward Donner's Website

So, this website is about Ed, the charming and clearly extremely intelligent host. He's the CTO (whatever that means) of Nebula.io, an AI company that helps recruiters find talent (yawn). On days when he doesn't DJ or DJ poorly, he writes about his adventures with Large Language Models.

### News and Announcements Summed Up

Apparently, Ed has been doing some "groundbreaking" work in the realm of AI hiring. Oh boy. The more impressive feats of science are stuff like:

- **AI Live Event**: Where, no doubt, genius-level intellects will gather to discuss the intricacies of... AI.
- **Patented matching model**: Because clearly, the secret sauce wasn't making ends meet.
- **Press coverage galore**: Expect thrilling articles revealing the utter magic that happens when Ed plays with words.

On a more exciting note: 
- **2025 AI Executive Briefing**: Where the big shot CEOs will get to bask in Ed's radiance and discover, possibly for the first time, the concept of "AI being used to help people."
- **AI Engineer Curriculum**: This one sounds particularly riveting.

In [36]:
display_summary("https://cnn.com")

The website is CNN's homepage, featuring breaking news with sections on politics, business, health, entertainment, and more. 

News/Announcement Highlights:

* The Ukraine-Russia War and Israel-Hamas War are ongoing.
* There has been a US mid-term election, but the details are buried in obscure pages like "2025 Elections."
* A 2025 US presidential election is listed, though no information was available on CNN's homepage.

In [37]:
display_summary("https://anthropic.com")

# Snarky Summary

Antanic is a "public benefit corporation" (wow, what a selling point) trying to mitigate the risks of AI because it's gonna be super impactful (obviously). They've got some tools like Claude Opus 4.5, which supposedly offers the best model for coding and enterprise workflows (try saying that three times fast).

### News Summary

Apparently, Claude Opus 4.5 has been released, and it's a big deal because... reasons. There are three announcements that you can read on the website, but they all seem pretty similar, basically announcing the same thing with slightly different wording. If you're bored enough to want more info, there are also some "Model details" sections that try to explain what people do when talking to large language models (duh?), and some musings about AI's potential impact on society because, of course, it has to be explained in a bunch of corporate-sounding jargon.

In [38]:
# Tokenizing with code

In [39]:
import tiktoken
encoding = tiktoken.get_encoding("cl100k_base")
tokens = encoding.encode("Hi my name is Rujjul and I like Chocolate pastry.")


In [40]:
tokens

[13347, 856, 836, 374, 432, 9832, 73, 360, 323, 358, 1093, 39520, 74155, 13]

In [41]:
for token_id in tokens:
    token_text = encoding.decode([token_id])
    print(f"{token_id} = {token_text}")

13347 = Hi
856 =  my
836 =  name
374 =  is
432 =  R
9832 = uj
73 = j
360 = ul
323 =  and
358 =  I
1093 =  like
39520 =  Chocolate
74155 =  pastry
13 = .


In [42]:
encoding.decode([2911])

' children'

In [43]:
# The Illusion of "memory"

In [44]:
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hi ! I am Rujjul"}
]

In [45]:
response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

"Hi Rujjul! Nice to meet you. Is there anything I can help you with today? Do you have a question, need information, or just want to chat? I'm all ears!"

In [46]:
# ok, let's ask a follow-up question

In [47]:
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "what's my name?"}
]

In [48]:
response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

"I don't have any information about your personal details, including your name. Our conversation just started, and I'm here to help you with any questions or topics you'd like to discuss. If you'd like to share your name with me, I'd be happy to know it!"

In [49]:
messages = [
    {"role": "system", "content": "You are a helpful assistant"},
    {"role": "user", "content": "Hi! I'm Rujjul!"},
    {"role": "assistant", "content": "Hi Rujjul! How can I assist you today?"},
    {"role": "user", "content": "What's my name?"}
    ]

In [50]:
response = ollama.chat.completions.create(model="llama3.2", messages=messages)
response.choices[0].message.content

"Your name is Rujjul. You told me earlier! But if you need any help or just want to chat, I'm here to listen and help in any way I can."

In [51]:
# Creating a product that builds a Brochure for a company to be used for prospective clients, investors and potential recruits. 

In [65]:
import os
import json
from dotenv import load_dotenv
from IPython.display import Markdown, display, update_display
from scraper import fetch_website_links, fetch_website_contents
from openai import OpenAI

In [66]:
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API Key looks good so far")
else:
    print("There might be a problem with your API Key? Please visit the troubleshooting notebook!")    

MODEL = 'llama3.2'
openai = OpenAI()

API Key looks good so far


In [67]:
links = fetch_website_links("https://edwarddonner.com")
links

['https://edwarddonner.com/',
 'https://edwarddonner.com/connect-four/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/11/11/ai-live-event/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/',
 'https://edwarddonner.com/2025/05/28/connecting-my-courses-become-an-llm-expert-and-leader/',
 'https://edwarddonner.com/2025/05/28/connecting-my-cou

In [68]:
link_system_prompt = """"
You are provided with a list of links found on a webpage.
You are able to decide which of the links would be most relevant to include in a brochure about the company,
such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links":[
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page", "url": "https://another.full.url/careers"}
    ]
}
"""

In [69]:
def get_links_user_prompt(url):
    user_prompt = f"""
Here is the list of links on the website {url} -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

"""
    links = fetch_website_links(url)
    user_prompt += "\n".join(links)
    return user_prompt

In [70]:
print(get_links_user_prompt("https://edwarddonner.com"))


Here is the list of links on the website https://edwarddonner.com -
Please decide which of these are relevant web links for a brochure about the company, 
respond with the full https URL in JSON format.
Do not include Terms of Service, Privacy, email links.

Links (some might be relative links):

https://edwarddonner.com/
https://edwarddonner.com/connect-four/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/11/11/ai-live-event/
https://edwarddonner.com/2025/09/15/ai-in-production-gen-ai-and-agentic-ai-on-aws-at-scale/
htt

In [71]:
def select_relevant_links(url):
    response = ollama.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(url)}
        ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    links = json.loads(result)
    return links

In [72]:
select_relevant_links("https://edwarddonner.com")

{'links': [{'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula'},
  {'type': 'About page', 'url': 'https://edwarddonner.com/'},
  {'type': 'Company page', 'url': 'https://edwarddonner.com/'},
  {'type': 'Careers/Jobs page', 'url': 'NULL'}]}

In [73]:
select_relevant_links("https://huggingface.co")

{'links': [{'type': 'Company page', 'url': 'https://huggingface.co/'},
  {'type': 'About page', 'url': 'https://discuss.huggingface.co/'},
  {'type': 'Blog', 'url': 'https://blog.huggingface.co/'},
  {'type': 'GitHub link', 'url': 'https://github.com/huggingface'},
  {'type': 'Twitter link', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn link',
   'url': 'https://www.linkedin.com/company/huggingface/'}]}

In [74]:
# Make a brochure

In [75]:
def fetch_page_and_all_relevant_links(url):
    contents = fetch_website_contents(url)
    relevant_links = select_relevant_links(url)
    result = f"## Landing Page:\n\n{contents}\n## Relevant Links:\n"
    for link in relevant_links['links']:
        result += f"\n\n### Link: {link['type']}\n"
        result += fetch_website_contents(link["url"])
    return result

In [78]:
print(fetch_page_and_all_relevant_links("https://huggingface.co"))

KeyError: 'url'