# End of week 1 exercise

To demonstrate your familiarity with OpenAI API, and also Ollama, build a tool that takes a technical question,  
and responds with an explanation. This is a tool that you will be able to use yourself during the course!

In [1]:
# imports
import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI


In [2]:
# constants

MODEL_GPT = 'gpt-4o-mini'
MODEL_LLAMA = 'llama3.2'

In [4]:
# set up environment
# Initialize and constants

load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [5]:
# A class to represent a Webpage

class Website:
    """
    A utility class to represent a Website that we have scraped, now with links
    """

    def __init__(self, url):
        self.url = url
        response = requests.get(url)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [17]:
ed=Website("https://www.edwarddonner.com")
ed.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/',
 'https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/',
 'https://edwarddonner.com/2024/08/06/outsmart/',
 'https://edwarddonner.com/2024/08/06/outsmart/',
 'https://edwarddonner.com/2024/06/26/choosing-the-right-llm-resources/

In [29]:
#multi shots prmopting 
#shot 1) you should respond in JSON... 
#shot 2) you should also respond in multi colored text

link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in yellow text."
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [30]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in yellow text.You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [36]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "print output in yellow text\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [38]:
print(get_links_user_prompt(ed))

Here is the list of links on the website of https://www.edwarddonner.com - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
print output in yellow text
Links (some might be relative links):
https://edwarddonner.com/
https://edwarddonner.com/outsmart/
https://edwarddonner.com/about-me-and-about-nebula/
https://edwarddonner.com/posts/
https://edwarddonner.com/
https://news.ycombinator.com
https://nebula.io/?utm_source=ed&utm_medium=referral
https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
https://patents.google.com/patent/US20210049536A1/
https://www.linkedin.com/in/eddonner/
https://edwarddonner.com/2024/11/13/llm-engineering-resources/
https://edwarddonner.com/2024/11/13/llm-engineering-resources/
https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist

In [39]:
def get_links(url):
    website = Website(url)
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
      ],
        response_format={"type": "json_object"}
    )
    result = response.choices[0].message.content
    return json.loads(result)

In [40]:
anthropic = Website("https://edwarddonner.com")
anthropic.links

['https://edwarddonner.com/',
 'https://edwarddonner.com/outsmart/',
 'https://edwarddonner.com/about-me-and-about-nebula/',
 'https://edwarddonner.com/posts/',
 'https://edwarddonner.com/',
 'https://news.ycombinator.com',
 'https://nebula.io/?utm_source=ed&utm_medium=referral',
 'https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html',
 'https://patents.google.com/patent/US20210049536A1/',
 'https://www.linkedin.com/in/eddonner/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/11/13/llm-engineering-resources/',
 'https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/',
 'https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/',
 'https://edwarddonner.com/2024/08/06/outsmart/',
 'https://edwarddonner.com/2024/08/06/outsmart/',
 'https://edwarddonner.com/2024/06/26/choosing-the-right-llm-resources/

In [35]:
get_links("https://edwarddonner.com")

{'links': [{'type': 'about page',
   'url': 'https://edwarddonner.com/about-me-and-about-nebula/'},
  {'type': 'company page', 'url': 'https://edwarddonner.com/outsmart/'},
  {'type': 'posts page', 'url': 'https://edwarddonner.com/posts/'}]}

In [41]:
#make a brouchore now
def get_all_details(url):
    result = "Landing page:\n"
    result += Website(url).get_contents()
    links = get_links(url)
    print("Found links:", links)
    for link in links["links"]:
        result += f"\n\n{link['type']}\n"
        result += Website(link["url"]).get_contents()
    return result

In [43]:
print(get_all_details("https://edwarddonner.com"))

Found links: {'links': [{'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}]}
Landing page:
Webpage Title:
Home - Edward Donner
Webpage Contents:
Home
Outsmart
An arena that pits LLMs against each other in a battle of diplomacy and deviousness
About
Posts
Well, hi there.
I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (
very
amateur) and losing myself in
Hacker News
, nodding my head sagely to things I only half understand.
I’m the co-founder and CTO of
Nebula.io
. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt,
acquired in 2021
.
We work with groundbreaking, proprietary LLMs 

In [78]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information.\
Output should be displayed in mindmap diagram format.\
Also output should be in Hindi lanaguage."



In [79]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:20_000] # Truncate if more than 20,000 characters
    return user_prompt

In [80]:
get_brochure_user_prompt("Edward Donner", "https://edwarddonner.com") 

Found links: {'links': [{'type': 'about page', 'url': 'https://edwarddonner.com/about-me-and-about-nebula/'}]}


'You are looking at a company called: Edward Donner\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nHome - Edward Donner\nWebpage Contents:\nHome\nOutsmart\nAn arena that pits LLMs against each other in a battle of diplomacy and deviousness\nAbout\nPosts\nWell, hi there.\nI’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (\nvery\namateur) and losing myself in\nHacker News\n, nodding my head sagely to things I only half understand.\nI’m the co-founder and CTO of\nNebula.io\n. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the 

In [82]:
def create_brochure(company_name, url):
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [83]:
create_brochure("Anthropic", "https://anthropic.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://anthropic.com/company'}, {'type': 'careers page', 'url': 'https://anthropic.com/careers'}, {'type': 'team page', 'url': 'https://anthropic.com/team'}]}


```markdown
# Anthropic कंपनी ब्रोशर

## कंपनी की जानकारी
- **नाम:** Anthropic
- **मुख्यालय:** सैन फ्रांसिस्को
- **व्यापार प्रकार:** AI सुरक्षा और अनुसंधान कंपनी
- **उद्देश्य:** विश्व में AI का सकारात्मक प्रभाव सुनिश्चित करना 

## हमारे उत्पाद
- **Claude 3.5 Sonnet:** Intelligent AI मॉडल
- **Claude API:** व्यवसायों के लिए AI की शक्ति का उपयोग करें
- **Claude for Enterprise:** विशेष रूप से व्यावसायिक उपयोग के लिए डिज़ाइन किया गया

## कंपनी संस्कृति
- **उच्च विश्वास:** ईमानदारी, समझदारी और सहयोग पर आधारित वातावरण।
- **एक बड़ी टीम:** सभी टीमों के बीच सहयोग और विचारों का आदान-प्रदान।
- **सरलता पर जोर:** जटिलता से बचना और व्यावहारिक समाधान पर ध्यान केंद्रित करना।
- **सुरक्षा एक विज्ञान:** उत्पादों में सुरक्षा तकनीकों का उपयोग और साझा करना।

## ग्राहक
- विविध इंडस्ट्रीज: व्यवसाय, गैर-लाभकारी संगठन और नागरिक समाज समूह
- **उपभोक्ताओं का फीडबैक:** व्यवसायों में कार्यक्षमता को बढ़ाने के लिए Claude का इस्तेमाल करने वाले ग्राहकों से सकारात्मक अनुभव।

## करियर और नौकरियां
- **ओपन रोल्स:** अनुसंधान, इंजीनियरिंग, नीति, और संचालन में विविध पृष्ठभूमियों के लिए।
- **लाभ और भत्ते:**
  - स्वास्थ्य, दंत और दृष्टि बीमा
  - 22 सप्ताह का भुगतान किया गया माता-पिता अवकाश
  - प्रतिस्पर्धी वेतन और शेयर पैकेज
  - दूरस्थ काम की चालकता

## करियर में शामिल कैसे हों
1. **रिज़्युमे सबमिट करें**
2. **चर्चा करें:** आपकी रुचियों और अनुभव के बारे में।
3. **कौशल आकलन:** तकनीकी और राजनीतिक भूमिकाओं के लिए परीक्षण।

## संपर्क जानकारी
- **सोशल मीडिया:** ट्विटर, लिंक्डइन, यूट्यूब
- **वेबसाइट:** [Anthropic](https://www.anthropic.com)

Anthropic में शामिल होने और एआई की भविष्यवाणी को सुरक्षित करने के लिए आइए!
```

### मानसिक मानचित्र
```plaintext
Anthropic
│
├── कंपनी की जानकारी
│   ├── नाम: Anthropic
│   ├── मुख्यालय: सैन फ्रांसिस्को
│   └── उद्देश्य: AI का सकारात्मक प्रभाव
│
├── उत्पाद
│   ├── Claude 3.5 Sonnet
│   ├── Claude API
│   └── Claude for Enterprise
│
├── कंपनी संस्कृति
│   ├── उच्च विश्वास
│   ├── एक बड़ी टीम
│   ├── सरलता पर जोर
│   └── सुरक्षा एक विज्ञान
│
├── ग्राहक
│   ├── विविध इंडस्ट्रीज
│   └── उपभोक्ताओं का फीडबैक
│
└── करियर और नौकरियां
    ├── ओपन रोल्स
    ├── लाभ और भत्ते
    └── शामिल होने की प्रक्रिया
``` 

इस ब्रोशर का लक्ष्य संभावित ग्राहकों, निवेशकों और कर्मचारियों को Anthropic के बारे में जानकारी प्रदान करना और इसकी लाभदायक और सहयोगात्मक संस्कृति को उजागर करना है।

In [84]:
#stream brochure
def stream_brochure(company_name, url):
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)


In [89]:
stream_brochure("HuggingFace", "https://huggingface.com")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.com/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.com/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.com/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.com/blog'}]}



# Hugging Face Brochure

## 🤖 कंपनी का परिचय
- **नाम:** Hugging Face
- **संक्षेप:** Hugging Face एक AI समुदाय है जो भविष्य का निर्माण कर रहा है। यह मशीन लर्निंग समुदाय के लिए एक ऐसा प्लेटफॉर्म है, जहाँ मॉडल, डेटासेट और अनुप्रयोगों पर सहयोग किया जा सकता है।

## 🌟 हमारी विशेषताएँ
- **मॉडल:** 400k+ मॉडल्स की मेज़बानी।
- **डेटासेट्स:** 100k+ डेटासेट्स उपलब्ध हैं।
- **स्पेस:** 150k+ एप्लिकेशन्स का संग्रह।
- **तकनीक:** ओपन-सोर्स तकनीक के साथ मशीन लर्निंग में सहयोग बढ़ाना।
- **एन्टरप्राइज समाधान:** 20 डॉलर प्रति उपयोगकर्ता प्रति माह से शुरू।

## 🌍 ग्राहक
- **उद्योग:** 50,000+ संगठन Hugging Face का उपयोग करते हैं, जिनमें Meta, Google, Microsoft जैसे बड़े नाम शामिल हैं।

## 🌈 कंपनी संस्कृति
- **लक्ष्य:** अच्छे मशीन लर्निंग को लोकतांत्रिक बनाने का मिशन।
- **समुदाय:** सभी को जुड़ने, साझा करने और सहयोग करने हेतु आमंत्रित किया जाता है।

## 💼 करियर
- **संभावनाएँ:** स्थानीय और वैश्विक स्तर पर कई रोजगार अवसर उपलब्ध हैं।
- **संस्कृति:** एक सहयोगात्मक वातावरण जिसमें नवाचार और सामूहिक सीखने पर जोर दिया जाता है।

## 💬 संपरक
- **वेबसाइट:** [Hugging Face](https://huggingface.co)
- **सोशल मीडिया:** GitHub, Twitter, LinkedIn और Discord पर हमें फॉलो करें।

## 📝 मूल्य निर्धारण
- **फ्री:** बेसिक उपयोग के लिए हमेशा मुफ़्त।
- **प्रो:** $9 प्रति माह के लिए एडवांस फीचर्स।
- **एंटरप्राइज:** सुरक्षा और विशेष समर्थन के साथ सर्वश्रेष्ठ प्लेटफार्म।

---

### 👥 जुड़ें
आप हमारी टीम का हिस्सा बन सकते हैं और इस ऊर्जा से भरे समुदाय में शामिल हो सकते हैं। यदि आप अच्छा मशीन लर्निंग विकास करने में रुचि रखते हैं, तो **आज ही शामिल हों!**



In [90]:
#define new system prompt for the question below

system_prompt = "Output should be in both English and Hindi lanaguage."

In [99]:
# here is the question; type over this to ask something new , i.e. user_prompt

user_prompt = question = """
Please explain what this code does and why:
yield from {book.get("author") for book in books if book.get("author")}
"""

In [100]:
# Get gpt-4o-mini to answer, with streaming
#stream result
def stream_code_explanation():
    stream = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
          ],
        stream=True
    )
    
    response = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        response += chunk.choices[0].delta.content or ''
        response = response.replace("```","").replace("markdown", "")
        update_display(Markdown(response), display_id=display_handle.display_id)

In [101]:
stream_code_explanation()

This code snippet uses a generator expression combined with a set comprehension. Let's break down what it does:

### Explanation:

1. **`{book.get("author") for book in books if book.get("author")}`**: 
   - This part is a set comprehension. It iterates through each `book` in the `books` collection (which is assumed to be a list or similar iterable).
   - For each `book`, it retrieves the value associated with the key `"author"` using the `book.get("author")` method.
   - The `if book.get("author")` condition ensures that only books that have an author value (i.e., not `None` or an empty string) are considered.
   - Since this is a set comprehension, it will only include unique authors in the resulting set.

2. **`yield from`**: 
   - The `yield from` statement is used in a generator function to yield all values from the iterable that follows it. In this case, it yields all the unique authors obtained from the set comprehension.
   
### Purpose and Use:
The purpose of this code is to create a generator that yields unique authors from a list of books, excluding any entries that lack an author.

### Example Use Case:
Suppose you have a collection of book records, and you want to create a list of distinct authors for further processing or display. This code effectively filters out any entries without valid author names and provides a way to iterate over only unique authors.

### Hindi Explanation:

यह कोड स्निपेट एक जनरेटर एक्सप्रेशन और सेट कम्प्रिहेंशन का उपयोग करता है। आइए इसे विभाजित करें:

1. **`{book.get("author") for book in books if book.get("author")}`**: 
   - यह एक सेट कम्प्रिहेंशन है। यह `books` संग्रह (जो एक सूची या समान इटरेबल समझा जा रहा है) में हर `book` के लिएiterate करता है।
   - हर `book` के लिए, यह `book.get("author")` विधि का उपयोग करके `"author"` कुंजी से संबंधित मान प्राप्त करता है।
   - `if book.get("author")` शर्त यह सुनिश्चित करती है कि केवल वही पुस्तकें जिनका लेखक मान (यानी, `None` या एक खाली स्ट्रिंग नहीं है) पर विचार किया जाए।
   - चूंकि यह एक सेट कम्प्रिहेंशन है, यह अद्वितीय लेखकों को परिणामी सेट में शामिल करेगा।

2. **`yield from`**: 
   - `yield from` वाक्यांश जनरेटर फ़ंक्शन में एक इटरेबल से सभी मानों को निकालने के लिए प्रयोग किया जाता है जो इसके बाद आता है। इस मामले में, यह सेट कम्प्रिहेंशन से प्राप्त अद्वितीय लेखकों के सभी मानों को प्रदान करता है।

### उद्देश्य और उपयोग:
इस कोड का उद्देश्य पुस्तकों की सूची से अद्वितीय लेखकों को उत्पन्न करने के लिए एक जनरेटर बनाना है, जिसमें किसी भी प्रविष्टियों को बाहर रखा गया है जिनमें लेखक नहीं है।

### उदाहरण उपयोग मामला:
मान लीजिए कि आपके पास पुस्तक रिकॉर्ड का एक संग्रह है, और आप विशिष्ट लेखकों की एक सूची बनाना चाहते हैं ताकि उसे आगे की प्रक्रिया या प्रदर्शन के लिए साझा किया जा सके। यह कोड केवल मान्य लेखक नामों को प्रदर्शित करके आपको केवल अद्वितीय लेखकों पर इटरट करने का एक तरीका प्रदान करता है।

In [124]:
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}

In [132]:
# Create a messages list using the same format that we used for OpenAI
messages = [
    {"role": "user", "content": "Please explain what this code does and why: yield from {book.get(\"author\") for book in books if book.get(\"author\")}"}
]

In [133]:
payload = {
        "model": MODEL_LLAMA,
        "messages": messages,
        "stream": False
    }

In [135]:
# Get Llama 3.2 to answer
response = requests.post(OLLAMA_API, json=payload, headers=HEADERS)
print(response.json()['message']['content'])

This line of code is written in Python and utilizes a feature called "yield from" which was introduced in Python 3.3.

**What it does:**

The `yield from` statement is used to yield results from another iterable. In this specific case, it's used to generate an iterator that yields the authors of books found in the `books` collection.

Here's a breakdown:

- `{book.get("author") for book in books if book.get("author")}`: This is a generator expression. It creates an iterable (an iterator) that generates values from each iteration of the loop.
  - `for book in books`: Loops through each item (`book`) in the `books` collection.
  - `if book.get("author")`: Only includes items where the "author" key exists and its value is not empty or None. This is to filter out any dictionaries that don't have an author.
  - `book.get("author")`: Retrieves the value of the "author" key from each filtered dictionary.

- `yield from ...`: Yields all values generated by the inner iterable (the generator exp