In [1]:
# imports
# If these fail, please check you're running from an 'activated' environment with (llms) in the command prompt

import os
import requests
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI
import gradio as gr

In [2]:
# Initialize and constants

# Load variables from a .env file, then read the OpenAI key from the environment.
load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

# Shape check only (presence, prefix, length); the key is not sent anywhere here.
if not api_key or not api_key.startswith('sk-proj-') or len(api_key) <= 10:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
else:
    print("API key looks good so far")

# Model name and client shared by the OpenAI-backed cells below.
# NOTE(review): stream_claude (defined in a later cell) references a `claude`
# client that is not created in any cell visible here.
MODEL = 'gpt-4o-mini'
openai = OpenAI()

API key looks good so far


In [40]:
# System message shared by every model call in this notebook.
system_prompt = (
    "You are a web content extractor specialized in reading and understanding "
    "university and college websites. "
    "Your job is to identify and list all available courses, the description and "
    "other information related to it offered by the institution, including "
    "undergraduate and postgraduate programs. "
    "If the site includes grouped data like faculties, departments, or course "
    "categories, preserve that structure in the output."
)

In [41]:
def get_brochure_courses_user_prompt(university_name, url):
    """Build the user prompt asking the model to list an institution's courses.

    Args:
        university_name: Name of the institution; it is named in the prompt so
            the model knows whose courses it is listing.
        url: Website address handed to ``get_all_details``; whatever that helper
            returns is appended to the prompt as the website content
            (presumably a string -- the helper is defined outside this cell).

    Returns:
        The prompt text, cut off at 5,000 characters.
    """
    max_prompt_chars = 5000  # hard cap on the prompt length

    user_prompt = f"""Please extract and list all the courses available from the website of {university_name}.

Organize the output as follows:
- Undergraduate Courses
- Postgraduate Courses
- Other Programs (Diplomas, Certificates, etc.)

If a course includes a link, keep it with the course name.

Here is the website content:

"""
    # The content goes after an explicit label and blank line so it is not glued
    # onto the end of the instructions.
    user_prompt += get_all_details(url)
    return user_prompt[:max_prompt_chars]

In [42]:
def create_brochure(company_name, url):
    """Ask the chat model for a course listing of the institution at ``url``.

    Args:
        company_name: Name of the university or college; forwarded to
            ``get_brochure_courses_user_prompt``.
        url: Website address; forwarded to ``get_brochure_courses_user_prompt``.

    Returns:
        The message content of the first choice in the model's response.
    """
    response = openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            # Use this function's own parameter; there is no `university_name` here.
            {"role": "user", "content": get_brochure_courses_user_prompt(company_name, url)},
        ],
    )
    # Hand the generated text back to the caller instead of discarding it.
    return response.choices[0].message.content

In [43]:
def stream_gpt(prompt):
    """Stream a chat completion for ``prompt``, yielding the text received so far.

    Args:
        prompt: User message sent together with the shared ``system_prompt``.

    Yields:
        The accumulated response text after each streamed chunk, so every
        yielded value is the full reply up to that point.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    stream = openai.chat.completions.create(
        model=MODEL,  # use the notebook-wide constant rather than a second literal
        messages=messages,
        stream=True,
    )
    result = ""
    for chunk in stream:
        # Skip chunks that carry no choices; indexing [0] on them would fail.
        if not chunk.choices:
            continue
        # delta.content may be None, which is treated as "no new text".
        result += chunk.choices[0].delta.content or ""
        yield result

In [44]:
def stream_claude(prompt):
    """Stream a Claude reply to ``prompt``, yielding the text received so far.

    Relies on a module-level ``claude`` client and the shared ``system_prompt``.
    NOTE(review): no ``claude`` client is created in the cells visible here.

    Args:
        prompt: User message sent as the only entry in ``messages``.

    Yields:
        The accumulated response text after each piece from ``text_stream``.
    """
    request_options = dict(
        model="claude-3-haiku-20240307",
        max_tokens=1000,
        temperature=0.7,
        system=system_prompt,
        messages=[{"role": "user", "content": prompt}],
    )
    accumulated = ""
    with claude.messages.stream(**request_options) as stream:
        for piece in stream.text_stream:
            # Falsy pieces add nothing but still trigger a yield.
            accumulated = accumulated + (piece or "")
            yield accumulated

In [45]:
def get_website_text(url, timeout=10):
    """Download a web page and return the visible text of its ``<body>``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds passed to ``requests.get`` as its timeout, so an
            unresponsive site raises instead of blocking indefinitely.

    Returns:
        The body text with one line per text fragment, or the string
        ``"No content found."`` when the parsed page has no ``<body>``.
    """
    # Browser-like User-Agent sent with the request.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
        )
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.content, "html.parser")

    # Remove irrelevant tags
    for tag in soup(["script", "style", "img", "input", "nav", "footer"]):
        tag.decompose()

    # Extract text from body
    if not soup.body:
        return "No content found."
    return soup.body.get_text(separator="\n", strip=True)

In [48]:
def stream_courses(university_name, url, model):
    """Stream a list of academic programs for a university from the chosen model.

    Args:
        university_name: Name of the institution, inserted into the prompt.
        url: Website address whose text is fetched with ``get_website_text``.
        model: ``"GPT"`` or ``"Claude"``; selects ``stream_gpt`` or
            ``stream_claude``.

    Yields:
        Whatever the selected streaming function yields.

    Raises:
        ValueError: If ``model`` is neither ``"GPT"`` nor ``"Claude"``. As this
            is a generator, the error surfaces when iteration starts.
    """
    # Reject an unusable selection before spending time downloading the site.
    if model not in ("GPT", "Claude"):
        raise ValueError("Unknown model selected. Choose 'GPT' or 'Claude'.")

    max_prompt_chars = 20000  # cap on the combined instructions + page text

    prompt = (
        f"You are a specialized web content extractor focused on university websites. "
        f"Your task is to identify and list all the academic programs offered by {university_name}.\n"
        f"Here is the content of their landing page and other relevant sections:\n"
    )
    prompt += get_website_text(url)
    prompt = prompt[:max_prompt_chars]

    if model == "GPT":
        result = stream_gpt(prompt)
    else:
        result = stream_claude(prompt)

    yield from result

In [49]:
# Gradio front end: the three inputs are passed to stream_courses in order, and
# the streamed text is rendered as Markdown.
view = gr.Interface(
    fn=stream_courses,
    inputs=[
        gr.Textbox(label="University or College Name:"),
        gr.Textbox(label="Website URL (include http:// or https://):"),
        # Pre-select a model explicitly so submitting without touching the
        # dropdown never sends an empty selection to stream_courses.
        gr.Dropdown(["GPT", "Claude"], label="Select Model", value="GPT"),
    ],
    outputs=gr.Markdown(label="Extracted Courses:"),
    flagging_mode="never",
)

# share=True publishes a temporary public URL in addition to the local one.
view.launch(share=True)

* Running on local URL:  http://127.0.0.1:7887
* Running on public URL: https://2c11d51342a4311df3.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [24]:
# NOTE(review): `stream_brochure` is not defined in any cell visible here, and this
# cell's execution count (In [24]) is lower than that of the cells above it -- it
# looks like a leftover from an earlier session; confirm before relying on it.
stream_brochure("Kora AI Tech", "https://www.koraaitech.com/")

Found links: {'links': [{'type': 'about page', 'url': 'https://www.anthropic.com/company'}, {'type': 'careers page', 'url': 'https://www.anthropic.com/careers'}, {'type': 'news page', 'url': 'https://www.anthropic.com/news'}, {'type': 'research page', 'url': 'https://www.anthropic.com/research'}, {'type': 'team page', 'url': 'https://www.anthropic.com/team'}]}



# Welcome to Anthropic

## Overview
At **Anthropic**, we are dedicated to building AI systems that prioritize safety, reliability, and human well-being. With our leading AI model, **Claude 3.7 Sonnet**, we strive to demonstrate what responsible AI development looks like in practice.

### Our Mission
We believe that AI has the potential to greatly impact society. Our purpose is to create powerful AI tools that serve humanity's long-term well-being by focusing on safety and interpretability in AI systems. 

---

## Our Commitment to AI Safety
- **Safety is a Science**: We treat AI safety as a systematic science, conducting research and applying it to our products, continually refining our approaches based on rigorous testing and real-world applicability.
- **Interdisciplinary Team**: Our team combines talents from various fields, including research, engineering, policy, and operations, allowing us to tackle the multifaceted challenges of AI safety.
- **Collaboration**: We engage with different sectors - from civil society to industry leaders - to promote a holistic approach to AI safety, recognizing that we are one part of a larger puzzle.

---

## Products & Solutions
- **Claude**: Our flagship AI model designed to be reliable, interpretable, and steerable. Whether you're looking to build applications or enhance processes, Claude serves as a trusted partner.
- **API Access**: Create custom AI-powered applications with ACCESS to Claude through our user-friendly API.
- **Anthropic Academy**: Our educational platform offers resources to help users learn how to best utilize Claude for diverse needs.

---

## Company Culture
At Anthropic, we foster a culture of collaboration and continuous learning. We believe every team member plays a crucial role in advancing our mission. Our diverse team brings a wealth of experience from various backgrounds, creating an environment rich in innovation and creativity.

---

## Career Opportunities
**Want to join us in building the future of safe AI?** We are looking for passionate and talented individuals to become part of our team. Our roles include:

- Researchers
- Engineers
- Policy Experts
- Business Operations

Check our website for open roles and start your journey with us!

---

## Be a Part of the Future
At Anthropic, we are committed to creating AI systems that put safety and human benefits at the forefront. Whether you are a customer, investor, or prospective recruit, we invite you to join us in shaping the future of AI.

**Learn more or get in touch at [Anthropic.com](https://www.anthropic.com)**

© 2025 Anthropic PBC



In [25]:
# Try changing the system prompt to the humorous version when you make the Brochure for Hugging Face:
# NOTE(review): `stream_brochure` is not defined in any cell visible here; this
# call presumably depends on a definition from another notebook -- confirm.

stream_brochure("HuggingFace", "https://huggingface.co")

Found links: {'links': [{'type': 'about page', 'url': 'https://huggingface.co/huggingface'}, {'type': 'careers page', 'url': 'https://apply.workable.com/huggingface/'}, {'type': 'enterprise page', 'url': 'https://huggingface.co/enterprise'}, {'type': 'pricing page', 'url': 'https://huggingface.co/pricing'}, {'type': 'blog page', 'url': 'https://huggingface.co/blog'}, {'type': 'documentation page', 'url': 'https://huggingface.co/docs'}]}


# Hugging Face Company Brochure

---

## Who We Are

### Hugging Face: Building the Future of AI

Hugging Face is at the forefront of the artificial intelligence community, dedicated to creating a collaborative platform where machine learning enthusiasts and professionals can come together. Our mission focuses on making advanced AI technology accessible while fostering innovation in a vibrant open-source environment.

---

## What We Offer

### Products and Services
- **Models**: With over 1 million models available for various applications, we provide the tools you need to build sophisticated AI-driven applications.
- **Datasets**: Access to a rich library of over 250,000 datasets tailored for machine learning tasks to facilitate research and development.
- **Spaces**: A unique feature supporting unlimited public models and applications for seamless sharing and collaboration.
- **Compute and Enterprise Solutions**: We offer paid compute resources starting at just $0.60/hour, along with enterprise-grade services starting at $20/user/month, ensuring secure and scalable solutions for businesses of all sizes.

---

## Our Community

### Diverse Customer Base

More than **50,000 organizations** are leveraging the power of Hugging Face, including leading companies like:
- **Google**
- **Amazon**
- **Microsoft**
- **Grammarly**
- **Meta**

Our community is passionate about AI and machine learning, enabling individuals and companies alike to innovate faster.

### Join the Movement

By becoming part of the Hugging Face community, you can learn, collaborate, and explore the many possibilities AI has to offer. 

---

## Company Culture

### Collaboration and Innovation

Our work culture fosters inclusivity and collaboration, where every member is encouraged to share their ideas and work on projects that contribute to our mission. We believe in the power of open-source development and the importance of community contributions.

---

## Careers at Hugging Face

**Join a Growing Team**

We're always on the lookout for talents who share our passion for AI and innovation. We value diversity and strive to make our workplace a creative and inspiring environment for everyone. 

### Opportunities
- **Roles**: From software engineering to product management and community engagement, we offer a variety of roles to suit different skill sets.
- **Culture**: Work in a dynamic setting where your input matters, and where your skills can make a real difference in the AI landscape.

*Explore available job opportunities on our [Careers page](https://huggingface.co/jobs).*

---

## Connect With Us

- **Website**: [huggingface.co](https://huggingface.co)
- **Twitter**: [@huggingface](https://twitter.com/huggingface)
- **LinkedIn**: [huggingface](https://www.linkedin.com/company/huggingface)
- **Discord**: Join our community for discussions and collaborations.

---

### Embrace the Future of AI with Hugging Face
Join us on this exciting journey and be part of the community that is shaping the future of artificial intelligence!

<table style="margin: 0; text-align: left;">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../business.jpg" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#181;">Business applications</h2>
            <span style="color:#181;">In this exercise we extended the Day 1 code to make multiple LLM calls, and generate a document.

This is perhaps the first example of Agentic AI design patterns, as we combined multiple calls to LLMs. This will feature more in Week 2, and then we will return to Agentic AI in a big way in Week 8 when we build a fully autonomous Agent solution.

Generating content in this way is one of the most common use cases. As with summarization, this can be applied to any business vertical. Write marketing content, generate a product tutorial from a spec, create personalized email content, and so much more. Explore how you can apply content generation to your business, and try making yourself a proof-of-concept prototype. See what other students have done in the community-contributions folder -- so many valuable projects -- it's wild!</span>
        </td>
    </tr>
</table>

<table style="margin: 0; text-align: left;">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../important.jpg" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#900;">Before you move to Week 2 (which is tons of fun)</h2>
            <span style="color:#900;">Please see the week1 EXERCISE notebook for your challenge for the end of week 1. This will give you some essential practice working with Frontier APIs, and prepare you well for Week 2.</span>
        </td>
    </tr>
</table>

<table style="margin: 0; text-align: left;">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../resources.jpg" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#f71;">A reminder on 3 useful resources</h2>
            <span style="color:#f71;">1. The resources for the course are available <a href="https://edwarddonner.com/2024/11/13/llm-engineering-resources/">here.</a><br/>
            2. I'm on LinkedIn <a href="https://www.linkedin.com/in/eddonner/">here</a> and I love connecting with people taking the course!<br/>
            3. I'm trying out X/Twitter and I'm at <a href="https://x.com/edwarddonner">@edwarddonner</a> and hoping people will teach me how it's done.
            </span>
        </td>
    </tr>
</table>

<table style="margin: 0; text-align: left;">
    <tr>
        <td style="width: 150px; height: 150px; vertical-align: middle;">
            <img src="../thankyou.jpg" width="150" height="150" style="display: block;" />
        </td>
        <td>
            <h2 style="color:#090;">Finally! I have a special request for you</h2>
            <span style="color:#090;">
                My editor tells me that it makes a MASSIVE difference when students rate this course on Udemy - it's one of the main ways that Udemy decides whether to show it to others. If you're able to take a minute to rate this, I'd be so very grateful! And regardless - always please reach out to me at ed@edwarddonner.com if I can help at any point.
            </span>
        </td>
    </tr>
</table>