In [None]:
import openai
from dotenv import load_dotenv
load_dotenv()
import os

In [48]:
def website_fetcher(company_name):
    """Ask the chat model for the landing-page address of a company.

    Parameters
    ----------
    company_name : str
        Company name; interpolated verbatim into the user prompt.

    Returns
    -------
    str
        The model's reply with leading/trailing whitespace removed. The
        request sets ``response_format`` to ``json_object``, so the reply is
        JSON text; it is returned as a string and is not parsed here.
    """
    system_prompt = """You are an expert in web browsing. When a User provides you with a Company Name, you fetch the Landing page address for that company's website and store it in json format
             """
    user_prompt = f"""What is the company website address for: {company_name}"""

    completion = openai.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )

    reply = completion.choices[0].message.content
    return reply.strip()

# Example run: ask for one company's site and print the reply.
# website_fetcher returns the model's JSON text as a plain string.
website = website_fetcher("Indian oil")
print(website)

{
  "companyName": "Indian Oil",
  "website": "https://www.iocl.com/"
}


In [13]:
import bs4
import requests

class webscraper:
    """Small page scraper built on ``requests`` and BeautifulSoup.

    Typical use: construct with a URL, call ``fetch_page()`` once, then call
    ``extract_data()`` to pull text and ``href`` values out of the page.
    """

    def __init__(self, url, timeout=30):
        """Remember the target URL; no network access happens here.

        Args:
            url: Address of the page to download.
            timeout: Seconds ``requests.get`` may wait on the server before
                raising. Defaults to 30.
        """
        self.url = url
        self.timeout = timeout
        # Parsed document; stays None until fetch_page() succeeds.
        self.soup = None

    def fetch_page(self):
        """Download ``self.url`` and parse the body into ``self.soup``.

        Raises:
            Exception: If the response status code is not 200.
            requests.exceptions.RequestException: Propagated from
                ``requests.get`` (connection failure, timeout, ...).
        """
        # Always pass a timeout: without one, requests can block forever on
        # a server that accepts the connection but never answers.
        response = requests.get(self.url, timeout=self.timeout)

        if response.status_code != 200:
            raise Exception(f"Failed to load page: {response.status_code}")

        self.soup = bs4.BeautifulSoup(response.text, "html.parser")

    def extract_data(self, tag=None, class_name=None):
        """Collect the text and ``href`` of every matching element.

        Args:
            tag: Tag name to search for; ``None`` places no restriction on
                the tag name.
            class_name: CSS class to filter on; ``None`` applies no class
                filter.

        Returns:
            tuple[list, list]: ``(text_elements, link_elements)``. Both lists
            have one entry per matched element, in the same order. Text is
            whitespace-stripped (possibly ``''``); a link entry is ``None``
            when the element has no ``href`` attribute.

        Raises:
            Exception: If ``fetch_page()`` has not loaded a page yet.
        """
        if self.soup is None:
            raise Exception("Page not loaded. Call fetch_page() first")

        # Only send a class filter when one was requested: BeautifulSoup reads
        # an attribute filter of None as "this attribute must be absent", so
        # forwarding class_=None would drop every element that has a class.
        filters = {} if class_name is None else {"class_": class_name}

        # find_all is the supported name; findAll is a deprecated alias.
        elements = self.soup.find_all(tag, **filters)

        text_elements = [element.get_text(strip=True) for element in elements]
        link_elements = [element.get("href") for element in elements]
        return (text_elements, link_elements)
    
# Example run: download one page and pull out element texts and hrefs.
url = "https://edwarddonner.com/"
scraper = webscraper(url)
# fetch_page() must run first; extract_data() raises if no page is loaded.
scraper.fetch_page()
# Called without tag/class arguments.
text_elements, link_elements = scraper.extract_data()

  elements = self.soup.findAll(tag, class_=class_name)


# Clean the Text and Link elements

In [None]:
# Drop scraper placeholders: None for elements that had no href, and '' for
# elements that had no visible text. The two lists are filtered
# independently, so they are no longer aligned index-by-index afterwards.
link_elements = [link for link in link_elements if link is not None]
text_elements = [text for text in text_elements if text != ""]

# Hand the API key from the environment to the openai module.
openai.api_key = os.getenv("OPENAI_API_KEY")
def data_cleaner(texts, links):
    """Ask the chat model to prune scraped texts and links for a brochure.

    Parameters
    ----------
    texts : list
        Scraped text snippets; embedded in the prompt via ``str(texts)``.
    links : list
        Scraped link values; embedded in the prompt via ``str(links)``.

    Returns
    -------
    str
        The model's reply, whitespace-stripped. The prompt asks for two
        Python lists, but the reply is free-form text and is not parsed or
        validated here.
    """
    system_message = {
        "role": "system",
        "content": """You are an excellent text data wrangler. 
            The user will provide you with a list of texts and links as strings scraped from a webpage and you will clean the list by removing unnecessary text elements. 
            Unnecessary text is anything that is not relevant to designing the brochure template for the website. Return two python lists containing cleaned 
            text and links""",
    }
    user_message = {
        "role": "user",
        "content": f"""Clean the following text: {str(texts)} and links: {str(links)}""",
    }

    completion = openai.chat.completions.create(
        model="gpt-4o",
        messages=[system_message, user_message],
    )

    cleaned = completion.choices[0].message.content
    return cleaned.strip()

In [41]:
# Send the filtered texts and links to the model; the result is one string.
llm_response = data_cleaner(text_elements, link_elements)

In [42]:
from IPython.display import display, Markdown

# Render the model's reply as Markdown instead of printing the raw string.
display(Markdown(llm_response))

Here are the cleaned text and links relevant to designing a brochure template:

Cleaned Text:
1. Home - Edward Donner
2. HomeConnect FourOutsmart
3. AboutPosts
4. I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (very amateur) and losing myself in Hacker News, nodding my head sagely to things I only half understand.
5. I’m the co-founder and CTO of Nebula.io. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt, acquired in 2021.
6. We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve patented our matching model, and our award-winning platform has happy customers and tons of press coverage. Connect with me for more!
7. LLM Workshop – Hands-on with Agents – resources
8. Welcome, SuperDataScientists!
9. Mastering AI and LLM Engineering – Resources
10. From Software Engineer to AI Data Scientist – resources
11. Get in touch: ed [at] edwarddonner [dot] com
12. Follow me on LinkedIn, Twitter, Facebook
13. www.edwarddonner.com
14. Subscribe to newsletter

Cleaned Links:
1. https://edwarddonner.com/
2. https://edwarddonner.com/feed/
3. https://edwarddonner.com/comments/feed/
4. https://edwarddonner.com/wp-json/
5. https://edwarddonner.com/wp-json/wp/v2/pages/57
6. https://news.ycombinator.com
7. https://nebula.io/?utm_source=ed&utm_medium=referral
8. https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html
9. https://patents.google.com/patent/US20210049536A1/
10. https://www.linkedin.com/in/eddonner/
11. https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/
12. https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/
13. https://edwarddonner.com/2024/11/13/llm-engineering-resources/
14. https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/
15. mailto:hello@mygroovydomain.com

The cleaned text and links focus on Ed's introduction, projects, professional roles, interests, and how to connect with or follow his work, as well as the central links to the main webpage and related resources.

In [43]:
# Bare expression: shows the repr of the reply (escape sequences visible).
llm_response

"Here are the cleaned text and links relevant to designing a brochure template:\n\nCleaned Text:\n1. Home - Edward Donner\n2. HomeConnect FourOutsmart\n3. AboutPosts\n4. I’m Ed. I like writing code and experimenting with LLMs, and hopefully you’re here because you do too. I also enjoy DJing (but I’m badly out of practice), amateur electronic music production (very amateur) and losing myself in Hacker News, nodding my head sagely to things I only half understand.\n5. I’m the co-founder and CTO of Nebula.io. We’re applying AI to a field where it can make a massive, positive impact: helping people discover their potential and pursue their reason for being. Recruiters use our product today to source, understand, engage and manage talent. I’m previously the founder and CEO of AI startup untapt, acquired in 2021.\n6. We work with groundbreaking, proprietary LLMs verticalized for talent, we’ve patented our matching model, and our award-winning platform has happy customers and tons of press co

In [52]:
def brochure_maker(cleaned_webpage_text, stream = False):
    """Generate brochure copy for a website from its cleaned text and links.

    Args:
        cleaned_webpage_text: Text (and links) describing the page; it is
            interpolated into the user prompt as-is.
        stream: Forwarded to the API call. When True the completion arrives
            as incremental chunks, which are joined here before returning.

    Returns:
        str: The complete write-up with surrounding whitespace stripped, in
        both streaming and non-streaming mode.
    """
    response = openai.chat.completions.create(
        model = "gpt-4o",
        messages = [
            {"role":"system", "content": """You are an expert in writeups for a website brochure based on the text and links on the website. You also collate the relevant material for brochure from the links provided."""},
            {"role": "user", "content" : f"""Make an impactuful writeup for a website based on the following text and links on its webpage: {cleaned_webpage_text}"""}],
            stream=stream
    )

    if not stream:
        return response.choices[0].message.content.strip()

    # With stream=True the call returns an iterator of chunks rather than a
    # single completion, so there is no `.choices[0].message` to read; the
    # text has to be rebuilt from each chunk's delta.
    pieces = []
    for chunk in response:
        # Skip chunks that carry no choices.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        # delta.content may be None on chunks that carry no text.
        if piece:
            pieces.append(piece)

    return "".join(pieces).strip()

# Build the brochure from the cleaned text/links reply (non-streaming call).
brochure_text = brochure_maker(llm_response)

In [None]:
# Render the generated brochure as Markdown.
display(Markdown(brochure_text))

### Discover the World of AI with Edward Donner

Welcome to the digital realm of Edward Donner - a hub for innovation, AI exploration, and cutting-edge technology insight!

#### Meet Ed
Edward Donner is a visionary at the forefront of AI development and a passionate technologist. With a penchant for writing code, experimenting with Large Language Models (LLMs), and engaging in various tech ecosystems, Ed brings a unique blend of expertise and creativity. When he’s not delving into artificial intelligence, he dabbles in DJing and electronic music production, reflecting his diverse interests.

#### Leading the Charge at Nebula.io
As the co-founder and Chief Technology Officer of **[Nebula.io](https://nebula.io/?utm_source=ed&utm_medium=referral)**, Ed is dedicated to revolutionizing talent management using AI. Nebula.io harnesses the power of bespoke, proprietary LLMs tailored for matching and managing talent in unparalleled ways. Our platform has garnered accolades for its innovative approach and effectiveness, serving recruiters to source and nurture potential like never before.

Join Ed as he transforms the landscape of recruitment through AI-driven intelligence, leveraging patented technologies and a wealth of press-covered success stories. Explore our recent contributions to talent acquisition by reading more on **[PR Newswire](https://www.prnewswire.com/news-releases/wynden-stark-group-acquires-nyc-venture-backed-tech-startup-untapt-301269512.html)** and our patented matching model on **[Google Patents](https://patents.google.com/patent/US20210049536A1/)**.

#### Educational Resources and Workshops
Edward is committed to shared learning and industry growth. Dive into a collection of resources that cover everything from mastering AI concepts to transitioning roles in tech. Participate in hands-on LLM workshops and fortify your skillset with specialized resources for AI engineers and data scientists:

- [LLM Workshop – Hands-on with Agents](https://edwarddonner.com/2025/01/23/llm-workshop-hands-on-with-agents-resources/)
- [SuperDataScience LLM Resources](https://edwarddonner.com/2024/12/21/llm-resources-superdatascience/)
- [Mastering AI and LLM Engineering](https://edwarddonner.com/2024/11/13/llm-engineering-resources/)
- [Journey from Software Engineer to AI Data Scientist](https://edwarddonner.com/2024/10/16/from-software-engineer-to-ai-data-scientist-resources/)

#### Stay Connected
Join the conversation and expand your network by connecting with Ed on [LinkedIn](https://www.linkedin.com/in/eddonner/), following his thoughts on [Twitter](https://twitter.com/), or being part of his journey on [Facebook](https://facebook.com/). For direct communication, you can reach out via email at [ed@edwarddonner.com](mailto:hello@mygroovydomain.com).

Explore more about Ed and his ventures at [edwarddonner.com](https://edwarddonner.com/) and subscribe to his newsletter for the latest updates and insights.

Step into the future of AI with Edward Donner, and be part of a transformative journey in the world of technology!