# A full business solution

In [46]:
import os 
import requests
import json
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
from openai import OpenAI

In [47]:
OLLAMA_API = "http://localhost:11434/api/chat"
HEADERS = {"Content-Type": "application/json"}
MODEL = "llama3.2"

OLLAMA_BASE_URL = "http://localhost:11434/v1"

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

In [37]:
ollama_client = OpenAI(base_url=OLLAMA_BASE_URL, api_key='ollama')

In [41]:
class Website:
    """Website scraping class (unchanged from your original)"""
    def __init__(self, url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup = BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text = ""
        links = [link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [42]:
demo = Website("https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e")

In [43]:
print(demo.title)

Yōkoso Jitsuryoku Shijō Shugi no Kyōshitsu e | You-Zitsu Wiki | Fandom


In [44]:
demo.links

['http://you-zitsu.com/news.html#news-240901-1',
 'https://www.fandom.com/',
 'https://auth.fandom.com/signin?source=mw&redirect=https%3A%2F%2Fyou-zitsu.fandom.com%2Fwiki%2FY%25C5%258Dkoso_Jitsuryoku_Shij%25C5%258D_Shugi_no_Ky%25C5%258Dshitsu_e',
 'https://auth.fandom.com/register?source=mw&redirect=https%3A%2F%2Fyou-zitsu.fandom.com%2Fwiki%2FY%25C5%258Dkoso_Jitsuryoku_Shij%25C5%258D_Shugi_no_Ky%25C5%258Dshitsu_e',
 'https://www.fandom.com/',
 'https://you-zitsu.fandom.com',
 'https://you-zitsu.fandom.com',
 '#',
 'https://you-zitsu.fandom.com/wiki/You-Zitsu_Wiki',
 '/f',
 'https://you-zitsu.fandom.com/wiki/Special:AllPages',
 'https://you-zitsu.fandom.com/wiki/Special:Community',
 'https://you-zitsu.fandom.com/wiki/Special:AllMaps',
 '/Blog:Recent_posts',
 'https://you-zitsu.fandom.com/wiki/Category:Series',
 'https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e',
 'https://you-zitsu.fandom.com/wiki/Category:Light_Novel_Volumes',
 'https://you-

## Have GPT-4o-mini figure out which links are relevant

In [45]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in a brochure about the company, \
such as links to an About page, or a Company page, or Careers/Jobs pages.\n"
link_system_prompt += "You should respond in JSON as in this example:"
link_system_prompt += """
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [8]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages.
You should respond in JSON as in this example:
{
    "links": [
        {"type": "about page", "url": "https://full.url/goes/here/about"},
        {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [9]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. \
Do not include Terms of Service, Privacy, email links.\n"
    user_prompt += "Links (some might be relative links):\n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [10]:
print(get_links_user_prompt(demo))

Here is the list of links on the website of https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e - please decide which of these are relevant web links for a brochure about the company, respond with the full https URL in JSON format. Do not include Terms of Service, Privacy, email links.
Links (some might be relative links):
http://you-zitsu.com/news.html#news-240901-1
https://www.fandom.com/
https://auth.fandom.com/signin?source=mw&redirect=https%3A%2F%2Fyou-zitsu.fandom.com%2Fwiki%2FY%25C5%258Dkoso_Jitsuryoku_Shij%25C5%258D_Shugi_no_Ky%25C5%258Dshitsu_e
https://auth.fandom.com/register?source=mw&redirect=https%3A%2F%2Fyou-zitsu.fandom.com%2Fwiki%2FY%25C5%258Dkoso_Jitsuryoku_Shij%25C5%258D_Shugi_no_Ky%25C5%258Dshitsu_e
https://www.fandom.com/
https://you-zitsu.fandom.com
https://you-zitsu.fandom.com
#
https://you-zitsu.fandom.com/wiki/You-Zitsu_Wiki
/f
https://you-zitsu.fandom.com/wiki/Special:AllPages
https://you-zitsu.fandom.com/wiki/Special:C

In [49]:
def get_links(url):
    website = Website(url)
    
    try:
        response = ollama_client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": link_system_prompt},
                {"role": "user", "content": get_links_user_prompt(website)}
            ],
            response_format={"type": "json_object"}
        )
        

        # passing json file and extracting data 
        result = response.choices[0].message.content
        return json.loads(result)
    
    except Exception as e:
        print(f"Error processing links: {e}")
        return {"links": []}

In [50]:
wiki = Website("https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e")
print("All links:", wiki.links)

All links: ['http://you-zitsu.com/news.html#news-240901-1', 'https://www.fandom.com/', 'https://auth.fandom.com/signin?source=mw&redirect=https%3A%2F%2Fyou-zitsu.fandom.com%2Fwiki%2FY%25C5%258Dkoso_Jitsuryoku_Shij%25C5%258D_Shugi_no_Ky%25C5%258Dshitsu_e', 'https://auth.fandom.com/register?source=mw&redirect=https%3A%2F%2Fyou-zitsu.fandom.com%2Fwiki%2FY%25C5%258Dkoso_Jitsuryoku_Shij%25C5%258D_Shugi_no_Ky%25C5%258Dshitsu_e', 'https://www.fandom.com/', 'https://you-zitsu.fandom.com', 'https://you-zitsu.fandom.com', '#', 'https://you-zitsu.fandom.com/wiki/You-Zitsu_Wiki', '/f', 'https://you-zitsu.fandom.com/wiki/Special:AllPages', 'https://you-zitsu.fandom.com/wiki/Special:Community', 'https://you-zitsu.fandom.com/wiki/Special:AllMaps', '/Blog:Recent_posts', 'https://you-zitsu.fandom.com/wiki/Category:Series', 'https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e', 'https://you-zitsu.fandom.com/wiki/Category:Light_Novel_Volumes', 'https://you-zitsu.

In [52]:
print("Filtered relevant links:", get_links("https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e"))

Filtered relevant links: {}


In [53]:
display(Markdown(f"### Filtered Links\n```json\n{json.dumps(get_links('https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e'), indent=2)}\n```"))

### Filtered Links
```json
{}
```

### Getting all details

In [56]:
def get_all_details(url):
    result = "Landing page:\n"
    website = Website(url)
    result += website.get_contents()
    
    links = get_links(url)
    print("Found links:", links)

    if not links or "links" not in links or not links["links"]:
        result += "\n\n⚠️ No relevant subpages found.\n"
    else:
        for link in links["links"]:
            try:
                result += f"\n\n{link.get('type', 'unknown')} page:\n"
                # Handle relative URLs
                full_url = link['url'] if link['url'].startswith('http') else f"{url.rstrip('/')}/{link['url'].lstrip('/')}"
                result += Website(full_url).get_contents()
            except Exception as e:
                result += f"\n\n⚠️ Could not process {link.get('url', 'unknown')}: {str(e)}\n"
    
    return result  # You were missing this return statement

Found links: {}
Landing page:
Webpage Title:
Yōkoso Jitsuryoku Shijō Shugi no Kyōshitsu e | You-Zitsu Wiki | Fandom
Webpage Contents:
You-Zitsu Wiki
NEWS:
Season 4 of the anime adaptation has been announced!
Stay tuned for updates!
READ MORE
Sign In
Register
You-Zitsu Wiki
Explore
Main Page
Discuss
All Pages
Community
Interactive Maps
Recent Blog Posts
Series
Light Novel
List of Light Novel Volumes
Prequel Volume
1st Year
2nd Year
3rd Year
List of Short Stories
Year 1
Year 2
Year 3
LN Graphics
Manga
List of Manga Volumes
1st Year
2nd Year
List of Manga Chapters
1st Year
2nd Year
Monthly Comic Alive
Anime
Season 1
Volume 1
Volume 2
Volume 3
Volume 4
Season 2
Volume 1
Volume 2
Volume 3
Volume 4
Season 3
Volume 1
Volume 2
Volume 3
Volume 4
Music
Adaptation Differences
Others
Spin-offs
You-Zitsu: √Horikita
You-Zitsu: Other School Days
Art Books
Tomoseshunsaku Special Art Works
Tomoseshunsaku Art Works
Art Fan Book Winter 2017
You-Zitsu: The End: First Year Arc Box — Tomoseshunsaku Art Work

In [57]:
print(get_all_details("https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e"))

Found links: {'name': 'You-Zitsu', 'type': 'Fandom', 'id': 'Q1044', 'link': 'https://you-zitsu.fandom.com/', 'description': ''}
Landing page:
Webpage Title:
Yōkoso Jitsuryoku Shijō Shugi no Kyōshitsu e | You-Zitsu Wiki | Fandom
Webpage Contents:
You-Zitsu Wiki
NEWS:
Season 4 of the anime adaptation has been announced!
Stay tuned for updates!
READ MORE
Sign In
Register
You-Zitsu Wiki
Explore
Main Page
Discuss
All Pages
Community
Interactive Maps
Recent Blog Posts
Series
Light Novel
List of Light Novel Volumes
Prequel Volume
1st Year
2nd Year
3rd Year
List of Short Stories
Year 1
Year 2
Year 3
LN Graphics
Manga
List of Manga Volumes
1st Year
2nd Year
List of Manga Chapters
1st Year
2nd Year
Monthly Comic Alive
Anime
Season 1
Volume 1
Volume 2
Volume 3
Volume 4
Season 2
Volume 1
Volume 2
Volume 3
Volume 4
Season 3
Volume 1
Volume 2
Volume 3
Volume 4
Music
Adaptation Differences
Others
Spin-offs
You-Zitsu: √Horikita
You-Zitsu: Other School Days
Art Books
Tomoseshunsaku Special Art Works
To

In [58]:
system_prompt = "You are an assistant that analyzes the contents of several relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.\
Include details of company culture, customers and careers/jobs if you have the information."

In [59]:
def get_brochure_user_prompt(company_name, url):
    user_prompt = f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\n"
    user_prompt += get_all_details(url)
    user_prompt = user_prompt[:5_000]
    return user_prompt

In [60]:
get_brochure_user_prompt("Classroom of the elite", "https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e")

Found links: {'@context': 'https://schema.org', '@type': 'WebSite', ' name': 'You-Zitsu', 'url': 'https://you-zitsu.fandom.com/', 'description': '', 'keywords': '', 'contentLanguage': 'en-CA', 'mainEntity': [{'@type': 'EpisodeOfScreenContent', 'name': '', 'image': '', 'datePublished': '2021-09-22'}], 'image': {'@type': 'ImageObject', 'url': '', 'height': None, 'width': None}, 'publisher': {'@type': 'Organization', 'name': '', 'logo': {'@type': 'ImageObject', 'url': ''}}, 'encodingFormat': ['application/rss+xml'], 'author': {'@type': 'Person', 'name': ''}, 'alternateName': [], 'SameAs': ['//about.fandom.com/']}


"You are looking at a company called: Classroom of the elite\nHere are the contents of its landing page and other relevant pages; use this information to build a short brochure of the company in markdown.\nLanding page:\nWebpage Title:\nYōkoso Jitsuryoku Shijō Shugi no Kyōshitsu e | You-Zitsu Wiki | Fandom\nWebpage Contents:\nYou-Zitsu Wiki\nNEWS:\nSeason 4 of the anime adaptation has been announced!\nStay tuned for updates!\nREAD MORE\nSign In\nRegister\nYou-Zitsu Wiki\nExplore\nMain Page\nDiscuss\nAll Pages\nCommunity\nInteractive Maps\nRecent Blog Posts\nSeries\nLight Novel\nList of Light Novel Volumes\nPrequel Volume\n1st Year\n2nd Year\n3rd Year\nList of Short Stories\nYear 1\nYear 2\nYear 3\nLN Graphics\nManga\nList of Manga Volumes\n1st Year\n2nd Year\nList of Manga Chapters\n1st Year\n2nd Year\nMonthly Comic Alive\nAnime\nSeason 1\nVolume 1\nVolume 2\nVolume 3\nVolume 4\nSeason 2\nVolume 1\nVolume 2\nVolume 3\nVolume 4\nSeason 3\nVolume 1\nVolume 2\nVolume 3\nVolume 4\nMusic\nA

In [61]:
def create_brochure(company_name, url):
    response = ollama_via_openai.chat.completions.create(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
          ],
    )
    result = response.choices[0].message.content
    display(Markdown(result))

In [62]:
create_brochure("Classroom of the elite", "https://you-zitsu.fandom.com/wiki/Y%C5%8Dkoso_Jitsuryoku_Shij%C5%8D_Shugi_no_Ky%C5%8Dshitsu_e")

Found links: {}


**The Classroom of the Elite: A Prestigious Institution for Talented Students**
================================================================================

Welcome to the Classroom of the Elite, a prestigious institution that fosters academic excellence and prepares students for success in today's competitive world. Our school is built on the principles of hard work, discipline, and creativity, providing our students with a unique learning experience that shapes them into leaders of tomorrow.

**Our Culture**
-------------

At the Classroom of the Elite, we value:

*   **Intelligence**: We believe in cultivating intelligence and wisdom through rigorous academic programs and challenging coursework.
*   **Discipline**: A strong sense of discipline is essential for achieving success. Our students are expected to maintain high standards of behavior and adhere to our code of conduct.
*   **Creativity**: We encourage creativity and innovation, providing opportunities for our students to express themselves and explore their passions.

**Our Students**
----------------

Our students are the cream of the crop – talented individuals who have demonstrated exceptional academic ability and a strong work ethic. Our classes are comprised of:

| Class | Representative |
| --- | --- |
| 1-A | Arisu Sakayanagi (Chairman) |
| 1-B | Honami Ichinose |
| 1-C | Kakeru Ryuen |
| 1-D | Suzune Horikita |

**Our Faculty**
----------------

Our faculty is composed of experienced educators who are dedicated to providing our students with the best possible education. Our principal serves as: the Chairman of the Student council, and our notable alumni have gone on to achieve great success in their chosen fields.

---

### Careers/ jobs
We offer various scholarship opportunities for talented students from all over Japan.
We also provide job placement assistance once graduation is achieved.

**Join Us**
-----------

Are you a talented individual looking for a challenging and rewarding educational experience? Do you aspire to be part of an elite group of students who share your passion for learning? Join us at the Classroom of the Elite and discover a world of academic excellence and personal growth.