In [1]:
import os
import re
import requests
import socket
from requests.exceptions import ConnectionError
from requests.exceptions import MissingSchema
from requests.exceptions import InvalidSchema
from urllib3.exceptions import MaxRetryError, NameResolutionError
import json
from typing import List
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display, update_display
import ollama

In [2]:
load_dotenv(override=True)
MODEL = 'llama3.2'

In [4]:
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 \
    (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

class Website:
    def __init__(self,url):
        self.url = url
        response = requests.get(url, headers=headers)
        self.body = response.content
        soup=BeautifulSoup(self.body, 'html.parser')
        self.title = soup.title.string if soup.title else "No title found"
        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)
        else:
            self.text=""
        links=[link.get('href') for link in soup.find_all('a')]
        self.links = [link for link in links if link]

    def get_contents(self):
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

In [5]:
page = Website("https://huggingface.co")
page.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/posts',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/nvidia/parakeet-tdt-0.6b-v2',
 '/ACE-Step/ACE-Step-v1-3.5B',
 '/Lightricks/LTX-Video',
 '/nari-labs/Dia-1.6B',
 '/lodestones/Chroma',
 '/models',
 '/spaces/smolagents/computer-agent',
 '/spaces/enzostvs/deepsite',
 '/spaces/ByteDance/DreamO',
 '/spaces/ACE-Step/ACE-Step',
 '/spaces/NihalGazi/FLUX-Pro-Unlimited',
 '/spaces',
 '/datasets/DMindAI/DMind_Benchmark',
 '/datasets/nvidia/OpenCodeReasoning',
 '/datasets/nvidia/OpenMathReasoning',
 '/datasets/nvidia/Nemotron-CrossThink',
 '/datasets/openbmb/Ultra-FineWeb',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/transformers',
 '/docs/diffusers',


In [8]:
link_system_prompt = "You are provided with a list of links found on a webpage. \
You are able to decide which of the links would be most relevant to include in \
a brochure about the company, such as links to an About page, or a Company page, \
or Careers/Jobs pages. \n"
link_system_prompt += "You should respond only in JSON, without text, object as in this example:"
link_system_prompt += """
{
    "links": [
    {"type": "about page", "url": "https://full.url/goes/here/about"},
    {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}
"""

In [9]:
print(link_system_prompt)

You are provided with a list of links found on a webpage. You are able to decide which of the links would be most relevant to include in a brochure about the company, such as links to an About page, or a Company page, or Careers/Jobs pages. 
You should respond only in JSON, without text, object as in this example:
{
    "links": [
    {"type": "about page", "url": "https://full.url/goes/here/about"},
    {"type": "careers page": "url": "https://another.full.url/careers"}
    ]
}



In [10]:
def get_links_user_prompt(website):
    user_prompt = f"Here is the list of links on the website of {website.url} - "
    user_prompt += "please decide which of these are relevant web links for \
    a brochure about the company, respond with the full https URL in clean JSON format \
    without text json on the beginning of the response. \
    Do not include Terms of Service, Privacy, email links. \n"
    user_prompt+="Links (some might be relative links): \n"
    user_prompt += "\n".join(website.links)
    return user_prompt

In [11]:
print(get_links_user_prompt(page))

Here is the list of links on the website of https://huggingface.co - please decide which of these are relevant web links for     a brochure about the company, respond with the full https URL in clean JSON format     without text json on the beginning of the response.     Do not include Terms of Service, Privacy, email links. 
Links (some might be relative links): 
/
/models
/datasets
/spaces
/posts
/docs
/enterprise
/pricing
/login
/join
/spaces
/models
/nvidia/parakeet-tdt-0.6b-v2
/ACE-Step/ACE-Step-v1-3.5B
/Lightricks/LTX-Video
/nari-labs/Dia-1.6B
/lodestones/Chroma
/models
/spaces/smolagents/computer-agent
/spaces/enzostvs/deepsite
/spaces/ByteDance/DreamO
/spaces/ACE-Step/ACE-Step
/spaces/NihalGazi/FLUX-Pro-Unlimited
/spaces
/datasets/DMindAI/DMind_Benchmark
/datasets/nvidia/OpenCodeReasoning
/datasets/nvidia/OpenMathReasoning
/datasets/nvidia/Nemotron-CrossThink
/datasets/openbmb/Ultra-FineWeb
/datasets
/join
/pricing#endpoints
/pricing#spaces
/pricing
/enterprise
/enterprise
/ent

In [12]:
def get_links(url):
    website=Website(url)
    response = ollama.chat(
        model=MODEL,
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": get_links_user_prompt(website)}
        ],
         options={"format": "json"}
    )
    result = response['message']['content']

    print(result)
    try:
        content_json = json.loads(result)
        return content_json
    except json.JSONDecodeError:
        print("Odpowiedź nie jest poprawnym JSON")
        

In [13]:
huggingface = Website("https://huggingface.co")
huggingface.links

['/',
 '/models',
 '/datasets',
 '/spaces',
 '/posts',
 '/docs',
 '/enterprise',
 '/pricing',
 '/login',
 '/join',
 '/spaces',
 '/models',
 '/nvidia/parakeet-tdt-0.6b-v2',
 '/ACE-Step/ACE-Step-v1-3.5B',
 '/Lightricks/LTX-Video',
 '/nari-labs/Dia-1.6B',
 '/lodestones/Chroma',
 '/models',
 '/spaces/smolagents/computer-agent',
 '/spaces/enzostvs/deepsite',
 '/spaces/ByteDance/DreamO',
 '/spaces/ACE-Step/ACE-Step',
 '/spaces/NihalGazi/FLUX-Pro-Unlimited',
 '/spaces',
 '/datasets/DMindAI/DMind_Benchmark',
 '/datasets/nvidia/OpenCodeReasoning',
 '/datasets/nvidia/OpenMathReasoning',
 '/datasets/nvidia/Nemotron-CrossThink',
 '/datasets/openbmb/Ultra-FineWeb',
 '/datasets',
 '/join',
 '/pricing#endpoints',
 '/pricing#spaces',
 '/pricing',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/enterprise',
 '/allenai',
 '/facebook',
 '/amazon',
 '/google',
 '/Intel',
 '/microsoft',
 '/grammarly',
 '/Writer',
 '/docs/transformers',
 '/docs/diffusers',


In [14]:
get_links("https://huggingface.co")

{
  "links": [
    {"type": "About page", "url": "https://huggingface.co/about"},
    {"type": "Company page", "url": "https://huggingface.co/brand"},
    {"type": "Blog", "url": "https://blog.huggingface.co"},
    {"type": "GitHub", "url": "https://github.com/huggingface"},
    {"type": "Twitter", "url": "https://twitter.com/huggingface"},
    {"type": "LinkedIn", "url": "https://www.linkedin.com/company/huggingface/"},
    {"type": "Discord server", "url": "https://join.discord.huggingface.co"}
  ]
}


{'links': [{'type': 'About page', 'url': 'https://huggingface.co/about'},
  {'type': 'Company page', 'url': 'https://huggingface.co/brand'},
  {'type': 'Blog', 'url': 'https://blog.huggingface.co'},
  {'type': 'GitHub', 'url': 'https://github.com/huggingface'},
  {'type': 'Twitter', 'url': 'https://twitter.com/huggingface'},
  {'type': 'LinkedIn', 'url': 'https://www.linkedin.com/company/huggingface/'},
  {'type': 'Discord server', 'url': 'https://join.discord.huggingface.co'}]}

In [17]:
def get_all_details(url):
    result = "Langing page: \n"
    result += Website(url).get_contents()
    links = get_links(url)
    for link in links["links"]:
        try:
            result += f"\n\n{link['type']}\n"
            result += Website(link["url"]).get_contents()
        except socket.gaierror as e:
            print(f"DNS resolution failed: {e}")
        except NameResolutionError as e:
            print(f"Name resolution error: {e}")
        except MaxRetryError as e:
            print(f"Max retries exceeded: {e}")
        except ConnectionError as e:
            print(f"Connection error: {e}")
        except MissingSchema as e:
            print(f"Invalid URL schema: {e}")
        except InvalidSchema as e:
            print(f"Omitted unsupported URL (InvalidSchema): {e}")
        return result

In [18]:
print(get_all_details("https://huggingface.co"))

{
  "links": [
    {"type": "company page", "url": "https://huggingface.co/"},
    {"type": "About page", "url": "https://huggingface.co/team"},
    {"type": "Careers/Jobs page", "url": "https://apply.workable.com/huggingface/"},
    {"type": "blog", "url": "https://blog.huggingface.co/"},
    {"type": "GitHub repository", "url": "https://github.com/huggingface"},
    {"type": "Twitter handle", "url": "https://twitter.com/huggingface"},
    {"type": "LinkedIn company page", "url": "https://www.linkedin.com/company/huggingface/"}
  ]
}
Langing page: 
Webpage Title:
Hugging Face – The AI community building the future.
Webpage Contents:
Hugging Face
Models
Datasets
Spaces
Posts
Docs
Enterprise
Pricing
Log In
Sign Up
The AI community building the future.
The platform where the machine learning community collaborates on models, datasets, and applications.
Explore AI Apps
or
Browse 1M+ models
Trending on
this week
Models
nvidia/parakeet-tdt-0.6b-v2
Updated
13 days ago
•
109k
•
794
ACE-Step/A

In [19]:
system_prompt = "You are an assistant analyzes the contents of serveral relevant pages from a company website \
and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. \
Include details of company culture, customers and careers/jobs if you have the information."

In [20]:
def get_brochure_user_prompt(company_name, url):
    user_prompt= f"You are looking at a company called: {company_name}\n"
    user_prompt += f"Here are the contents of its landing page and other relevant pages; \
    use this information to build a short brochure of the company in markdown. \n"
    user_prompt += get_all_details(url)
    user_prompt=user_prompt[:5_000]
    return user_prompt

In [24]:
def create_brochure(company_name, url):
    response= ollama.chat(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role":"user", "content": get_brochure_user_prompt(company_name, url)}
        ],
    )
    result = response['message']['content']
    display(Markdown(result))

In [25]:
create_brochure("HuggingFace", "https://huggingface.co")

{
    "links": [
        {
            "type": "About page",
            "url": "https://huggingface.co/brand"
        },
        {
            "type": "Careers page",
            "url": "https://apply.workable.com/huggingface/"
        },
        {
            "type": "Company page",
            "url": "https://www.linkedin.com/company/huggingface/"
        },
        {
            "type": "Blog page",
            "url": "https://blog.huggingface.co"
        },
        {
            "type": "GitHub repository",
            "url": "https://github.com/huggingface"
        },
        {
            "type": "Twitter profile",
            "url": "https://twitter.com/huggingface"
        },
        {
            "type": "Discord channel",
            "url": "https://join.discord.huggingface.co"
        }
    ]
}


# Welcome to Hugging Face
================================

Hugging Face is the collaboration platform for the machine learning community, empowering developers, researchers, and end-users to build an open and ethical AI future together.

## Our Mission
----------------

To create a centralized hub where anyone can share, explore, discover, and experiment with open-source machine learning (ML) models, datasets, and applications. We strive to foster a community-driven approach that promotes collaboration, innovation, and accessibility in the field of ML.

## Key Features
--------------

*   **Hugging Face Hub**: A central platform for sharing, exploring, and discovering open-source ML models, datasets, and applications.
*   **Community-Driven Approach**: Our platform is built by the community, for the community. We encourage collaboration, innovation, and participation from all stakeholders.
*   **Fast-Growing Community**: With thousands of users and organizations already on board, our community is growing rapidly.
*   **Talent in AI Research**: Our science team explores the edge of tech to deliver cutting-edge solutions and tools.

## Industry Partnerships
------------------------

Hugging Face collaborates with prominent companies across various industries, including:

*   **Meta**
*   **Amazon**
*   **Google**
*   **Intel**
*   **Microsoft**
*   **Grammarly**

These partnerships enable us to provide high-quality models, datasets, and tools that meet the needs of industry professionals.

## Our Open Source Initiatives
--------------------------------

We are committed to building the foundation of ML tooling with our community. Some of our notable open source initiatives include:

*   **Transformers**: State-of-the-art ML for PyTorch, TensorFlow, JAX.
*   **Diffusers**: State-of-the-art Diffusion models in PyTorch.
*   **Safetensors**: Safe way to store/distribute neural network weights.

## Join the Community
----------------------

If you're interested in joining our community and contributing to the development of Hugging Face, please visit our [GitHub repository](https://github.com/huggingface) or follow us on [Twitter](https://twitter.com/huggingface).

You can also explore our [Blog](https://blog.huggingface.co/) for the latest news, updates, and insights from our community.

### Careers
---------

If you're interested in joining our team as a developer, engineer, or researcher, please visit our [Careers page](https://careers.huggingface.co) to learn more about our job openings.

In [28]:
def stream_brochure(company_name, url):
    stream = ollama.chat(
        model=MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": get_brochure_user_prompt(company_name, url)}
        ],
        stream=True
    )
    result = ""
    display_handle = display(Markdown(""), display_id=True)
    for chunk in stream:
        result += chunk['message']['content'] or ''
        result = result.replace("```","").replace("markdown","")
        update_display(Markdown(result), display_id=display_handle.display_id)

In [29]:
stream_brochure("HuggingFace", "https://huggingface.co")

{
    "links": [
        {"type": "About page", "url": "https://huggingface.co/"},
        {"type": "Company page", "url": "https://huggingface.co/brand"},
        {"type": "Careers/Jobs page", "url": "https://apply.workable.com/huggingface/"},
        {"type": "Blog", "url": "https://blog.huggingface.co/"},
        {"type": "GitHub", "url": "https://github.com/huggingface"},
        {"type": "Twitter", "url": "https://twitter.com/huggingface"},
        {"type": "LinkedIn", "url": "https://www.linkedin.com/company/huggingface/"}
    ]
}


**Hugging Face Brochure**
========================

**Introduction**
---------------

Welcome to Hugging Face, the leading platform for building and deploying artificial intelligence (AI) models. Our mission is to empower the machine learning community to collaborate on models, datasets, and applications, accelerating innovation and driving progress in AI.

**Company Culture**
-------------------

At Hugging Face, we value collaboration, creativity, and diversity. We believe that the best ideas come from diverse perspectives and backgrounds. Our team is passionate about building a platform that enables researchers, developers, and businesses to work together seamlessly.

**Customers and Partnerships**
-----------------------------

We are proud to partner with over 50,000 organizations worldwide, including:

*   Ai2 (non-profit)
*   AI at Meta
*   Amazon
*   Google
*   Intel
*   Microsoft
*   Grammarly
*   Writer

These partnerships demonstrate our commitment to supporting the growth of the AI community and driving innovation in various industries.

**Products and Services**
------------------------

Our platform offers a range of products and services, including:

*   **Models**: Browse 1 million+ pre-trained models for text, image, video, audio, and more.
*   **Datasets**: Access and share datasets for any ML task.
*   **Spaces**: Collaborate on unlimited public models, datasets, and applications.
*   **Compute**: Deploy on optimized inference endpoints or update your Spaces applications to a GPU in a few clicks.
*   **Enterprise**: Get access to enterprise-grade security, access controls, dedicated support, and priority customer service.

**Open Source**
--------------

We are committed to building the foundation of ML tooling with the community. Our open-source projects include:

*   **Transformers**: State-of-the-art ML for PyTorch, TensorFlow, JAX.
*   **Diffusers**: State-of-the-art diffusion models in PyTorch.
*   **Safetensors**: Safe way to store/distribute neural network weights.

**Join the Community**
----------------------

Ready to join the Hugging Face community? Sign up for our platform and start exploring AI apps, models, datasets, and applications. Learn more about our open-source projects and stay up-to-date with the latest news and updates on our blog and forum.

**Contact Us**
--------------

Want to learn more about how we can help your organization grow in AI? Contact us at [info@huggingface.com](mailto:info@huggingface.com) or follow us on social media.

In [30]:
import ollama
import requests
import sys

try:
    response = requests.get("http://localhost:11434/api/version", timeout=5)
    print(f"Serwer Ollama odpowiada: {response.text}")
except Exception as e:
    print(f"Nie można połączyć się z serwerem Ollama: {e}")
    print("Upewnij się, że Ollama jest uruchomiona komendą: ollama serve")
    sys.exit(1)

try:
    client = ollama.Client(host='http://localhost:11434')
    models =client.list()
    print(f"Dostępne modele: {models}")
except Exception as e:
    print(f"Błąd podczas używania klienta ollama: {e}")


Serwer Ollama odpowiada: {"version":"0.6.8"}
Dostępne modele: models=[Model(model='deepseek-v2:latest', modified_at=datetime.datetime(2025, 5, 7, 12, 6, 26, 378772, tzinfo=TzInfo(+02:00)), digest='7c8c332f2df7ac4d657f3514d757d969b84ac6d3fec5b0c02bc8491bd0dc5ea1', size=8905124229, details=ModelDetails(parent_model='', format='gguf', family='deepseek2', families=['deepseek2'], parameter_size='15.7B', quantization_level='Q4_0')), Model(model='llama3.2:latest', modified_at=datetime.datetime(2025, 5, 7, 11, 41, 38, 445245, tzinfo=TzInfo(+02:00)), digest='a80c4f17acd55265feec403c7aef86be0c25983ab279d83f3bcd3abbcb5b8b72', size=2019393189, details=ModelDetails(parent_model='', format='gguf', family='llama', families=['llama'], parameter_size='3.2B', quantization_level='Q4_K_M'))]
