In [None]:
#%pip install bs4
#%pip install chardet
#%pip install Jinja2
#%pip install os
#%pip install lxml


Collecting lxmlNote: you may need to restart the kernel to use updated packages.

  Downloading lxml-5.3.0-cp312-cp312-win_amd64.whl.metadata (3.9 kB)
Downloading lxml-5.3.0-cp312-cp312-win_amd64.whl (3.8 MB)
   ---------------------------------------- 0.0/3.8 MB ? eta -:--:--
   --------------------------- ------------ 2.6/3.8 MB 16.9 MB/s eta 0:00:01
   ---------------------------------------- 3.8/3.8 MB 9.9 MB/s eta 0:00:00
Installing collected packages: lxml
Successfully installed lxml-5.3.0



[notice] A new release of pip is available: 24.3.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


The Crawler

In [None]:
import time
import requests
from collections import deque
from urllib.parse import urljoin, urlparse
from urllib.robotparser import RobotFileParser
import pandas as pd
from bs4 import BeautifulSoup
import re
import json
import os

################################################################################
# Global crawler data
################################################################################
nodes = []  # each: { "url": str, "id": int, "status": str }
links = []  # each: { "source": int, "target": int }

visited_domains = set()       # which unified domains we have fully crawled
visited_pages = set()         # full URLs visited
edges_set = set()             # to avoid duplicate edges
domain_crawl_count = 0
max_domain_crawl_count = 100
sleep_time = 3
negative_list = ["srf.ch", 
                 "nzz.ch", 
                 "tagesanzeiger.ch",
                 "baselwandel.ch",
                 "naturwissenschaften.ch",
                 "20min.ch",
                 "baz.ch",
                 "bzbasel.ch",
                 "klimageschichten.so.ch",
                 "daten.stadt.sg.ch",
                 "ag.ch",
                 "mathias-binswanger.ch",
                 "24heures.ch",
                 "zentrumranft.ch",
                 "redcross.ch",
                 "letemps.ch",
                 "migros-service.ch",
                 "bernerzeitung.ch",
                 "sergeandpeppers.ch",
                 "silviodezanet.ch",
                 "dergewerbeverein.ch",
                 "berneroberlaender.ch",
                 "frapp.ch",
                 "ticketcorner.ch",
                 "lelanceen.ch", 
                 "mini.ch",
                 "electrolux.ch"
                ]

positive_list = [
    "prospecierara.ch"    
    ]

################################################################################
# Update JSON file with nodes and edges
################################################################################

def load_existing_data(json_file="public/graph_data.json"):
    global nodes, links, visited_domains, visited_pages
    if os.path.exists(json_file):
        if os.path.getsize(json_file) > 0:  # Check if file is not empty
            with open(json_file, 'r') as f:
                data = json.load(f)
                # Convert "label" -> "url" if needed
                raw_nodes = data.get("nodes", [])
                for n in raw_nodes:
                    if "url" not in n and "label" in n:
                        n["url"] = n["label"]
                nodes[:] = raw_nodes
                links[:] = data.get("edges", [])

                # Load visited sets if present
                visited_domains_data = data.get("visited_domains", [])
                visited_pages_data = data.get("visited_pages", [])
                visited_domains = set(visited_domains_data) if visited_domains_data else set()
                visited_pages = set(visited_pages_data) if visited_pages_data else set()
        else:
            print(f"{json_file} is empty. Initializing data structures.")
            nodes = []
            links = []
            visited_domains = set()
            visited_pages = set()
    else:
        print(f"{json_file} does not exist. Initializing data structures.")
        nodes = []
        links = []
        visited_domains = set()
        visited_pages = set()

def generate_json_from_data(nodes, links, output_json="public/graph_data.json"):
    # Build nodes
    nodes_list = []
    for node in nodes:
        nodes_list.append({
            "id": int(node["id"]),
            "label": node["url"],
            "status": node["status"],
            "size": 3,
            "x": node.get("x", 0),
            "y": node.get("y", 0),
        })

    # Build edges
    edges_list = []
    for link in links:
        edges_list.append({
            "source": int(link["source"]),
            "target": int(link["target"])
        })

    # Convert visited sets to lists for JSON
    graph_data = {
        "nodes": nodes_list,
        "edges": edges_list,
        "visited_domains": list(visited_domains),
        "visited_pages": list(visited_pages)
    }

    os.makedirs(os.path.dirname(output_json), exist_ok=True)
    with open(output_json, 'w') as f:
        json.dump(graph_data, f, indent=4)

    print(f"Updated {output_json} with {len(nodes_list)} nodes and {len(edges_list)} edges, plus visited sets.")

def add_node_if_missing(domain_str):
    for n in nodes:
        if n["url"] == domain_str:
            return n["id"]
    new_id = len(nodes) + 1
    nodes.append({"url": domain_str, "id": new_id, "status": "Unknown"})
    return new_id

def set_node_status(domain_str, status):
    for n in nodes:
        if n["url"] == domain_str:
            n["status"] = status
            return

def get_node_id(domain_str):
    for n in nodes:
        if n["url"] == domain_str:
            return n["id"]
    raise KeyError(f"Domain not found in nodes: {domain_str}")


################################################################################
# Utility functions
################################################################################

def unify_domain(url):
    """
    Returns a domain string without scheme and without 'www.' prefix.
    Example: 'https://www.urbanagriculturebasel.ch' -> 'urbanagriculturebasel.ch'
    """
    parsed = urlparse(url)
    netloc = parsed.netloc.lower()
    if netloc.startswith("www."):
        netloc = netloc[4:]
    return netloc

def canonical_domain(url):
    """
    For internal checks, returns the netloc in lowercase (still includes 'www.' if present).
    Used to decide if a link is internal or external within BFS.
    """
    return urlparse(url).netloc.lower()

def normalize_url(url):
    """
    Return a normalized full URL with scheme if missing, and lowercase netloc.
    """
    parsed = urlparse(url)
    scheme = parsed.scheme.lower() if parsed.scheme else "https"
    netloc = parsed.netloc.lower()
    return f"{scheme}://{netloc}{parsed.path}"

def can_crawl(url):
    try:
        parsed_url = urlparse(url)
        base_url = f"{parsed_url.scheme}://{parsed_url.netloc}"
        robots_url = urljoin(base_url, "/robots.txt")
        print(f"Checking robots.txt at: {robots_url}")  # Debugging print

        rp = RobotFileParser()
        rp.set_url(robots_url)
        rp.read()
        
        # Log the contents of the robots.txt file
        print(f"Contents of {robots_url}:")
        with requests.get(robots_url) as response:
            print(response.text)

        can_fetch = rp.can_fetch("*", url)
        print(f"Can fetch {url}: {can_fetch}")  # Debugging print
        return can_fetch
    except Exception as e:
        print(f"Error checking robots.txt for {url}: {e}")
        return True

def detect_encoding_and_decode(raw):
    """
    Attempts to decode raw bytes, using chardet if available, otherwise utf-8 fallback.
    """
    try:
        import chardet
        result = chardet.detect(raw)
        enc = result["encoding"] or "utf-8"
        return raw.decode(enc, errors="replace")
    except ImportError:
        return raw.decode("utf-8", errors="replace")

def extract_links(url):
    if not can_crawl(url):
        print(f"Robots.txt disallows crawling => {url}")
        return []
    time.sleep(sleep_time)

    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        resp = requests.get(url, headers=headers, timeout=10)
        resp.raise_for_status()
        content = resp.content

        # First try html.parser, then fall back to lxml
        try:
            soup = BeautifulSoup(content, "html.parser")
        except Exception as e:
            print(f"Parser error with html.parser. Falling back to 'lxml'. Error: {e}")
            soup = BeautifulSoup(content, "lxml")

        valid_links = []
        for a in soup.find_all("a", href=True):
            raw_href = a["href"]
            try:
                merged_url = urljoin(url, raw_href)
                valid_links.append(merged_url)
            except ValueError:
                continue

        return valid_links

    except requests.RequestException as e:
        print(f"Failed to extract links from {url}: {e}")
        return []
    except UnicodeDecodeError as e:
        print(f"Unicode decode error for {url}: {e}")
        return []


def extract_visible_text(html):
    """
    Remove non-visible tags like <script>, <style>, <meta>, etc.
    Then get only the text from <body>.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Remove tags not considered main content
    for tag_name in ["script", "style", "meta", "head", "noscript", "link"]:
        for t in soup.find_all(tag_name):
            t.decompose()

    # If you need to remove display:none elements:
    # for hidden in soup.select("[style*='display:none']"):
    #     hidden.decompose()

    # Some pages may not have a <body> tag; handle that gracefully
    body = soup.body
    if body is not None:
        text = body.get_text(separator=" ", strip=True)
    else:
        # fallback: entire soup
        text = soup.get_text(separator=" ", strip=True)
    
    return text

def contains_keyword(domain_url, keyword_list):
    """
    Check if the domain_url page text contains at least one of the given keywords 
    as a full standalone word. Use regex with negative lookbehind/lookahead 
    and print out what was matched for debugging.
    """
    if not can_crawl(domain_url):
        return False
    time.sleep(sleep_time)

    # Use the same headers for your keyword check
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        resp = requests.get(domain_url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        # Remove tags containing non-user-facing text
        for tag_name in ["script", "style", "head", "title", "meta", "noscript"]:
            for tag in soup.find_all(tag_name):
                tag.decompose()

        # Join all remaining text
        visible_text = ' '.join(soup.stripped_strings).lower()

        # Check if any keyword is present
        return any(kw.lower() in visible_text for kw in keyword_list)

    except Exception as e:
        print(f"contains_keyword failed for {domain_url}: {e}")
        return None

def add_node_if_missing(domain_str):
    """
    Use the 'domain_str' from unify_domain() as unique node key.
    If it doesn't exist in 'nodes', add it. Return node ID.
    """
    for n in nodes:
        if n["url"] == domain_str:
            return n["id"]
    new_id = len(nodes) + 1
    nodes.append({"url": domain_str, "id": new_id, "status": "Unknown"})
    return new_id

def set_node_status(domain_str, status):
    for n in nodes:
        if n["url"] == domain_str:
            n["status"] = status
            return

def get_node_id(domain_str):
    for n in nodes:
        if n["url"] == domain_str:
            return n["id"]
    raise KeyError(f"Domain not found in nodes: {domain_str}")

################################################################################
# BFS domain crawl
################################################################################
def bfs_crawl_domain(domain_str, depth=1):
    print(f"\n--- Crawling domain: {domain_str} (Domain #{domain_crawl_count}) ---")
    if domain_crawl_count > max_domain_crawl_count:
        print("Limit of crawlable domains reached. Stopping.")
        return

    start_page = f"https://{domain_str}"
    visited_pages.add(start_page)
    queue = deque([(start_page, 0)])
    
    # We assume the node for this domain already exists (e.g., after add_node_if_missing).
    try:
        source_id = get_node_id(domain_str)
    except KeyError:
        source_id = add_node_if_missing(domain_str)

    while queue:
        page_url, lvl = queue.popleft()
        if lvl > depth:
            break

        lower = page_url.lower()
        if lower.endswith(".pdf"):
            print(f"Skipping PDF: {page_url}")
            continue
        if lower.endswith(".jpg"):
            print(f"Skipping JPG: {page_url}")
            continue
        if lower.endswith(".mp4"):
            print(f"Skipping MP4: {page_url}")
            continue
        if lower.endswith(".exe"):
            print(f"Skipping exe: {page_url}")
            continue
        if lower.endswith(".zip"):
            print(f"Skipping ZIP: {page_url}")
            continue

        print(f"\nCrawling page: {page_url} (depth={lvl})")
        links_found = extract_links(page_url)
        print(f"Found {len(links_found)} links at {page_url}")

        # If it's the homepage (lvl=0) and we see > 200 links, classify + skip further
        if lvl == 0 and len(links_found) > 200:
            print("Homepage has more than 200 links, treating as 'Webshop-like' and skipping BFS.")
            set_node_status(domain_str, "Relevant, but possibly Webshop-like with too many links")
            break

        for link in links_found:
            link_unified = unify_domain(link).strip()
            if not link_unified:
                # Domain is empty or invalid
                continue

            if link_unified != domain_str:
                # External link => only add node and link if you actually want it
                try:
                    target_id = get_node_id(link_unified)
                except KeyError:
                    target_id = add_node_if_missing(link_unified)
                if (source_id, target_id) not in edges_set:
                    edges_set.add((source_id, target_id))
                    links.append({"source": source_id, "target": target_id})

            else:
                # Internal link => BFS deeper
                norm = normalize_url(link)
                if norm not in visited_pages and lvl < depth:
                    visited_pages.add(norm)
                    queue.append((norm, lvl + 1))

    # Finally, update the JSON output after finishing this domain
    generate_json_from_data(nodes, links, "public/graph_data.json")

################################################################################
# Main
################################################################################
def main():
    keyword_list = ["landwirtschaft", "landwirtschaftlich","agriculture","agricoltura","farming","agrar","fattoria","agricole","ferme","paysan","plouc","bauer"]

    # Load existing data from JSON
    load_existing_data()

    start_url = "https://electrolux.ch/"
    start_unified = unify_domain(start_url)
    add_node_if_missing(start_unified)

    # If domain is in positive_list, skip keyword check => mark Relevant + BFS
    if any(pdom in start_unified for pdom in positive_list):
        set_node_status(start_unified, "Relevant")
        bfs_crawl_domain(start_unified, depth=1)
    elif any(nd in start_unified for nd in negative_list):
        set_node_status(start_unified, "Negativliste")
    elif start_unified.endswith(".ch"):
        keyword_check = contains_keyword(start_url, keyword_list)
        if keyword_check is None:
            set_node_status(start_unified, "could not test keyword")
        elif keyword_check:
            set_node_status(start_unified, "Relevant")
            bfs_crawl_domain(start_unified, depth=1)
        else:
            set_node_status(start_unified, "Kein Bezug zu Landwirtschaft")
    else:
        set_node_status(start_unified, "Nicht in der Schweiz")

    idx = 0
    while idx < len(nodes) and domain_crawl_count < max_domain_crawl_count:
        node = nodes[idx]
        idx += 1

        if node["url"] in visited_domains:
            continue
        
        if node["status"] in ("Start", "Relevant"):
            generate_json_from_data(nodes, links, 'public/graph_data.json')  # Update JSON
            continue

        dom_str = node["url"]

        if dom_str in visited_domains:
            generate_json_from_data(nodes, links, 'public/graph_data.json')  # Update JSON
            continue

        if any(nd in dom_str for nd in negative_list):
            set_node_status(dom_str, "Negativliste")
            visited_domains.add(dom_str)  # Mark visited
            generate_json_from_data(nodes, links, 'public/graph_data.json')  # Update JSON
            continue

        if any(pdom in dom_str for pdom in positive_list):
            set_node_status(dom_str, "Relevant") 
            generate_json_from_data(nodes, links, 'public/graph_data.json')  # Update JSON
            bfs_crawl_domain(dom_str, depth=1)
            continue

        if dom_str.endswith(".ch"):
            keyword_check = contains_keyword(f"https://{dom_str}", keyword_list)
            if keyword_check is None:
                set_node_status(dom_str, "could not test keyword")
            elif keyword_check:
                set_node_status(dom_str, "Relevant")
                bfs_crawl_domain(dom_str, depth=1)
            else:
                set_node_status(dom_str, "Kein Bezug zu Landwirtschaft")
            visited_domains.add(dom_str)  # Mark visited here as well
            generate_json_from_data(nodes, links, 'public/graph_data.json')  # Update JSON
        else:
            set_node_status(dom_str, "Nicht in der Schweiz")
            visited_domains.add(dom_str)  # Mark visited
            generate_json_from_data(nodes, links, 'public/graph_data.json')  # Update JSON


    





if __name__ == "__main__":
    main()

Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Updated public/graph_data.json with 36334 nodes and 81225 edges, plus visited sets.
Checking robots.txt at: https://tel.local.ch/robots.txt
Can fetch https://te

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://badanvins.ch/wp-content/uploads/2019/01/VOE-Label-Certify-Digital-A-150x150.png

Crawling page: https://badanvins.ch/conditions-generales-de-vente/ (depth=1)
Checking robots.txt at: https://badanvins.ch/robots.txt
Can fetch https://badanvins.ch/conditions-generales-de-vente/: True
Found 60 links at https://badanvins.ch/conditions-generales-de-vente/
Updated public/graph_data.json with 36645 nodes and 81741 edges, plus visited sets.
Updated public/graph_data.json with 36645 nodes and 81741 edges, plus visited sets.
Updated public/graph_data.json with 36645 nodes and 81741 edges, plus visited sets.
Checking robots.txt at: https://boulangerie-patisserie-laptitepause.ch/robots.txt
Can fetch https://boulangerie-patisserie-laptitepause.ch: True

--- Crawling domain: boulangerie-patisserie-laptitepause.ch (Domain #0) ---

Crawling page: https://boulangerie-patisserie-laptitepause.ch (depth=0)
Checking robots.txt at: https://boulangerie-patisserie-laptitepause.ch/robot

  k = self.parse_starttag(i)


Found 0 links at https://www.bal-blanc.ch/feed/
Updated public/graph_data.json with 36646 nodes and 81750 edges, plus visited sets.
Updated public/graph_data.json with 36646 nodes and 81750 edges, plus visited sets.
Checking robots.txt at: https://ecuyer-des-saveurs.ch/robots.txt
Can fetch https://ecuyer-des-saveurs.ch: True
Updated public/graph_data.json with 36646 nodes and 81750 edges, plus visited sets.
Checking robots.txt at: https://lafermeforestier.ch/robots.txt
Can fetch https://lafermeforestier.ch: True

--- Crawling domain: lafermeforestier.ch (Domain #0) ---

Crawling page: https://lafermeforestier.ch (depth=0)
Checking robots.txt at: https://lafermeforestier.ch/robots.txt
Can fetch https://lafermeforestier.ch: True
Found 1 links at https://lafermeforestier.ch
Updated public/graph_data.json with 36646 nodes and 81751 edges, plus visited sets.
Updated public/graph_data.json with 36646 nodes and 81751 edges, plus visited sets.
Checking robots.txt at: https://legufrais.ch/robot

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://jardinsdesmartinets.ch/wp-content/uploads/2023/04/Offre_JardinsDesMartinets.xlsx

Crawling page: https://jardinsdesmartinets.ch/ (depth=1)
Checking robots.txt at: https://jardinsdesmartinets.ch/robots.txt
Can fetch https://jardinsdesmartinets.ch/: True
Found 21 links at https://jardinsdesmartinets.ch/

Crawling page: https://jardinsdesmartinets.ch/contact-2/ (depth=1)
Checking robots.txt at: https://jardinsdesmartinets.ch/robots.txt
Can fetch https://jardinsdesmartinets.ch/contact-2/: True
Found 11 links at https://jardinsdesmartinets.ch/contact-2/

Crawling page: https://jardinsdesmartinets.ch/legumes-en-abonnements/ (depth=1)
Checking robots.txt at: https://jardinsdesmartinets.ch/robots.txt
Can fetch https://jardinsdesmartinets.ch/legumes-en-abonnements/: True
Found 19 links at https://jardinsdesmartinets.ch/legumes-en-abonnements/

Crawling page: https://jardinsdesmartinets.ch/category/posts_actualites/ (depth=1)
Checking robots.txt at: https://jardinsdesmar

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://heliantheperspectives.ch/files/1691999673-qrcode-6792.png

Crawling page: https://heliantheperspectives.ch/ (depth=1)
Checking robots.txt at: https://heliantheperspectives.ch/robots.txt
Can fetch https://heliantheperspectives.ch/: True
Found 18 links at https://heliantheperspectives.ch/

Crawling page: https://heliantheperspectives.ch/services-helianthe-perspectives-fr5383.html (depth=1)
Checking robots.txt at: https://heliantheperspectives.ch/robots.txt
Can fetch https://heliantheperspectives.ch/services-helianthe-perspectives-fr5383.html: True
Found 19 links at https://heliantheperspectives.ch/services-helianthe-perspectives-fr5383.html

Crawling page: https://heliantheperspectives.ch/partenaires-helianthe-perspectives-fr2000.html (depth=1)
Checking robots.txt at: https://heliantheperspectives.ch/robots.txt
Can fetch https://heliantheperspectives.ch/partenaires-helianthe-perspectives-fr2000.html: True
Found 25 links at https://heliantheperspectives.ch/partena

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://heliantheperspectives.ch/files/1738752562--9588.png
Updated public/graph_data.json with 36938 nodes and 82424 edges, plus visited sets.
Updated public/graph_data.json with 36938 nodes and 82424 edges, plus visited sets.
Checking robots.txt at: https://herbadiberna.ch/robots.txt
Can fetch https://herbadiberna.ch: True
Updated public/graph_data.json with 36938 nodes and 82424 edges, plus visited sets.
Checking robots.txt at: https://herboland.ch/robots.txt
Can fetch https://herboland.ch: False
Updated public/graph_data.json with 36938 nodes and 82424 edges, plus visited sets.
Checking robots.txt at: https://heusser-edv.ch/robots.txt
Can fetch https://heusser-edv.ch: True
Updated public/graph_data.json with 36938 nodes and 82424 edges, plus visited sets.
Checking robots.txt at: https://highendscan.ch/robots.txt
Error checking robots.txt for https://highendscan.ch: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate (_

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_grundriss_eg_F.png

Crawling page: https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_fassaden_F.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_fassaden_F.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_fassaden_F.png

Crawling page: https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_schnitte_F.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_schnitte_F.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/hirsgarten/hirsgarten_schnitte_F.png
Skipping JPG: https://hueslerarchitektur.ch/bilder/lokremise/lokremise_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/lokremise/lokremise_2_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/lokremise/lokremise_3_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/lokremise/lokremise_4_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/lokremise/lokremise_5_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/neuguet/neuguet_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/neuguet/neuguet_2_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/neuguet/neuguet_4_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/neuguet/neuguet_5_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/neuguet/neuguet_6_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/baar/baar_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/baar/baar_4_B.jpg
S

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/baar/baar_grundriss1_F.png

Crawling page: https://hueslerarchitektur.ch/bilder/baar/baar_grundriss2_F.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/baar/baar_grundriss2_F.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/baar/baar_grundriss2_F.png
Skipping JPG: https://hueslerarchitektur.ch/bilder/ascona/ascona_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/ascona/ascona_3_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/ascona/ascona_2_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_2_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_3_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_4_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_5_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_6_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/zug/zug_7_B.jpg

Crawling page: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_1_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/2mfh/2mfh_1_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/2mfh/2mfh_1_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_2_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/2mfh/2mfh_2_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/2mfh/2mfh_2_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_3_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/2mfh/2mfh_3_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/2mfh/2mfh_3_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_4_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/2mfh/2mfh_4_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/2mfh/2mfh_4_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_5_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/2mfh/2mfh_5_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/2mfh/2mfh_5_B.png
Skipping JPG: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_6_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/2mfh/2mfh_7_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_2_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_3_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_5_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_4_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_6_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_7_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/cham/cham_8_B.jpg

Crawling page: https://hueslerarchitektur.ch/bilder/schillinghof/fassade_sued_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/schillinghof/fassade_sued_B.png

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/schillinghof/fassade_sued_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/schillinghof/fassade_west_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/schillinghof/fassade_west_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/schillinghof/fassade_west_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/schillinghof/schnitt_b_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/schillinghof/schnitt_b_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/schillinghof/schnitt_b_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_eg_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_eg_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_eg_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_og_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_og_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_og_B.png

Crawling page: https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_dg_B.png (depth=1)
Checking robots.txt at: https://hueslerarchitektur.ch/robots.txt
Can fetch https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_dg_B.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://hueslerarchitektur.ch/bilder/schillinghof/grundriss_dg_B.png
Skipping JPG: https://hueslerarchitektur.ch/bilder/schillinghof/schillighof1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/schillinghof/schillighof2_B.jpg
Skipping PDF: https://hueslerarchitektur.ch/bilder/0003minpmenz/nachhaltigbauen.pdf
Skipping PDF: https://hueslerarchitektur.ch/bilder/0003minpmenz/rohbaubesichtigung.pdf
Skipping PDF: https://hueslerarchitektur.ch/bilder/0003minpmenz/broschuresonnenblick.pdf
Skipping JPG: https://hueslerarchitektur.ch/bilder/menzingen/menzingen_1_B.jpg
Skipping JPG: https://hueslerarchitektur.ch/bilder/menzingen/menzingen_2_B.jpg
Skipping PDF: https://hueslerarchitektur.ch/texte/MerkblattSolarZug.PDF
Skipping PDF: https://hueslerarchitektur.ch/texte/HolzbauSchweizMai2009.pdf
Skipping PDF: https://hueslerarchitektur.ch/texte/TagdesDenkmals2012.pdf
Updated public/graph_data.json with 37158 nodes and 82927 edges, plus visited sets.
Updated public/graph_data

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://pluri-elles.ch/wp-content/uploads/2025/01/Capture-decran-2025-01-02-a-09.30.10-copie.webp

Crawling page: https://pluri-elles.ch/wp-content/uploads/2025/01/Capture-decran-2025-01-02-a-09.30.52-copie.webp (depth=1)
Checking robots.txt at: https://pluri-elles.ch/robots.txt
Can fetch https://pluri-elles.ch/wp-content/uploads/2025/01/Capture-decran-2025-01-02-a-09.30.52-copie.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://pluri-elles.ch/wp-content/uploads/2025/01/Capture-decran-2025-01-02-a-09.30.52-copie.webp
Skipping PDF: https://pluri-elles.ch/wp-content/uploads/2024/11/Cafe-paperasse-affiche-2025-janv-juin.pdf

Crawling page: https://pluri-elles.ch/politique-de-cookies/ (depth=1)
Checking robots.txt at: https://pluri-elles.ch/robots.txt
Can fetch https://pluri-elles.ch/politique-de-cookies/: True
Found 75 links at https://pluri-elles.ch/politique-de-cookies/

Crawling page: https://pluri-elles.ch/politique-de-confidentialite/ (depth=1)
Checking robots.txt at: https://pluri-elles.ch/robots.txt
Can fetch https://pluri-elles.ch/politique-de-confidentialite/: True
Found 62 links at https://pluri-elles.ch/politique-de-confidentialite/
Updated public/graph_data.json with 37226 nodes and 83143 edges, plus visited sets.
Updated public/graph_data.json with 37226 nodes and 83143 edges, plus visited sets.
Checking robots.txt at: https://xn--cyt-jnad.ch/robots.txt
Can fetch https://xn--

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://new.drohnenverband.ch/data/Cache/a/ae/aec/aec9/_srcaec95dca317b3c921e3fd02688a34d88_par9316563d95990bc8dea02642d8f62565_dat1616327451.jpeg
Updated public/graph_data.json with 37238 nodes and 83198 edges, plus visited sets.
Updated public/graph_data.json with 37238 nodes and 83198 edges, plus visited sets.
Checking robots.txt at: https://to-fly-productions.ch/robots.txt
Error checking robots.txt for https://to-fly-productions.ch: <urlopen error [Errno 11001] getaddrinfo failed>
contains_keyword failed for https://to-fly-productions.ch: HTTPSConnectionPool(host='to-fly-productions.ch', port=443): Max retries exceeded with url: / (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x000002E7CEEC1D60>: Failed to resolve 'to-fly-productions.ch' ([Errno 11001] getaddrinfo failed)"))
Updated public/graph_data.json with 37238 nodes and 83198 edges, plus visited sets.
Updated public/graph_data.json with 37238 nodes and 83198 edges, plus visited

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2024/12/le_proces_du_chien.png

Crawling page: https://vision-air.ch/wp-content/uploads/2024/12/the_place.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2024/12/the_place.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2024/12/the_place.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Yeti-Obijaan-Srijit-Mukherji-SVF-Entertainement-Pvt.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Yeti-Obijaan-Srijit-Mukherji-SVF-Entertainement-Pvt.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Yeti-Obijaan-Srijit-Mukherji-SVF-Entertainement-Pvt.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Tambour-battant-François-Christophe-Marzal-Pointprod.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Tambour-battant-François-Christophe-Marzal-Pointprod.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Tambour-battant-François-Christophe-Marzal-Pointprod.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Uma-Srijit-Mukherji-SVF-Entertainement-Pvt.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Uma-Srijit-Mukherji-SVF-Entertainement-Pvt.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Uma-Srijit-Mukherji-SVF-Entertainement-Pvt.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Cendres-de-Volcan-Maria-Nicollier-REC-production.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Cendres-de-Volcan-Maria-Nicollier-REC-production.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Cendres-de-Volcan-Maria-Nicollier-REC-production.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Quartier-des-banques-saison-2-Fulvio-Bernasconi-PointProd-RTS.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Quartier-des-banques-saison-2-Fulvio-Bernasconi-PointProd-RTS.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Quartier-des-banques-saison-2-Fulvio-Bernasconi-PointProd-RTS.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Quartier-des-banques-Fulvio-Bernasconi-PointProd-RTS.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Quartier-des-banques-Fulvio-Bernasconi-PointProd-RTS.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Quartier-des-banques-Fulvio-Bernasconi-PointProd-RTS.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/OMEGA-BOB-RUN.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/OMEGA-BOB-RUN.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/OMEGA-BOB-RUN.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/SCOTT-Cinemargot.ch-2018.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/SCOTT-Cinemargot.ch-2018.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/SCOTT-Cinemargot.ch-2018.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/TISSOT-PEAKWALK-SAS-Prod-2015.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/TISSOT-PEAKWALK-SAS-Prod-2015.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/TISSOT-PEAKWALK-SAS-Prod-2015.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/VICTORINOX-I.N.O.X-Artaban-films-2014.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/VICTORINOX-I.N.O.X-Artaban-films-2014.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/VICTORINOX-I.N.O.X-Artaban-films-2014.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/VICTORINOX-I.N.O.X-Artaban-films-2014_2.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/VICTORINOX-I.N.O.X-Artaban-films-2014_2.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/VICTORINOX-I.N.O.X-Artaban-films-2014_2.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/13-faces-du-Valais-David-Carlier-et-Yannick-Bacher-2015-Highline.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/13-faces-du-Valais-David-Carlier-et-Yannick-Bacher-2015-Highline.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/13-faces-du-Valais-David-Carlier-et-Yannick-Bacher-2015-Highline.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Cépages-rares-Florian-Burion-Octuor-films-2019.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Cépages-rares-Florian-Burion-Octuor-films-2019.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Cépages-rares-Florian-Burion-Octuor-films-2019.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Une-année-à-la-vigne-les-domaines-du-possible-Point-Prod-RTS-2019.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Une-année-à-la-vigne-les-domaines-du-possible-Point-Prod-RTS-2019.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Une-année-à-la-vigne-les-domaines-du-possible-Point-Prod-RTS-2019.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Une-année-à-la-vigne-les-domaines-du-possible-Point-Prod-RTS-2019_2.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Une-année-à-la-vigne-les-domaines-du-possible-Point-Prod-RTS-2019_2.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Une-année-à-la-vigne-les-domaines-du-possible-Point-Prod-RTS-2019_2.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Chasselas-for-ever-Florian-Burion-Octuor-films-2016.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Chasselas-for-ever-Florian-Burion-Octuor-films-2016.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Chasselas-for-ever-Florian-Burion-Octuor-films-2016.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Freeride-World-Tour-Hakuba-Japon-2018-2020.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Freeride-World-Tour-Hakuba-Japon-2018-2020.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Freeride-World-Tour-Hakuba-Japon-2018-2020.png

Crawling page: https://vision-air.ch/wp-content/uploads/2023/03/Runmate-2019-2022-e1679243524301.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2023/03/Runmate-2019-2022-e1679243524301.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2023/03/Runmate-2019-2022-e1679243524301.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Marathon-de-Genève-2012-2019.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Marathon-de-Genève-2012-2019.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Marathon-de-Genève-2012-2019.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/20Km-de-Genève-2016-2019.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/20Km-de-Genève-2016-2019.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/20Km-de-Genève-2016-2019.png

Crawling page: https://vision-air.ch/wp-content/uploads/2020/04/Triathlon-de-Genève-2017-2019.png (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/wp-content/uploads/2020/04/Triathlon-de-Genève-2017-2019.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://vision-air.ch/wp-content/uploads/2020/04/Triathlon-de-Genève-2017-2019.png

Crawling page: https://vision-air.ch/politique-de-confidentialite/ (depth=1)
Checking robots.txt at: https://vision-air.ch/robots.txt
Can fetch https://vision-air.ch/politique-de-confidentialite/: True
Found 1 links at https://vision-air.ch/politique-de-confidentialite/
Updated public/graph_data.json with 37247 nodes and 83210 edges, plus visited sets.
Updated public/graph_data.json with 37247 nodes and 83210 edges, plus visited sets.
Updated public/graph_data.json with 37247 nodes and 83210 edges, plus visited sets.
Updated public/graph_data.json with 37247 nodes and 83210 edges, plus visited sets.
Checking robots.txt at: https://app02.bazl.admin.ch/robots.txt
Can fetch https://app02.bazl.admin.ch: True
Updated public/graph_data.json with 37247 nodes and 83210 edges, plus visited sets.
Checking robots.txt at: https://aerodromes.ch/robots.txt
Can fetch https://aerodromes.ch: True
Update

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://shiatsuverband.ch/redaktioneller-beitrag-2025-zeitschrift-oliv/

Crawling page: https://shiatsuverband.ch/category/news-feed-print/ (depth=1)
Checking robots.txt at: https://shiatsuverband.ch/robots.txt
Can fetch https://shiatsuverband.ch/category/news-feed-print/: True
Found 98 links at https://shiatsuverband.ch/category/news-feed-print/

Crawling page: https://shiatsuverband.ch/redaktioneller-beitrag-2024-forum_kinesiologie/ (depth=1)
Checking robots.txt at: https://shiatsuverband.ch/robots.txt
Can fetch https://shiatsuverband.ch/redaktioneller-beitrag-2024-forum_kinesiologie/: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://shiatsuverband.ch/redaktioneller-beitrag-2024-forum_kinesiologie/

Crawling page: https://shiatsuverband.ch/redaktioneller-beitrag-2024-ftmedien/ (depth=1)
Checking robots.txt at: https://shiatsuverband.ch/robots.txt
Can fetch https://shiatsuverband.ch/redaktioneller-beitrag-2024-ftmedien/: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://shiatsuverband.ch/redaktioneller-beitrag-2024-ftmedien/

Crawling page: https://shiatsuverband.ch/frauengesundheit-und-shiatsu/ (depth=1)
Checking robots.txt at: https://shiatsuverband.ch/robots.txt
Can fetch https://shiatsuverband.ch/frauengesundheit-und-shiatsu/: True
Found 56 links at https://shiatsuverband.ch/frauengesundheit-und-shiatsu/

Crawling page: https://shiatsuverband.ch/category/methode/ (depth=1)
Checking robots.txt at: https://shiatsuverband.ch/robots.txt
Can fetch https://shiatsuverband.ch/category/methode/: True
Found 97 links at https://shiatsuverband.ch/category/methode/

Crawling page: https://shiatsuverband.ch/grundlagen-des-shiatsu-am-beispiel-junger-erwachsener/ (depth=1)
Checking robots.txt at: https://shiatsuverband.ch/robots.txt
Can fetch https://shiatsuverband.ch/grundlagen-des-shiatsu-am-beispiel-junger-erwachsener/: True
Found 60 links at https://shiatsuverband.ch/grundlagen-des-shiatsu-am-beispiel-junger-erwachsener/

Crawling pag

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://csp-vallon.ch/wp-content/uploads/2022/04/Carte-Vallon.png

Crawling page: https://csp-vallon.ch/Cloud/ (depth=1)
Checking robots.txt at: https://csp-vallon.ch/robots.txt
Can fetch https://csp-vallon.ch/Cloud/: True
Found 2 links at https://csp-vallon.ch/Cloud/
Updated public/graph_data.json with 37620 nodes and 84159 edges, plus visited sets.
Updated public/graph_data.json with 37620 nodes and 84159 edges, plus visited sets.
Checking robots.txt at: https://emic-lina.ch/robots.txt
Can fetch https://emic-lina.ch: True
Updated public/graph_data.json with 37620 nodes and 84159 edges, plus visited sets.
Checking robots.txt at: https://le-carillon.ch/robots.txt
Can fetch https://le-carillon.ch: True
Updated public/graph_data.json with 37620 nodes and 84159 edges, plus visited sets.
Updated public/graph_data.json with 37620 nodes and 84159 edges, plus visited sets.
Checking robots.txt at: https://jbevequoz.ch/robots.txt
Can fetch https://jbevequoz.ch: True
Updated pub

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://reseau-sante-haut-leman.ch/rshl

Crawling page: https://reseau-sante-haut-leman.ch/rslc (depth=1)
Checking robots.txt at: https://reseau-sante-haut-leman.ch/robots.txt
Can fetch https://reseau-sante-haut-leman.ch/rslc: True
Failed to extract links from https://reseau-sante-haut-leman.ch/rslc: 404 Client Error: Not Found for url: https://reseau-sante-haut-leman.ch/rslc
Found 0 links at https://reseau-sante-haut-leman.ch/rslc

Crawling page: https://reseau-sante-haut-leman.ch/rsrl (depth=1)
Checking robots.txt at: https://reseau-sante-haut-leman.ch/robots.txt
Can fetch https://reseau-sante-haut-leman.ch/rsrl: True
Failed to extract links from https://reseau-sante-haut-leman.ch/rsrl: 404 Client Error: Not Found for url: https://reseau-sante-haut-leman.ch/rsrl
Found 0 links at https://reseau-sante-haut-leman.ch/rsrl

Crawling page: https://reseau-sante-haut-leman.ch/rsnb (depth=1)
Checking robots.txt at: https://reseau-sante-haut-leman.ch/robots.txt
Can fetch https

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img4.png

Crawling page: https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img3.png (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img3.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img3.png

Crawling page: https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img2.png (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img2.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img2.png

Crawling page: https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img5.png (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img5.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img5.png

Crawling page: https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img6.png (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img6.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img6.png

Crawling page: https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img1.png (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img1.png: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://boulangeriemettraux.ch/wp-content/uploads/2021/11/img1.png

Crawling page: https://boulangeriemettraux.ch/home/ (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/home/: True
Failed to extract links from https://boulangeriemettraux.ch/home/: 404 Client Error: Not Found for url: https://boulangeriemettraux.ch/home/
Found 0 links at https://boulangeriemettraux.ch/home/

Crawling page: https://boulangeriemettraux.ch/produits/ (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Can fetch https://boulangeriemettraux.ch/produits/: True
Failed to extract links from https://boulangeriemettraux.ch/produits/: 404 Client Error: Not Found for url: https://boulangeriemettraux.ch/produits/
Found 0 links at https://boulangeriemettraux.ch/produits/

Crawling page: https://boulangeriemettraux.ch/confidentialite/ (depth=1)
Checking robots.txt at: https://boulangeriemettraux.ch/robots.txt
Ca

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://stopfrelons.ch/documentsDFA/2024_08_20_vierge.docx
Skipping PDF: https://stopfrelons.ch/documentsDFA_secu/2024_08_20_mex.pdf
Updated public/graph_data.json with 38454 nodes and 85621 edges, plus visited sets.
Updated public/graph_data.json with 38454 nodes and 85621 edges, plus visited sets.
Checking robots.txt at: https://lussy-sur-morges.ch/robots.txt
Can fetch https://lussy-sur-morges.ch: True

--- Crawling domain: lussy-sur-morges.ch (Domain #0) ---

Crawling page: https://lussy-sur-morges.ch (depth=0)
Checking robots.txt at: https://lussy-sur-morges.ch/robots.txt
Can fetch https://lussy-sur-morges.ch: True
Found 67 links at https://lussy-sur-morges.ch

Crawling page: https://lussy-sur-morges.ch/ (depth=1)
Checking robots.txt at: https://lussy-sur-morges.ch/robots.txt
Can fetch https://lussy-sur-morges.ch/: True
Found 67 links at https://lussy-sur-morges.ch/

Crawling page: https://lussy-sur-morges.ch/votre-commune (depth=1)
Checking robots.txt at: https://

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://veyron-venoge.ch/wp-content/uploads/2025/01/six.jpeg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/un.jpg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/cinq.jpg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/six.jpg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/quatre.jpg

Crawling page: https://veyron-venoge.ch/wp-content/uploads/2025/01/trois.jpeg (depth=1)
Checking robots.txt at: https://veyron-venoge.ch/robots.txt
Can fetch https://veyron-venoge.ch/wp-content/uploads/2025/01/trois.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://veyron-venoge.ch/wp-content/uploads/2025/01/trois.jpeg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/deux.jpg

Crawling page: https://veyron-venoge.ch/wp-content/uploads/2025/01/quatre.jpeg (depth=1)
Checking robots.txt at: https://veyron-venoge.ch/robots.txt
Can fetch https://veyron-venoge.ch/wp-content/uploads/2025/01/quatre.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://veyron-venoge.ch/wp-content/uploads/2025/01/quatre.jpeg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/huit.jpg
Skipping JPG: https://veyron-venoge.ch/wp-content/uploads/2025/01/deux-1.jpg

Crawling page: https://veyron-venoge.ch/wp-content/uploads/2025/01/cinq.jpeg (depth=1)
Checking robots.txt at: https://veyron-venoge.ch/robots.txt
Can fetch https://veyron-venoge.ch/wp-content/uploads/2025/01/cinq.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://veyron-venoge.ch/wp-content/uploads/2025/01/cinq.jpeg
Updated public/graph_data.json with 38490 nodes and 85770 edges, plus visited sets.
Updated public/graph_data.json with 38490 nodes and 85770 edges, plus visited sets.
Checking robots.txt at: https://francs-tireurs.ch/robots.txt
Can fetch https://francs-tireurs.ch: True
Updated public/graph_data.json with 38490 nodes and 85770 edges, plus visited sets.
Checking robots.txt at: https://peche-svpr.ch/robots.txt
Can fetch https://peche-svpr.ch: True
Updated public/graph_data.json with 38490 nodes and 85770 edges, plus visited sets.
Checking robots.txt at: https://espace-emploi.ch/robots.txt
Error checking robots.txt for https://espace-emploi.ch: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: Hostname mismatch, certificate is not valid for 'espace-emploi.ch'. (_ssl.c:1000)>
contains_keyword failed for https://espace-emploi.ch: HTTPSConnectionPool(host='espace-emploi.ch', port=443): Max

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.camping-huettenberg.ch/wp-content/uploads/camping_info_24.png

Crawling page: https://www.camping-huettenberg.ch/erreichbarkeit-ueber-die-herbst-winterzeit-2/ (depth=1)
Checking robots.txt at: https://www.camping-huettenberg.ch/robots.txt
Can fetch https://www.camping-huettenberg.ch/erreichbarkeit-ueber-die-herbst-winterzeit-2/: True
Found 99 links at https://www.camping-huettenberg.ch/erreichbarkeit-ueber-die-herbst-winterzeit-2/

Crawling page: https://www.camping-huettenberg.ch/jahreswechsel-3/ (depth=1)
Checking robots.txt at: https://www.camping-huettenberg.ch/robots.txt
Can fetch https://www.camping-huettenberg.ch/jahreswechsel-3/: True
Found 98 links at https://www.camping-huettenberg.ch/jahreswechsel-3/

Crawling page: https://www.camping-huettenberg.ch/weihnachten-3/ (depth=1)
Checking robots.txt at: https://www.camping-huettenberg.ch/robots.txt
Can fetch https://www.camping-huettenberg.ch/weihnachten-3/: True
Found 96 links at https://www.camping-

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://human-blossom.ch/wp-content/uploads/2017/06/Audio_La-Girafe.mp3

Crawling page: https://human-blossom.ch/wp-content/uploads/2017/06/Audio_Experte.mp3 (depth=1)
Checking robots.txt at: https://human-blossom.ch/robots.txt
Can fetch https://human-blossom.ch/wp-content/uploads/2017/06/Audio_Experte.mp3: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://human-blossom.ch/wp-content/uploads/2017/06/Audio_Experte.mp3

Crawling page: https://human-blossom.ch/wp-content/uploads/2017/06/Audio_Cornichons.mp3 (depth=1)
Checking robots.txt at: https://human-blossom.ch/robots.txt
Can fetch https://human-blossom.ch/wp-content/uploads/2017/06/Audio_Cornichons.mp3: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://human-blossom.ch/wp-content/uploads/2017/06/Audio_Cornichons.mp3

Crawling page: https://human-blossom.ch/pablo-nureda-poeme/ (depth=1)
Checking robots.txt at: https://human-blossom.ch/robots.txt
Can fetch https://human-blossom.ch/pablo-nureda-poeme/: True
Found 13 links at https://human-blossom.ch/pablo-nureda-poeme/
Updated public/graph_data.json with 39395 nodes and 87334 edges, plus visited sets.
Updated public/graph_data.json with 39395 nodes and 87334 edges, plus visited sets.
Checking robots.txt at: https://arcid.ch/robots.txt
Can fetch https://arcid.ch: True
Updated public/graph_data.json with 39395 nodes and 87334 edges, plus visited sets.
Checking robots.txt at: https://giqual.ch/robots.txt
Can fetch https://giqual.ch: True
Updated public/graph_data.json with 39395 nodes and 87334 edges, plus visited sets.
Checking robots.txt at: https://ohanami.ch/robots.txt
Can fetch https://ohanami.ch: True
Updated public/graph_data.json with 39395 nodes and 87334 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://montee-nozon.ch/wp-content/uploads/2015/02/Signature-SF.gif
Updated public/graph_data.json with 39411 nodes and 87361 edges, plus visited sets.
Updated public/graph_data.json with 39411 nodes and 87361 edges, plus visited sets.
Checking robots.txt at: https://auto-olivier.ch/robots.txt
Can fetch https://auto-olivier.ch: True
Updated public/graph_data.json with 39411 nodes and 87361 edges, plus visited sets.
Checking robots.txt at: https://chorus.ch/robots.txt
Can fetch https://chorus.ch: True
Updated public/graph_data.json with 39411 nodes and 87361 edges, plus visited sets.
Checking robots.txt at: https://almatech.ch/robots.txt
Can fetch https://almatech.ch: True
Updated public/graph_data.json with 39411 nodes and 87361 edges, plus visited sets.
Checking robots.txt at: https://myrun4help.ch/robots.txt
Can fetch https://myrun4help.ch: True
Updated public/graph_data.json with 39411 nodes and 87361 edges, plus visited sets.
Checking robots.txt at: https://pro-vel

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/02/hotel-with-plaza-1024x683.jpg.webp

Crawling page: https://www.zermatterhof.ch/wp-content/uploads/2022/04/Zermatterhof-Lobby-1-min-1-1024x683.jpg.webp (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/wp-content/uploads/2022/04/Zermatterhof-Lobby-1-min-1-1024x683.jpg.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/04/Zermatterhof-Lobby-1-min-1-1024x683.jpg.webp

Crawling page: https://www.zermatterhof.ch/wp-content/uploads/2022/04/BreakfastTable2-1024x683.jpg.webp (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/wp-content/uploads/2022/04/BreakfastTable2-1024x683.jpg.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/04/BreakfastTable2-1024x683.jpg.webp

Crawling page: https://www.zermatterhof.ch/wp-content/uploads/2022/02/classic-superior-one-bedroom-suite-room-detail-1024x693.jpg.webp (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/wp-content/uploads/2022/02/classic-superior-one-bedroom-suite-room-detail-1024x693.jpg.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/02/classic-superior-one-bedroom-suite-room-detail-1024x693.jpg.webp

Crawling page: https://www.zermatterhof.ch/wp-content/uploads/2022/02/lusi-oysters-steaktartare-1024x683.jpg.webp (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/wp-content/uploads/2022/02/lusi-oysters-steaktartare-1024x683.jpg.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/02/lusi-oysters-steaktartare-1024x683.jpg.webp

Crawling page: https://www.zermatterhof.ch/wp-content/uploads/2022/02/horse-2-1024x679.jpg.webp (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/wp-content/uploads/2022/02/horse-2-1024x679.jpg.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/02/horse-2-1024x679.jpg.webp

Crawling page: https://www.zermatterhof.ch/our-people/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/our-people/: True
Found 47 links at https://www.zermatterhof.ch/our-people/

Crawling page: https://www.zermatterhof.ch/sustainability/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/sustainability/: True
Found 51 links at https://www.zermatterhof.ch/sustainability/

Crawling page: https://www.zermatterhof.ch/restaurants-and-bars/alpine-gourmet-prato-borni/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/restaurants-and-bars/alpine-gourmet-prato-borni/: True
Found 69 links at https://www.zermatterhof.ch/restaurants-and-bars/alpine-gourmet-prato-borni/

Crawling page: https://www.zermatterhof.ch/restau

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.zermatterhof.ch/wp-content/uploads/2022/04/grand-zermatterhof-map.jpg.webp

Crawling page: https://www.zermatterhof.ch/partnerships/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/partnerships/: True
Found 53 links at https://www.zermatterhof.ch/partnerships/

Crawling page: https://www.zermatterhof.ch/press/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/press/: True
Found 56 links at https://www.zermatterhof.ch/press/

Crawling page: https://www.zermatterhof.ch/careers/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/careers/: True
Found 48 links at https://www.zermatterhof.ch/careers/

Crawling page: https://www.zermatterhof.ch/terms-and-conditions/ (depth=1)
Checking robots.txt at: https://www.zermatterhof.ch/robots.txt
Can fetch https://www.zermatterhof.ch/terms-an

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://genoudb.ch/pieces_detachees_john_deere_2022/

Crawling page: https://genoudb.ch/collection_john_deere_2022/ (depth=1)
Checking robots.txt at: https://genoudb.ch/robots.txt
Can fetch https://genoudb.ch/collection_john_deere_2022/: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://genoudb.ch/collection_john_deere_2022/

Crawling page: https://www.genoudb.ch (depth=1)
Checking robots.txt at: https://www.genoudb.ch/robots.txt
Can fetch https://www.genoudb.ch: True
Found 25 links at https://www.genoudb.ch
Updated public/graph_data.json with 39849 nodes and 88445 edges, plus visited sets.
Updated public/graph_data.json with 39849 nodes and 88445 edges, plus visited sets.
Checking robots.txt at: https://globogal.ch/robots.txt
Can fetch https://globogal.ch: True
Updated public/graph_data.json with 39849 nodes and 88445 edges, plus visited sets.
Checking robots.txt at: https://griesser-kaeltetechnik.ch/robots.txt
Can fetch https://griesser-kaeltetechnik.ch: True
Updated public/graph_data.json with 39849 nodes and 88445 edges, plus visited sets.
Checking robots.txt at: https://grunderco.ch/robots.txt
Can fetch https://grunderco.ch: True

--- Crawling domain: grunderco.ch (Domain #0) ---

Crawling page: https://grunderco.ch (depth=0)
Checking rob

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_181827.jpeg

Crawling page: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_182034.jpeg (depth=1)
Checking robots.txt at: https://zimmermann-stalltechnik.ch/robots.txt
Can fetch https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_182034.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_182034.jpeg

Crawling page: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_183914.jpeg (depth=1)
Checking robots.txt at: https://zimmermann-stalltechnik.ch/robots.txt
Can fetch https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_183914.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2024/11/20241025_183914.jpeg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stalltechnik-referenz-berktold-7.jpg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stalltechnik-referenz-berktold-8.jpg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stalltechnik-referenz-berktold-6.jpg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stalltechnik-referenz-berktold-5.jpg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stalltechnik-referenz-berktold-4.jpg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stalltechnik-referenz-berktold-3.jpg
Skipping JPG: https://zimmermann-stalltechnik.ch/wp-content/uploads/sites/3/2021/04/zimmermann-stall

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://recytech.ch/agb

Crawling page: https://recytech.ch/impressum (depth=1)
Checking robots.txt at: https://recytech.ch/robots.txt
Can fetch https://recytech.ch/impressum: True
Found 100 links at https://recytech.ch/impressum

Crawling page: https://recytech.ch/datenschutzerklaerung (depth=1)
Checking robots.txt at: https://recytech.ch/robots.txt
Can fetch https://recytech.ch/datenschutzerklaerung: True
Found 110 links at https://recytech.ch/datenschutzerklaerung
Updated public/graph_data.json with 39976 nodes and 88726 edges, plus visited sets.
Updated public/graph_data.json with 39976 nodes and 88726 edges, plus visited sets.
Checking robots.txt at: https://strebel-hagmatt.ch/robots.txt
Can fetch https://strebel-hagmatt.ch: True
Updated public/graph_data.json with 39976 nodes and 88726 edges, plus visited sets.
Updated public/graph_data.json with 39976 nodes and 88726 edges, plus visited sets.
Checking robots.txt at: https://hoelzle.ch/robots.txt
Can fetch https:

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.liestaler.ch/wp-content/uploads/2021/12/IMG_2363.jpeg

Crawling page: https://www.liestaler.ch/wp-content/uploads/2021/12/IMG_0969.jpeg (depth=1)
Checking robots.txt at: https://www.liestaler.ch/robots.txt
Can fetch https://www.liestaler.ch/wp-content/uploads/2021/12/IMG_0969.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.liestaler.ch/wp-content/uploads/2021/12/IMG_0969.jpeg
Skipping JPG: https://www.liestaler.ch/wp-content/uploads/2017/01/20151027_153348.jpg
Skipping JPG: https://www.liestaler.ch/wp-content/uploads/2022/08/Weine2022-1-scaled.jpg
Updated public/graph_data.json with 40010 nodes and 88826 edges, plus visited sets.
Updated public/graph_data.json with 40010 nodes and 88826 edges, plus visited sets.
Checking robots.txt at: https://info@siebe-dupf.ch/robots.txt
Error checking robots.txt for https://info@siebe-dupf.ch: <urlopen error [Errno 11003] getaddrinfo failed>
Updated public/graph_data.json with 40010 nodes and 88826 edges, plus visited sets.
Checking robots.txt at: https://siebe-dupf.ch/robots.txt
Can fetch https://siebe-dupf.ch: True
Updated public/graph_data.json with 40010 nodes and 88826 edges, plus visited sets.
Checking robots.txt at: https://info@syydebaendel.ch/robots.txt
Error checking robots.txt for https://info@syydebaendel.ch: <urlopen error [Er

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/startseite/lilo_lobpreis

Crawling page: https://www.schleife.ch/rueckblick/csm_xund_7_f56c4e6269 (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/rueckblick/csm_xund_7_f56c4e6269: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/rueckblick/csm_xund_7_f56c4e6269

Crawling page: https://www.schleife.ch/rueckblick/csm_20211002_rise-up-frauenkonferenz_rise-up_02_10_2021_web1x0a9694_639c8304ed (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/rueckblick/csm_20211002_rise-up-frauenkonferenz_rise-up_02_10_2021_web1x0a9694_639c8304ed: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/rueckblick/csm_20211002_rise-up-frauenkonferenz_rise-up_02_10_2021_web1x0a9694_639c8304ed

Crawling page: https://www.schleife.ch/startseite/rise_up_rebecca_marcus_watta (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/startseite/rise_up_rebecca_marcus_watta: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/startseite/rise_up_rebecca_marcus_watta

Crawling page: https://www.schleife.ch/startseite/rise_up_anbetung (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/startseite/rise_up_anbetung: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/startseite/rise_up_anbetung

Crawling page: https://www.schleife.ch/startseite/m_watta (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/startseite/m_watta: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/startseite/m_watta

Crawling page: https://www.schleife.ch/startseite/steph (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/startseite/steph: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/startseite/steph

Crawling page: https://www.schleife.ch/startseite/a_keller_ansteckende_herrlichkeit (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/startseite/a_keller_ansteckende_herrlichkeit: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/startseite/a_keller_ansteckende_herrlichkeit

Crawling page: https://www.schleife.ch/20210904_partnertag_web-2045 (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/20210904_partnertag_web-2045: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.schleife.ch/20210904_partnertag_web-2045

Crawling page: https://www.schleife.ch/zahlung-und-versand (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/zahlung-und-versand: True
Found 142 links at https://www.schleife.ch/zahlung-und-versand

Crawling page: https://www.schleife.ch/widerrufsrecht (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/widerrufsrecht: True
Found 142 links at https://www.schleife.ch/widerrufsrecht

Crawling page: https://www.schleife.ch/impressum (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/impressum: True
Found 145 links at https://www.schleife.ch/impressum

Crawling page: https://www.schleife.ch/datenschutz (depth=1)
Checking robots.txt at: https://www.schleife.ch/robots.txt
Can fetch https://www.schleife.ch/datenschutz: True
Found 169 links at https://www.schleife.ch/dat

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://agrotechbourquiservices.ch/_downloads/6ea58e42454ed99a9d8cab28d4666150
Updated public/graph_data.json with 40712 nodes and 90232 edges, plus visited sets.
Updated public/graph_data.json with 40712 nodes and 90232 edges, plus visited sets.
Checking robots.txt at: https://brindaventure.ch/robots.txt
Can fetch https://brindaventure.ch: True
Updated public/graph_data.json with 40712 nodes and 90232 edges, plus visited sets.
Checking robots.txt at: https://jbuchmann.ch/robots.txt
Error checking robots.txt for https://jbuchmann.ch: <urlopen error [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1000)>
contains_keyword failed for https://jbuchmann.ch: HTTPSConnectionPool(host='jbuchmann.ch', port=443): Max retries exceeded with url: / (Caused by SSLError(SSLError(1, '[SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:1000)')))
Updated public/graph_data.json with 40712 nodes and 90232 edges, plus visited sets.
Updated p

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://festival-transition.ch/wp-content/uploads/2024/05/Microlino.jpeg

Crawling page: https://festival-transition.ch/wp-content/uploads/2024/05/eCarla.jpeg (depth=1)
Checking robots.txt at: https://festival-transition.ch/robots.txt
Can fetch https://festival-transition.ch/wp-content/uploads/2024/05/eCarla.jpeg: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://festival-transition.ch/wp-content/uploads/2024/05/eCarla.jpeg
Skipping JPG: https://festival-transition.ch/wp-content/uploads/2024/05/BMW_CE04.jpg
Skipping JPG: https://festival-transition.ch/wp-content/uploads/2024/05/Ami.jpg

Crawling page: https://festival-transition.ch/wp-content/uploads/2024/05/Libbri-scaled.webp (depth=1)
Checking robots.txt at: https://festival-transition.ch/robots.txt
Can fetch https://festival-transition.ch/wp-content/uploads/2024/05/Libbri-scaled.webp: True


Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://festival-transition.ch/wp-content/uploads/2024/05/Libbri-scaled.webp
Updated public/graph_data.json with 40846 nodes and 90728 edges, plus visited sets.
Updated public/graph_data.json with 40846 nodes and 90728 edges, plus visited sets.
Checking robots.txt at: https://ecublens-plage.ch/robots.txt
Can fetch https://ecublens-plage.ch: True

--- Crawling domain: ecublens-plage.ch (Domain #0) ---

Crawling page: https://ecublens-plage.ch (depth=0)
Checking robots.txt at: https://ecublens-plage.ch/robots.txt
Can fetch https://ecublens-plage.ch: True
Found 20 links at https://ecublens-plage.ch

Crawling page: https://ecublens-plage.ch/ (depth=1)
Checking robots.txt at: https://ecublens-plage.ch/robots.txt
Can fetch https://ecublens-plage.ch/: True
Found 20 links at https://ecublens-plage.ch/

Crawling page: https://ecublens-plage.ch/edition-2023/ (depth=1)
Checking robots.txt at: https://ecublens-plage.ch/robots.txt
Can fetch https://ecublens-plage.ch/edition-2023/: 

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://gregio.ch/wp-content/uploads/2024/02/HP3.png

Crawling page: https://gregio.ch/projekt/energiepark/ (depth=1)
Checking robots.txt at: https://gregio.ch/robots.txt
Can fetch https://gregio.ch/projekt/energiepark/: True
Found 17 links at https://gregio.ch/projekt/energiepark/

Crawling page: https://gregio.ch/jeder-bauernhof-ist-eine-batterie/ (depth=1)
Checking robots.txt at: https://gregio.ch/robots.txt
Can fetch https://gregio.ch/jeder-bauernhof-ist-eine-batterie/: True
Found 19 links at https://gregio.ch/jeder-bauernhof-ist-eine-batterie/

Crawling page: https://gregio.ch/walter-anlagen-an-der-forstmesse-luzern/ (depth=1)
Checking robots.txt at: https://gregio.ch/robots.txt
Can fetch https://gregio.ch/walter-anlagen-an-der-forstmesse-luzern/: True
Found 18 links at https://gregio.ch/walter-anlagen-an-der-forstmesse-luzern/

Crawling page: https://gregio.ch/produkt/biogas/ (depth=1)
Checking robots.txt at: https://gregio.ch/robots.txt
Can fetch https://gregio.

Some characters could not be decoded, and were replaced with REPLACEMENT CHARACTER.


Found 0 links at https://www.restaurant-alpenhof.ch/wp-content/uploads/2023/01/Power-Point-Anfahrt-Restaurant-Alpenhof.pptx
Updated public/graph_data.json with 40970 nodes and 91016 edges, plus visited sets.
Updated public/graph_data.json with 40970 nodes and 91016 edges, plus visited sets.
Checking robots.txt at: https://erf-medien.ch/robots.txt
Can fetch https://erf-medien.ch: True
Updated public/graph_data.json with 40970 nodes and 91016 edges, plus visited sets.
Updated public/graph_data.json with 40970 nodes and 91016 edges, plus visited sets.
Updated public/graph_data.json with 40970 nodes and 91016 edges, plus visited sets.
Updated public/graph_data.json with 40970 nodes and 91016 edges, plus visited sets.
Checking robots.txt at: https://kids-tour.ch/robots.txt
Error checking robots.txt for https://kids-tour.ch: <urlopen error [SSL: TLSV1_ALERT_INTERNAL_ERROR] tlsv1 alert internal error (_ssl.c:1000)>
contains_keyword failed for https://kids-tour.ch: HTTPSConnectionPool(host='ki

Make Graph_data-filtered

In [1]:
import json

# Load the original graph data
with open('public/graph_data.json', 'r') as f:
    data = json.load(f)

# Filter nodes with "status" containing "Relevant"
filtered_nodes = [node for node in data['nodes'] if 'Relevant' in node['status']]

# Debugging: Print the number of filtered nodes
print(f"Number of filtered nodes: {len(filtered_nodes)}")

# Get the IDs of the filtered nodes
filtered_node_ids = {node['id'] for node in filtered_nodes}

# Debugging: Print the filtered node IDs
print(f"Filtered node IDs: {filtered_node_ids}")

# Filter edges that connect the filtered nodes
filtered_edges = [edge for edge in data['edges'] if edge['source'] in filtered_node_ids and edge['target'] in filtered_node_ids]

# Debugging: Print the number of filtered edges
print(f"Number of filtered edges: {len(filtered_edges)}")

# Create the filtered graph data
filtered_data = {
    'nodes': filtered_nodes,
    'edges': filtered_edges
}

# Write the filtered data to a new JSON file
with open('public/graph_data_filtered.json', 'w') as f:
    json.dump(filtered_data, f, indent=2)

print("Filtered graph data has been written to graph_data_filtered.json")

Number of filtered nodes: 3852
Filtered node IDs: {24576, 1, 24577, 32769, 16388, 24581, 24583, 9, 16395, 12, 8204, 32784, 20, 22, 24598, 24, 24603, 29, 8224, 16417, 8229, 16421, 24615, 8235, 24623, 32815, 53, 24635, 24636, 61, 32832, 32833, 32834, 24644, 32838, 16456, 24648, 24651, 24652, 24653, 24654, 79, 24657, 16472, 90, 94, 97, 16481, 100, 8292, 8293, 111, 8303, 125, 16510, 16512, 16513, 134, 16522, 16531, 24726, 16536, 24735, 16544, 24741, 16550, 167, 16551, 16554, 8363, 8367, 16562, 24754, 24755, 32949, 16566, 16571, 193, 8388, 16584, 32970, 8396, 16590, 32974, 16592, 16594, 214, 215, 8408, 16599, 218, 219, 8410, 16600, 24791, 223, 8416, 32985, 226, 234, 236, 8430, 239, 8431, 16622, 16625, 243, 244, 8438, 250, 253, 24836, 264, 24843, 271, 24847, 273, 33042, 275, 33044, 277, 16661, 16664, 281, 16665, 24857, 284, 8476, 8477, 287, 24861, 290, 24867, 16676, 24868, 16678, 295, 296, 300, 301, 8492, 303, 304, 16687, 306, 307, 16688, 16689, 24888, 24892, 16701, 16706, 323, 324, 16708, 3