In [2]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import time

def get_internal_links(url, domain_name, visited, timeout=10):
    """
    Return the not-yet-visited links on the page at `url` that stay on `domain_name`.

    Args:
        url: Absolute URL of the page to download and scan.
        domain_name: Network location (as produced by ``urlparse(...).netloc``)
            that a link must match to be kept.
        visited: Container of URLs that have already been crawled; links found
            in it are left out of the result. It is only read, never modified.
        timeout: Seconds to wait for the server before giving up on the request.

    Returns:
        A list of absolute URLs in page order, without ``#fragment`` parts and
        without duplicates. An empty list is returned (and a message printed)
        when the page cannot be retrieved.
    """
    try:
        # Without a timeout a single unresponsive server stalls the crawl forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve {url}: {e}")
        return []

    # Parse the page content
    soup = BeautifulSoup(response.text, 'html.parser')
    internal_links = []
    seen_on_page = set()

    # Find all <a> tags with href attributes
    for link in soup.find_all('a', href=True):
        try:
            # Resolve relative hrefs against the page URL, then drop the
            # fragment: "page#a" and "page#b" are the same document and
            # must not be fetched twice.
            parsed = urlparse(urljoin(url, link['href']))._replace(fragment='')
        except ValueError:
            # Malformed href (e.g. a broken IPv6 literal); skip it rather
            # than abort the whole page.
            continue

        if parsed.netloc != domain_name:
            continue

        full_url = parsed.geturl()
        if full_url in visited or full_url in seen_on_page:
            continue

        seen_on_page.add(full_url)
        internal_links.append(full_url)

    return internal_links

def crawl_wiki(url, visited, *, delay=0.0, max_pages=None):
    """
    Traverse same-domain links depth-first starting at `url`, printing each page crawled.

    The traversal uses an explicit stack instead of recursion, so a long chain
    of pages cannot exhaust Python's recursion limit. Pages are visited in the
    same order a recursive pre-order walk would produce.

    Args:
        url: Absolute URL to start from; its network location defines the
            domain the crawl is restricted to.
        visited: Set of URLs already crawled. It is updated in place, so the
            caller can inspect it afterwards or pass it to a later call.
        delay: Seconds to sleep after each page before fetching the next one.
            The default of 0 means no pause.
        max_pages: Maximum number of pages to crawl in this call, or None for
            no limit.
    """
    domain_name = urlparse(url).netloc
    pending = [url]
    crawled = 0

    while pending:
        if max_pages is not None and crawled >= max_pages:
            break

        current = pending.pop()
        # A URL can be queued by several pages before it is first reached.
        if current in visited:
            continue

        print(f"Crawling: {current}")
        visited.add(current)
        crawled += 1

        # Get internal links on the page
        internal_links = get_internal_links(current, domain_name, visited)

        # Push in reverse so the first link on the page is popped first,
        # which keeps the original depth-first visiting order.
        pending.extend(reversed(internal_links))

        if delay > 0 and pending:
            time.sleep(delay)

# Seed page for the crawl, and the set that records every URL crawled
start_url = "https://criminalminds.fandom.com/wiki/Criminal_Minds_Wiki"
visited_urls = set()

# Kick off the traversal; visited_urls is filled in place as pages are crawled
crawl_wiki(url=start_url, visited=visited_urls)


Crawling: https://criminalminds.fandom.com/wiki/Criminal_Minds_Wiki
Crawling: https://criminalminds.fandom.com
Crawling: https://criminalminds.fandom.com/f
Crawling: https://criminalminds.fandom.com/wiki/Special:Search
Crawling: https://criminalminds.fandom.com/wiki/Special:AllPages
Crawling: https://criminalminds.fandom.com/wiki/Special:Community
Crawling: https://criminalminds.fandom.com/wiki/Special:AllMaps
Crawling: https://criminalminds.fandom.com/Blog:Recent_posts
Crawling: https://criminalminds.fandom.com/wiki/Criminal_Minds
Crawling: https://criminalminds.fandom.com/wiki/Episodes
Crawling: https://criminalminds.fandom.com/wiki/Season_Fourteen
Crawling: https://criminalminds.fandom.com/wiki/Season_Thirteen
Crawling: https://criminalminds.fandom.com/wiki/Season_Twelve
Crawling: https://criminalminds.fandom.com/wiki/Season_Eleven
Crawling: https://criminalminds.fandom.com/wiki/Season_Ten
Crawling: https://criminalminds.fandom.com/wiki/Season_Nine
Crawling: https://criminalminds.fa

KeyboardInterrupt: 