In [3]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

def crawl_website(url, max_pages=100):
    visited = set()
    to_visit = [url]
    base_url = f"{urlparse(url).scheme}://{urlparse(url).netloc}"
    career_keywords = ['career', 'careers', 'job', 'jobs', 'opportunities', 'employment', 'work', 'position', 'vacancy']
    career_urls = []

    while to_visit and len(visited) < max_pages:
        current_url = to_visit.pop(0)
        
        if current_url not in visited:
            print(f"Crawling: {current_url}")
            visited.add(current_url)

            try:
                response = requests.get(current_url)
                if response.status_code == 200:
                    soup = BeautifulSoup(response.text, 'html.parser')
                    
                    # Check if the current page contains career-related keywords
                    page_text = soup.get_text().lower()
                    if any(keyword in page_text for keyword in career_keywords):
                        career_urls.append(current_url)
                    
                    for link in soup.find_all('a', href=True):
                        full_url = urljoin(base_url, link['href'])
                        if full_url.startswith(base_url) and full_url not in visited:
                            to_visit.append(full_url)
            except Exception as e:
                print(f"Error crawling {current_url}: {str(e)}")

    return visited, career_urls

# Usage
website_url = "https://www.sensiblemoney.com"  # Replace with the website you want to crawl
subroutes, career_pages = crawl_website(website_url)

print("\nDiscovered subroutes:")
for route in subroutes:
    print(route)

print("\nCareer-related pages:")
for page in career_pages:
    print(page)

Crawling: https://www.sensiblemoney.com
Crawling: https://www.sensiblemoney.com#content
Crawling: https://www.sensiblemoney.com/
Crawling: https://www.sensiblemoney.com/financial-planners/
Crawling: https://www.sensiblemoney.com/retirement-experts/
Crawling: https://www.sensiblemoney.com/investment-planning-services/
Crawling: https://www.sensiblemoney.com/learn/
Crawling: https://www.sensiblemoney.com/scottsdale-phoenix-financial-planner/
Crawling: https://www.sensiblemoney.com#elementor-action%3Aaction%3Dpopup%3Aopen%26settings%3DeyJpZCI6IjEwMzQxIiwidG9nZ2xlIjpmYWxzZX0%3D
Crawling: https://www.sensiblemoney.com/retirement-income-planning/
Crawling: https://www.sensiblemoney.com/retirement-investments/
Crawling: https://www.sensiblemoney.com/client-access/
Crawling: https://www.sensiblemoney.com/resources/
Crawling: https://www.sensiblemoney.com/retirement-planning-questions/
Crawling: https://www.sensiblemoney.com/careers/
Crawling: https://www.sensiblemoney.com/privacy-policy/
Crawl