In [65]:
import requests
from urllib.parse import urlparse, urljoin
from bs4 import BeautifulSoup
import colorama


# init the colorama module
# use colorama just for using different colors when printing, to distinguish between internal and external links:
colorama.init()
GREEN = colorama.Fore.GREEN
GRAY = colorama.Fore.LIGHTBLACK_EX
RESET = colorama.Fore.RESET


# initialize the set of links (unique links)
internal_urls = set()
external_urls = set()


def is_valid(url):
    """
    Checks whether `url` is a valid URL.
    """
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)


def get_all_website_links(url):
    """
    Returns all URLs that is found on `url` in which it belongs to the same website
    """
    # all URLs of `url`
    urls = set()
    # domain name of the URL without the protocol
    domain_name = urlparse(url).netloc
    soup = BeautifulSoup(requests.get(url).content, "html.parser")
    
    for a_tag in soup.findAll("a"):
        href = a_tag.attrs.get("href")
        if href == "" or href is None:
            # href empty tag
            continue
        # join the URL if it's relative (not absolute link)
        href = urljoin(url, href)
        parsed_href = urlparse(href)
        # remove URL GET parameters, URL fragments, etc.
        href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
        if not is_valid(href):
            # not a valid URL
            continue
        if href in internal_urls:
            # already in the set
            continue
        if domain_name not in href:
            # external link
            if href not in external_urls:
                print(f"{GRAY}[!] External link: {href}{RESET}")
                external_urls.add(href)
            continue
        print(f"{GREEN}[*] Internal link: {href}{RESET}")
        urls.add(href)
        internal_urls.add(href)
    return urls

# number of urls visited so far will be stored here
total_urls_visited = 0

def crawl(url, max_urls=100):
    """
    Crawls a web page and extracts all links.
    You'll find all links in `external_urls` and `internal_urls` global set variables.
    params:
        max_urls (int): number of max urls to crawl, default is 100.
    """
    global total_urls_visited
    total_urls_visited += 1
    links = get_all_website_links(url)
    for link in links:
        if total_urls_visited > max_urls:
            break
        crawl(link, max_urls=max_urls)

In [66]:
crawl('https://www.immoweb.be/en')
print("[+] Total External links:", len(external_urls))
print("[+] Total Internal links:", len(internal_urls))
print("[+] Total:", len(external_urls) + len(internal_urls))

[*] Internal link: https://www.immoweb.be/en
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-rent
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale
[*] Internal link: https://www.immoweb.be/en/search/house/for-rent
[*] Internal link: https://www.immoweb.be/en/search/apartment/for-sale
[*] Internal link: https://www.immoweb.be/en/search/apartment/for-rent
[*] Internal link: https://www.immoweb.be/en/page/about-immoweb
[*] Internal link: https://www.immoweb.be/en/page/the-immoweb-newsroom
[!] External link: https://www.linkedin.com/company/immoweb/jobs/
[!] External link: https://www.axelspringer.com/en/
[!] External link: https://valuation.immoweb.be/real-estate-estimate/seg1
[!] External link: https://www.constructr.be/en
[*] Internal link: https://www.immoweb.be/en/credit-application
[!] External link: https://protect.immoweb.be/tenant-home-insurance/landing
[*

[*] Internal link: https://web.archive.org/web/20200719235022/https://support.apple.com/en-us/HT204434
[*] Internal link: https://web.archive.org/web/20200719235022/https://support.microsoft.com/en-gb/help/13862/windows-10-use-high-contrast-mode
[*] Internal link: https://web.archive.org/web/20200719235022/https://immoweb.be/en/page/privacy
[*] Internal link: https://web.archive.org/web/20200719235022/https://www.immoweb.be/cdn-cgi/l/email-protection
[*] Internal link: https://web.archive.org/web/20200719235022/https://www.immoweb.be/en/search/house-and-apartment/for-sale
[*] Internal link: https://web.archive.org/web/20200719235022/https://www.immoweb.be/en/search/house-and-apartment/for-rent
[*] Internal link: https://web.archive.org/web/20200719235022/https://www.immoweb.be/en/search/house/for-sale
[*] Internal link: https://web.archive.org/web/20200719235022/https://www.immoweb.be/en/search/house/for-rent
[*] Internal link: https://web.archive.org/web/20200719235022/https://www.imm

[*] Internal link: https://web.archive.org/web/20200718174108/https://www.immoweb.be/en/page/privacy
[*] Internal link: https://web.archive.org/web/20200718174108/https://www.cim.be/fr/internet/disclaimer-cim-internet
[!] External link: http://blog.archive.org/2020/06/10/temporary-national-emergency-library-to-close-2-weeks-early-returning-to-traditional-controlled-digital-lending/
[!] External link: https://archive.org
[!] External link: https://archive.org/create/
[!] External link: https://archive.org/account/signup
[!] External link: https://archive.org/account/login
[!] External link: https://archive.org/web/
[!] External link: https://archive.org/details/texts
[!] External link: https://archive.org/details/movies
[!] External link: https://archive.org/details/audio
[!] External link: https://archive.org/details/software
[!] External link: https://archive.org/details/image
[!] External link: https://archive.org/donate/
[!] External link: https://archive.org/about/
[!] External lin

[*] Internal link: https://web.archive.org/web/20200114215630/https://www.constructr.be/register
[*] Internal link: https://web.archive.org/web/20200114215630/https://www.constructr.be/register/professional/choose-package
[*] Internal link: https://web.archive.org/web/20200114215630/https://www.constructr.be/en/about-us 
[*] Internal link: https://web.archive.org/web/20200114215630/https://www.constructr.be/en/privacy-policy
[*] Internal link: https://web.archive.org/web/20200114215630/https://www.constructr.be/en/cookie-policy
[*] Internal link: https://web.archive.org/web/20200904064007/https://www.constructr.be/en
[*] Internal link: https://web.archive.org/web/20200904064007/https://www.constructr.be/en/search-projects
[*] Internal link: https://web.archive.org/web/20200904064007/https://www.constructr.be/en/register
[*] Internal link: https://web.archive.org/web/20200904064007/https://www.constructr.be/en/login
[*] Internal link: https://web.archive.org/web/20200904064007/https://w

[*] Internal link: https://web.archive.org/web/20200904064020/https://www.constructr.be/project/renovation-interieure-durbuy/brix-architecture/contemporain-renovation-unifamiliale-grange-metal-interior
[*] Internal link: https://web.archive.org/web/20200904064020/https://www.constructr.be/project/renovation-grange-a-theux/brix-architecture/extension-pierre-contemporain-ossature-bois-renovation-maison-grange-baie-vitree
[*] Internal link: https://web.archive.org/web/20200904064020/https://www.constructr.be/project/maison-unifamiliale-a-la-bruyere/brix-architecture/blanc-crepis-beton-contemporain-neuve-maison-aluminium
[*] Internal link: https://web.archive.org/web/20200904064020/https://www.constructr.be/fr/about-us 
[*] Internal link: https://web.archive.org/web/20200904064020/https://www.constructr.be/fr/privacy-policy
[*] Internal link: https://web.archive.org/web/20200904064020/https://www.constructr.be/fr/cookie-policy
[*] Internal link: https://web.archive.org/web/20200904064020/h

KeyboardInterrupt: 

In [67]:
crawl('https://www.immoweb.be/en/search/house-and-apartment/for-sale?countries=BE&page=1&orderBy=relevance')
print("[+] Total External links:", len(external_urls))
print("[+] Total Internal links:", len(internal_urls))
print("[+] Total:", len(external_urls) + len(internal_urls))

[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/antwerp/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/brussels/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/east-flanders/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/flemish-brabant/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/hainaut/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/liege/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/limburg/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/luxembourg/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/namur/province
[*] Internal link: https://www.immoweb.be/en/search/house-and-apartment/for-sale/walloon-brabant/provinc

[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/essen/2910
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/kalmthout/2920
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/brasschaat/2930
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/stabroek/2940
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/kapellen/2950
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/brecht/2960
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/schilde/2970
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/zoersel/2980
[*] Internal link: https://www.immoweb.be/en/search/house/for-sale/wuustwezel/2990
[*] Internal link: https://www.immoweb.be/en/search/villa/for-sale/lint/2547
[*] Internal link: https://www.immoweb.be/en/search/apartment-block/for-sale/lint/2547
[*] Internal link: https://www.immoweb.be/en/search/town-house/for-sale/lint/2547
[*] Internal link: https://

[*] Internal link: https://www.immoweb.be/en/search/villa/for-sale - 3698
[*] Internal link: https://www.immoweb.be/en/search/apartment-block/for-sale - 1333
[*] Internal link: https://www.immoweb.be/en/search/town-house/for-sale - 421
[*] Internal link: https://www.immoweb.be/en/search/mansion/for-sale
[*] Internal link: https://www.immoweb.be/en/search/exceptional-property/for-sale
[*] Internal link: https://www.immoweb.be/en/search/farmhouse/for-sale
[*] Internal link: https://www.immoweb.be/en/search/bungalow/for-sale
[*] Internal link: https://www.immoweb.be/en/search/chalet/for-sale
[*] Internal link: https://www.immoweb.be/en/search/castle/for-sale
[*] Internal link: https://www.immoweb.be/en/search/country-cottage/for-sale
[*] Internal link: https://www.immoweb.be/en/search/mixed-use-building/for-sale
[*] Internal link: https://www.immoweb.be/en/search/other-properties/for-sale
[*] Internal link: https://www.immoweb.be/en/search/country-house/for-sale
[*] Internal link: https://www.immoweb.be/en/search/pavilion/for-sale
[*] Internal link: https://www.immoweb.be/en/search/loft/for-sale
[*] Internal link: https://www.immoweb.be/en/search/duplex/for-sale
[*] Internal link: https://www.immoweb.be/en/search/kot/for-sale
[*] Internal link: https://www.immoweb.be/en/search/penthouse/for-sale
[*] Internal link: https://www.immoweb.be/en/search/triplex/for-sale
[*] Internal link: https://www.immoweb.be/en/search/studio/for-sale
[*] Internal link: https://www.immoweb.be/en/search/ground-floor/for-sale
[*] Internal link: https://www.immoweb.be/en/search/service-flat/for-sale

In [72]:
import requests
from requests import get
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

url = "https://www.immoweb.be/en/search/house-and-apartment/for-sale?countries=BE&page=1&orderBy=relevance"
headers = {"Accept-Language": "en-US, en;q=0.5"}
results = requests.get(url, headers=headers)

soup = BeautifulSoup(results.text, "html.parser")

# link=[]


# #our loop through each container
# for_sale = soup.find_all('div', class_='search-results__item')
    
# for container in for_sale:   
    
#     name = container.h2.a.text
#     link.append(name)

In [76]:
soup.get.h1

AttributeError: 'function' object has no attribute 'h1'

In [73]:
print(link)

[]


In [None]:
<h2 id="card-f2fe0e76-4d34-4b5b-903f-c2fc60fc9e84-title" 
class="card__title card--result__title"><a id="" 
href="https://www.immoweb.be/en/classified/apartment/for-sale/uccle/1180/8892312?searchId=5f51df6beef7c" 
aria-label="Apartment for sale, Uccle (282500€)" class="card__title-link">
                    Apartment
                </a></h2>

In [None]:
<li class="search-results__item"><article id="classified_8892312" data-fakeid="f2fe0e76-4d34-4b5b-903f-c2fc60fc9e84" class="card card--result card--xl"><div id="lazy-loading-observer-wrapper-18728393-c1ab-4af3-b29f-c559879d79fa-classified_8892312" class="card--result__body"><!----> <h2 id="card-f2fe0e76-4d34-4b5b-903f-c2fc60fc9e84-title" class="card__title card--result__title"><a id="" href="https://www.immoweb.be/en/classified/apartment/for-sale/uccle/1180/8892312?searchId=5f51df6beef7c" aria-label="Apartment for sale, Uccle (282500€)" class="card__title-link">
                    Apartment
                </a></h2> <p class="card--result__price"><span><span aria-hidden="true">€282,500</span> <span class="sr-only">
        282500€
    </span></span></p> <div class="card__informations card--result__informations"><p class="card__information card--result__information card__information--property"><span class="abbreviation"><span aria-hidden="true">
        1 bdr.
    </span> <span class="sr-only">
        1 bedroom
    </span></span> 
                        ·
                     

                        73

                        <span class="abbreviation"><span aria-hidden="true">
        m²
    </span> <span class="sr-only">
        square meters
    </span></span> <!----> <!----></p> <p class="card__information card--results__information--locality card__information--locality">
                1180 Uccle
            </p></div> <div class="card__description card--result__description">
            Proche du parvis Saint-Pierre
        </div> <div class="card__logo-container card--result__logo-container"><img src="https://static.immoweb.be/logos/2352.gif?cache=2016111403293" alt="Latour et Petit Vente" class="card__logo card--result__logo card__logo--xl"></div></div> <div class="card--result__media"><div id="cardMediaContainer-f2fe0e76-4d34-4b5b-903f-c2fc60fc9e84" class="card__media-container"><!----><img src="https://static.immoweb.be/photos/0/8/8/9/2/3/1/2/8892312_1.jpg?cache=20200821030435" id="cardMediaPicture-f2fe0e76-4d34-4b5b-903f-c2fc60fc9e84" class="card__media-picture card--result__media-picture card__media-picture--loaded" alt="property_image"></div> <div class="flag-list"><div class="flag-list__item flag-list__item--main"><span class="flag-list__text">
                new
            </span></div> <div class="flag-list__item flag-list__item--secondary"><!----> <span class="flag-list__text">New build</span></div></div> <!----> <button tabindex="0" aria-label="Save property" class="classified-bookmark"><div class="icon-bookmark"><svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg" class="icon-bookmark__heart"><defs><clipPath id="bookmark-mask-89c5aabe-b063-4fb4-babb-91b35d971e0c"><path d="M13.417 20.73a2.095 2.095 0 0 1-2.824-.01l-.115-.105c-5.51-4.986-9.11-8.25-8.974-12.322.063-1.785.976-3.496 2.456-4.503C6.73 1.9 10.152 2.782 12 4.944c1.847-2.162 5.269-3.054 8.04-1.154 1.48 1.007 2.393 2.718 2.456 4.503.147 4.072-3.464 7.336-8.974 12.343l-.105.094z"></path></clipPath></defs> <path d="M13.35 20.307c-.76.69-1.93.69-2.69-.01l-.11-.1C5.3 15.447 1.87 12.337 2 8.457c.06-1.7.93-3.33 2.34-4.29 2.64-1.8 5.9-.96 7.66 1.1 1.76-2.06 5.02-2.91 7.66-1.1 1.41.96 2.28 2.59 2.34 4.29.14 3.88-3.3 6.99-8.55 11.76l-.1.09z" class="icon-bookmark__heart-fill"></path> <path fill-rule="evenodd" clip-rule="evenodd" d="M12 5.267c1.76-2.06 5.02-2.9 7.66-1.1 1.4.96 2.28 2.58 2.34 4.28.13 3.88-3.3 6.99-8.55 11.75l-.11.1c-.76.7-1.93.7-2.69.01l-.1-.09-.06-.055C5.274 15.423 1.86 12.322 2 8.457c.06-1.71.94-3.33 2.34-4.29 2.64-1.81 5.9-.96 7.66 1.1zm0 13.56l.1-.1c4.76-4.31 7.9-7.16 7.9-10.05 0-2-1.5-3.5-3.5-3.5-1.54 0-3.04.99-3.56 2.36h-1.87c-.53-1.37-2.03-2.36-3.57-2.36-2 0-3.5 1.5-3.5 3.5 0 2.89 3.14 5.74 7.9 10.05l.1.1z" class="icon-bookmark__heart-border"></path> <g clip-path="url(#bookmark-mask-89c5aabe-b063-4fb4-babb-91b35d971e0c)"><rect height="100%" width="100%" stroke-width="0" class="icon-bookmark__heart-fill-active expect-animation"></rect></g></svg></div></button></div></article> <!----> <!----> <!----> <!----> <!----></li>