In [174]:
import re 
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [129]:
base_url = 'https://www.realtor.com/international//'

In [130]:
response = requests.get(base_url)

if response.status_code == 200:
    print(f'Connected to {response.url} successfully: {response.status_code}')
else:
    print(f"Failed to connect to {response.url}: {response.status_code}")

Connected to https://www.realtor.com/international// successfully: 200


In [131]:
soup = BeautifulSoup(response.content, 'lxml')

# 1. Fetching country cards

In [141]:
# country cards
cards = soup.find_all('div', {'class': 'slick-slide'})
cards

[<div aria-hidden="false" class="slick-slide slick-active slick-current" data-index="0" style="outline:none;width:9.090909090909092%" tabindex="-1"><div><a href="/international/mx/" style="width:100%;display:inline-block" tabindex="-1"><div class="sc-1t94e3u-5 gpjyVs card-wrapper d8awwk-2 hxpFJa"><div class="sc-1t94e3u-2 gBcSse card-image"><img alt="Mexico" class="sc-1t94e3u-3 jSIGMw" src="//s3.rea.global/img/204x112-crop/homepage/mx.jpg"/></div><h3 class="sc-1t94e3u-6 jvAkAa d8awwk-0 jjXaWo city-name"><span aria-label="Mexico" class="ant-typography ant-typography-ellipsis ant-typography-single-line sc-10k0zjf-0 ifntzL">Mexico<span aria-hidden="true" style="position:fixed;display:block;left:0;top:0;z-index:-9999;visibility:hidden;pointer-events:none;word-break:keep-all;white-space:nowrap">lg</span><span aria-hidden="true" style="position:fixed;display:block;left:0;top:0;z-index:-9999;visibility:hidden;pointer-events:none;width:0;white-space:normal;margin:0;padding:0"><span aria-hidden=

# 2. Fetching 
- Country names
- No of Sales done by each country
- Rentalss in each country

In [232]:
countries = []
sales = []
rentals = []
card_urls = []

for card in cards:
    country_names = card.select_one('h3.city-name span')
    if country_names:
        countries.append(country_names.text.replace('...', '').strip().replace('lg', ''))
    else:
        countries.append('Nan')

    additional_info = card.select_one('p.listing-number span')
    if additional_info:
        info_list = additional_info.text.strip().split()
        if len(info_list) > 3:
            sales.append(info_list[0])
            rentals.append(info_list[2])
        else:
            sales.append('Nan')
            rentals.append('Nan')
    else:
        sales.append('Nan')
        rentals.append('Nan')

    link = card.select_one('a')
    if link and 'href' in link.attrs:
        url = link['href']
        url_parts = url.split('/')
        if len(url_parts) > 3:
            cleared_url = f"/{url_parts[1]}/{url_parts[2]}/"
            card_urls.append(cleared_url)
        else:
            card_urls.append('Nan')
    else:
        card_urls.append('Nan')

In [229]:
print(f"Countries Length: {len(countries)}")
print(f"Sales Length: {len(sales)}")
print(f"Rentals Length: {len(rentals)}")
print(f"Card urls: {len(card_urls)}")

Countries Length: 29
Sales Length: 29
Rentals Length: 29
Card urls: 29


In [234]:
card_urls

['/international/mx/',
 '/international/bs/',
 '/international/jm/',
 '/international/do/',
 '/international/ca/',
 '/international/cr/',
 '/international/br/',
 '/international/au/',
 '/international/nz/',
 '/international/my/',
 '/international/us/',
 '/international/my/',
 '/international/nz/',
 '/international/au/',
 '/international/br/',
 '/international/cr/',
 '/international/ca/',
 '/international/do/',
 '/international/jm/',
 '/international/bs/',
 '/international/mx/',
 '/international/my/',
 '/international/nz/',
 '/international/au/',
 '/international/au/',
 '/international/au/',
 '/international/au/',
 '/international/au/',
 '/international/au/']

In [235]:
print("Countries:", countries)
print("Sales:", sales)
print("Rentals:", rentals)
print('Urls: ', card_urls)

Countries: ['Mexico', 'The Bahamas', 'Jamaica', 'Dominican Republic', 'Canada', 'Costa Rica', 'Brazil', 'Australia', 'New Zealand', 'Malaysia', 'United States', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan']
Sales: ['56,897', '4,290', '4,916', '9,350', '13,289', '9,667', '39,000', 'Nan', '26,274', 'Nan', '1,378,871', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan']
Rentals: ['5,099', '371', '1,767', '152', '1,510', '492', '1,977', 'Nan', '12,636', 'Nan', '152,925', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan', 'Nan']
Urls:  ['/international/mx/', '/international/bs/', '/international/jm/', '/international/do/', '/international/ca/', '/international/cr/', '/international/br/', '/international/au/', '/international/nz/', '/international/my/', '/international/us/'

In [237]:
for i in range(len(countries)):
    print(f"Country: {countries[i]} -> Sales: {sales[i]} -> Rentals: {rentals[i]} -> Url: {card_urls}")

Country: Mexico -> Sales: 56,897 -> Rentals: 5,099 -> Url: ['/international/mx/', '/international/bs/', '/international/jm/', '/international/do/', '/international/ca/', '/international/cr/', '/international/br/', '/international/au/', '/international/nz/', '/international/my/', '/international/us/', '/international/my/', '/international/nz/', '/international/au/', '/international/br/', '/international/cr/', '/international/ca/', '/international/do/', '/international/jm/', '/international/bs/', '/international/mx/', '/international/my/', '/international/nz/', '/international/au/', '/international/au/', '/international/au/', '/international/au/', '/international/au/', '/international/au/']
Country: The Bahamas -> Sales: 4,290 -> Rentals: 371 -> Url: ['/international/mx/', '/international/bs/', '/international/jm/', '/international/do/', '/international/ca/', '/international/cr/', '/international/br/', '/international/au/', '/international/nz/', '/international/my/', '/international/us/'

# 3. Entering each country cards

In [244]:
card_urls

['/international/mx/',
 '/international/bs/',
 '/international/jm/',
 '/international/do/',
 '/international/ca/',
 '/international/cr/',
 '/international/br/',
 '/international/au/',
 '/international/nz/',
 '/international/my/',
 '/international/us/',
 '/international/my/',
 '/international/nz/',
 '/international/au/',
 '/international/br/',
 '/international/cr/',
 '/international/ca/',
 '/international/do/',
 '/international/jm/',
 '/international/bs/',
 '/international/mx/',
 '/international/my/',
 '/international/nz/',
 '/international/au/',
 '/international/au/',
 '/international/au/',
 '/international/au/',
 '/international/au/',
 '/international/au/']