In [2]:
# libraries for def scrape_apartments
import os
import re
import smtplib
import ssl
import time
from datetime import datetime as dt
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import requests
import schedule
from bs4 import BeautifulSoup

In [21]:
# import smtplib
# from email.mime.multipart import MIMEMultipart
# from email.mime.text import MIMEText
# from apscheduler.schedulers.blocking import BlockingScheduler

## **Scraping the apartments 🏠**

#### Test for response 200

In [22]:
# Smoke test: the search page should answer with HTTP 200.
# The URL is written on a single line; splitting it inside a triple-quoted
# literal would put a newline character into the query string.
bez_realitky_url = r"https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK"
# The timeout keeps a stalled connection from blocking the notebook forever.
response = requests.get(bez_realitky_url, timeout=30)
response

<Response [200]>

### **Getting a list of articles**

In [23]:
bez_realitky_url = r"https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK"

# Fetch the search results page; the timeout keeps a stalled connection
# from hanging the notebook.
response = requests.get(bez_realitky_url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page into an
# empty list of listings.
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# One <article> element per listing card. The class string is passed whole,
# so it is compared against the complete class attribute of each <article>.
apt_list = soup.find_all("article", class_="PropertyCard_propertyCard__moO_5 propertyCard PropertyCard_propertyCard--landscape__XvPmC")

### **Getting prices 🤑**

In [24]:

# Peek at the price <span> elements of the first listing card to see
# which pieces of the price are available.
pricess = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "div.PropertyPrice_propertyPrice__lthza.propertyPrice.mb-0.mt-3 span"
)
pricess

[<span class="PropertyPrice_propertyPriceAmount__WdEE1">CZK 35,000</span>,
 <span class="PropertyPrice_propertyPriceAdditional__5jYQ6"> + CZK 5,000</span>,
 <span class="PropertyPrice_propertyPricePerMeter__IfhGa">(<!-- -->CZK 350 / m²<!-- -->)</span>,
 <span class="w-100"></span>]

In [25]:
# Price text of every listing card, one entry per card in apt_list order.
prices = []
price_selector = (
    "div.PropertyCard_propertyCardContent__osPAM "
    "div.PropertyPrice_propertyPrice__lthza.propertyPrice.mb-0.mt-3 span"
)
for apt in apt_list:
    price = apt.select(price_selector)
    if len(price) == 2:
        # Exactly two spans: join both pieces of text.
        # get_text() returns "" for an empty span, where .string would be None.
        price1 = price[0].get_text().replace('\xa0', ' ')
        price2 = price[1].get_text().replace('\xa0', ' ')
        price_ok = f"{price1} {price2}".strip()
    elif price:
        # NOTE(review): with any other span count only the first span is kept,
        # so an "+ CZK ..." additional charge in a later span is not included.
        # Confirm whether the limit should apply to the total instead.
        price_ok = price[0].get_text().replace('\xa0', ' ').strip()
    else:
        # No price spans at all: keep an empty placeholder so positions stay
        # aligned with the other per-card lists.
        price_ok = ""
    prices.append(price_ok)
prices

['CZK 35,000',
 'CZK 20,000',
 'CZK 28,000',
 'CZK 25,000',
 'CZK 29,000',
 'CZK 19,900',
 'CZK 15,600',
 'CZK 30,000',
 'CZK 25,000',
 'CZK 26,000',
 'CZK 13,000',
 'CZK 24,000',
 'CZK 19,600',
 'CZK 27,500',
 'CZK 37,000']

#### Converting prices to float and applying the max price

In [26]:
def convert_price_to_float(prices):
    """Return the sum of all numbers found in a price string.

    Both comma-grouped numbers ("35,000") and plain digit runs ("35000")
    are recognised, so "CZK 35,000 + CZK 5,000" yields 40000.0.

    Args:
        prices: Price text of a single listing.

    Returns:
        The numbers in the text added together, or 0 when the text
        contains no digits.
    """
    # First alternative: digits with one or more ",ddd" groups.
    # Second alternative: any plain digit run, so "35000" is read as one
    # number instead of being cut into "350" and "00".
    price_numbers = re.findall(r'\d{1,3}(?:,\d{3})+|\d+', prices)

    # Drop the thousands separators before converting each part.
    price_floats = [float(p.replace(',', '')) for p in price_numbers]

    return sum(price_floats)

# Upper price limit in CZK (exclusive); listings at or above it are dropped.
MAX_PRICE_CZK = 20000

# Map position in `prices` -> price text for every listing below the limit.
# A converted value of 0 means no number was found in the text, so those
# entries are skipped instead of being reported as cheap.
filtered_prices = {
    index: p
    for index, p in enumerate(prices)
    if 0 < convert_price_to_float(str(p)) < MAX_PRICE_CZK
}

# Positions of the listings that passed the filter
index_ok = list(filtered_prices.keys())
print(index_ok)

# Price lines of the selected listings, prefixed for the message body
prices_yes = [f"💰 {price}" for price in filtered_prices.values()]
prices_yes


[5, 6, 10, 12]


['💰 CZK 19,900', '💰 CZK 15,600', '💰 CZK 13,000', '💰 CZK 19,600']

In [27]:
# for i in prices:
#     price_numbers = re.findall(r'\d{1,3}(?:,\d{3})*', i)
#     print(price_numbers)


### **Getting links 🔗**

In [28]:
# HINT: when a class attribute holds several space-separated class names,
# join them with dots in the CSS selector.
links = []
for apt in apt_list:
    link = apt.select_one("a")
    # Append exactly one entry per card. A card without an anchor (or an
    # anchor without href) gets an empty string, so a previous card's link is
    # never reused and positions stay valid for index_ok.
    link_ok = link.get("href", "") if link is not None else ""
    links.append(link_ok)
links = [f"🔗 {link}" for link in links]
links_yes = [links[p] for p in index_ok]
links_yes

['🔗 https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK&id=819701&_UCR_return_href=%2Fsearch%3FofferType%3DPRONAJEM%26estateType%3DBYT%26regionOsmIds%3DR435514%26osm_value%3DHlavn%25C3%25AD%2520m%25C4%259Bsto%2520Praha%252C%2520Praha%252C%2520%25C4%258Cesko%26location%3Dexact%26currency%3DCZK',
 '🔗 https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK&id=867321&_UCR_return_href=%2Fsearch%3FofferType%3DPRONAJEM%26estateType%3DBYT%26regionOsmIds%3DR435514%26osm_value%3DHlavn%25C3%25AD%2520m%25C4%259Bsto%2520Praha%252C%2520Praha%252C%2520%25C4%258Cesko%26location%3Dexact%26currency%3DCZK',
 '🔗 https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Ce

### **Getting addresses 📍**

In [29]:
# Peek at the address <span> of the first listing card.
address = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "h2.PropertyCard_propertyCardHeadline___diKI.mt-md-0.mt-4.mb-0 "
    "a span.PropertyCard_propertyCardAddress__hNqyR.text-subheadline.text-truncate"
)
address

[<span class="PropertyCard_propertyCardAddress__hNqyR text-subheadline text-truncate" style="-webkit-line-clamp:1">Na Farkáně Ⅰ, Prague - Radlice</span>]

In [30]:
# Address text of every listing card, one entry per card in apt_list order.
addresses = []
address_selector = (
    "div.PropertyCard_propertyCardContent__osPAM "
    "h2.PropertyCard_propertyCardHeadline___diKI.mt-md-0.mt-4.mb-0 "
    "a span.PropertyCard_propertyCardAddress__hNqyR.text-subheadline.text-truncate"
)
for apt in apt_list:
    address = apt.select_one(address_selector)
    # A card without an address span gets an empty placeholder instead of
    # raising, so positions stay valid for index_ok.
    address_ok = address.get_text() if address is not None else ""
    addresses.append(address_ok)
addresses = [f"📍 {address}" for address in addresses]
addresses_yes = [addresses[p] for p in index_ok]
addresses_yes

['📍 Vítkova, Prague - Karlín',
 '📍 Suchdolské náměstí, Prague - Suchdol',
 '📍 5. května, Prague - Nusle',
 '📍 Oldřichova, Prague - Nusle']

### **Getting flat configurations 🕵️‍♂️**

In [31]:
# Peek at the first card: the feature list items and the free-text
# amenities paragraph.
info = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "ul.FeaturesList_featuresList__75Wet.featuresList.mt-3 li"
)
config = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "p.mt-2.mt-md-3.mb-0.text-caption.text-truncate-multiple"
)
# Emoji placed in front of the layout / floor-area line of each listing
info2_image = "📐"
config

[<p class="mt-2 mt-md-3 mb-0 text-caption text-truncate-multiple" style="-webkit-line-clamp:2">Parking • Cellar 20 m²</p>]

### Info (info_flats and info_squares)

In [32]:

info_flats = []
info_squares = []
info = []

features_selector = (
    "div.PropertyCard_propertyCardContent__osPAM "
    "ul.FeaturesList_featuresList__75Wet.featuresList.mt-3 li"
)

for apt in apt_list:
    info_flat_art = apt.select(features_selector + " span")
    info_square_art = apt.select(features_selector)

    # Layout text comes from the second <span> inside the feature list;
    # empty when the card has fewer spans.
    info_flat = info_flat_art[1].text if len(info_flat_art) > 1 else ""

    # Floor-area text comes from the second <li>; empty when it is missing.
    if len(info_square_art) > 1:
        info_square = info_square_art[1].text.replace('\xa0', ' ')
    else:
        info_square = ""

    # Always append exactly one entry per card, built from this card's own
    # values. Skipping cards or reusing the last collected value would shift
    # positions relative to apt_list and make index_ok point at other flats.
    info_flats.append(info_flat)
    info_squares.append(info_square)
    info.append([info2_image, info_flat, info_square])

# Empty parts are left out so a missing value does not leave a double space.
concat_info = [' '.join(part for part in inner if part) for inner in info]
info_yes = [concat_info[p] for p in index_ok]
info_yes

['📐 Studio 31 m²', '📐 Studio 22 m²', '📐 1 bedroom 44 m²', '📐 2+kk 46 m²']

### Configs

In [33]:
# Amenities text of every listing card, one entry per card in apt_list order.
configs = []
config_selector = (
    "div.PropertyCard_propertyCardContent__osPAM "
    "p.mt-2.mt-md-3.mb-0.text-caption.text-truncate-multiple"
)
for apt in apt_list:
    config = apt.select_one(config_selector)
    # A card without an amenities paragraph gets an empty placeholder instead
    # of raising, so positions stay valid for index_ok.
    config_ok = config.text if config is not None else ""
    configs.append(config_ok)
configs_good = [f"🤔 {con}" for con in configs]
configs_yes = [configs_good[p] for p in index_ok]
configs_yes

['🤔 Public transport 1 minute of walking • Equipped • Lift',
 '🤔 Public transport 1 minute of walking • Equipped • Lift • Parking • Cellar 6 m²',
 '🤔 Public transport 6 minutes of walking • Lift • Balcony 2 m²',
 '🤔 Partially equipped • Cellar 3 m²']

### Divider

In [34]:
# Horizontal rule made of box-drawing characters.
# NOTE(review): the concatenation cell further down assigns `divider` again
# before using it, so this value looks unused in the visible notebook.
divider = "┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅"

## **Concatenating 💯**

In [35]:
# Current local date as day-month-year with a two-digit year, used in the message header.
today = f"{dt.today():%d-%m-%y}"
today

'26-01-25'

In [36]:
# Separator written after every listing, and the title line of the message.
divider = "\n" + "─" * 50 + "\n"
header = f"🏠 NEW Flats from {today} under 20K CZK/month:\n" + "\n"

# Start with the header, then add one text section per selected listing.
# The *_yes lists are parallel, so the same position addresses one flat.
sections = [header]
for i, info_line in enumerate(info_yes):
    sections.append(
        f"Address: {addresses_yes[i]}\n"
        f"Info: {info_line}\n"
        f"Config: {configs_yes[i]}\n"
        f"Price: {prices_yes[i]}\n"
        f"Link: {links_yes[i]}\n"
        + divider
    )

# The complete message body as one string
flat = "".join(sections)

# Show the message and confirm it is a plain string
print(flat)
print(type(flat))

🏠 NEW Flats from 26-01-25 under 20K CZK/month:

Address: 📍 Vítkova, Prague - Karlín
Info: 📐 Studio 31 m²
Config: 🤔 Public transport 1 minute of walking • Equipped • Lift
Price: 💰 CZK 19,900
Link: 🔗 https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK&id=819701&_UCR_return_href=%2Fsearch%3FofferType%3DPRONAJEM%26estateType%3DBYT%26regionOsmIds%3DR435514%26osm_value%3DHlavn%25C3%25AD%2520m%25C4%259Bsto%2520Praha%252C%2520Praha%252C%2520%25C4%258Cesko%26location%3Dexact%26currency%3DCZK

──────────────────────────────────────────────────
Address: 📍 Suchdolské náměstí, Prague - Suchdol
Info: 📐 Studio 22 m²
Config: 🤔 Public transport 1 minute of walking • Equipped • Lift • Parking • Cellar 6 m²
Price: 💰 CZK 15,600
Link: 🔗 https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8

## **Sending an email 📧**

In [3]:
sender_email = "glebon126@email.cz"
# NOTE(review): recipient addresses are hard-coded; consider loading them from
# configuration so they are not stored in the notebook.
receiver_email = [ 'Glebon126@gmail.com', 'Hlibovski@gmail.com', "brylkovika@gmail.com", "china55060@gmail.com",'allieformankova@gmail.com', 'alinayakymtso@gmail.com']
# receiver_email2 = [ 'Glebon126@gmail.com', 'Hlibovski@gmail.com']
# The password is read from the environment; a missing variable raises KeyError here.
SEZNAM_MAIL_PASSWORD = os.environ["SEZNAM_MAIL_PASSWORD"]

try:
    # One connection and one login are shared by all recipients instead of
    # reconnecting for every address. The timeout bounds a stalled server.
    with smtplib.SMTP("smtp.seznam.cz", 587, timeout=30) as server:
        # An explicit default context makes the TLS upgrade verify the server
        # certificate and hostname before the password is sent.
        server.starttls(context=ssl.create_default_context())
        server.login(sender_email, SEZNAM_MAIL_PASSWORD)

        for mail in receiver_email:
            # Each recipient gets an individual message, so the addresses are
            # not disclosed to one another.
            message = MIMEMultipart()
            message["From"] = sender_email
            message["To"] = mail
            message["Subject"] = "BezRealitky NEW Flats"

            # Message body: the formatted listing text built earlier
            message.attach(MIMEText(flat, 'plain'))

            try:
                server.send_message(message)
                print("Email sent successfully!")
            except smtplib.SMTPException as e:
                # A failure for one recipient must not stop the remaining sends.
                print(f"Error occurred: {e}")
except (smtplib.SMTPException, OSError) as e:
    # Connection, TLS or login failure: nothing could be sent.
    print(f"Error occurred: {e}")

# TODO: to send this daily (e.g. with the `schedule` package), the sending
# logic above first has to be wrapped in a function that can be scheduled.

In [4]:
# if password is None:
#     print("Error: SEZNAM_MAIL_PASSWORD is not set.")
# else:
#     print("Password loaded successfully.")

Error: SEZNAM_MAIL_PASSWORD is not set.
