In [1]:
# libraries for def scrape_apartments
import os
import re
import smtplib
import ssl
import time
from datetime import datetime as dt
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import requests
import schedule
from bs4 import BeautifulSoup

In [2]:
# import smtplib
# from email.mime.multipart import MIMEMultipart
# from email.mime.text import MIMEText
# from apscheduler.schedulers.blocking import BlockingScheduler

## **Scraping the apartments 🏠**

#### Test for response 200

In [3]:
# Smoke test: confirm the search page answers before any parsing is attempted.
# The URL is assembled from adjacent string literals so that no newline ends up
# inside the query string (a triple-quoted literal split across two lines
# embeds one in the middle of the osm_value parameter).
bez_realitky_url = (
    "https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514"
    "&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK"
)
# A timeout keeps the cell from hanging forever if the site does not respond.
response = requests.get(bez_realitky_url, timeout=30)
response

<Response [200]>

### **Getting a list of articles**

In [4]:
bez_realitky_url = r"https://www.bezrealitky.com/search?offerType=PRONAJEM&estateType=BYT&regionOsmIds=R435514&osm_value=Hlavn%C3%AD+m%C4%9Bsto+Praha%2C+Praha%2C+%C4%8Cesko&location=exact&currency=CZK"

# Fetch the search-results page. The timeout stops the cell from hanging on an
# unresponsive server, and raise_for_status() turns an HTTP error response into
# an exception instead of letting an error page be parsed as if it held listings.
response = requests.get(bez_realitky_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# One <article> element per listing card; every later cell iterates over this list.
# NOTE(review): the class names carry hash-like suffixes that are presumably
# generated by the site's build -- if this list comes back empty, re-check them.
apt_list = soup.find_all("article", class_="PropertyCard_propertyCard__moO_5 propertyCard PropertyCard_propertyCard--landscape__XvPmC")

### **Getting prices 🤑**

In [5]:

# Inspect the price <span> elements of the first card to learn the markup.
pricess = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "div.PropertyPrice_propertyPrice__lthza.propertyPrice.mb-0.mt-3 span"
)
pricess

[<span class="PropertyPrice_propertyPriceAmount__WdEE1">CZK 25,000</span>,
 <span class="PropertyPrice_propertyPriceAdditional__5jYQ6"> + CZK 7,000</span>]

In [6]:
# Build one display string per listing card, e.g. "CZK 25,000  + CZK 7,000".
# `prices` must keep exactly one entry per card, in card order, because the
# following cells select links/addresses/etc. by card position.
prices = []
for apt in apt_list:
    price = apt.select("div.PropertyCard_propertyCardContent__osPAM div.PropertyPrice_propertyPrice__lthza.propertyPrice.mb-0.mt-3 span")
    # get_text() always returns a string (``.string`` is None when a tag has
    # nested markup); '\xa0' is a non-breaking space, replaced by a plain one.
    texts = [span.get_text().replace('\xa0', ' ') for span in price]
    if len(texts) == 2:
        # Two spans: the main amount followed by the additional amount.
        price_ok = f"{texts[0]} {texts[1]}".strip()
    elif texts:
        # Any other non-empty result: only the first span is used.
        price_ok = texts[0].strip()
    else:
        # No price markup on this card: keep the slot (empty) instead of
        # raising IndexError, so positions stay aligned with `apt_list`.
        price_ok = ""
    prices.append(price_ok)
prices

['CZK 25,000  + CZK 7,000',
 'CZK 20,925  + CZK 4,355',
 'CZK 13,000  + CZK 4,000',
 'CZK 15,000  + CZK 4,000',
 'CZK 35,000  + CZK 750',
 'CZK 14,950  + CZK 3,750',
 'CZK 34,800  + CZK 2,100',
 'CZK 15,000  + CZK 3,158',
 'CZK 33,000  + CZK 6,000',
 'CZK 17,500  + CZK 3,500',
 'CZK 16,500  + CZK 4,900',
 'CZK 24,000  + CZK 5,561',
 'CZK 16,000  + CZK 3,000',
 'CZK 25,000  + CZK 3,000',
 'CZK 26,000  + CZK 3,353']

#### Converting prices to float and filtering by max price

In [7]:
def convert_price_to_float(prices):
    """Return the sum of all amounts found in a price string.

    Args:
        prices: Text such as ``"CZK 25,000  + CZK 7,000"``. Every run of digits,
            optionally grouped with comma thousands separators, counts as one
            amount.

    Returns:
        The total of all amounts as a float; ``0.0`` when the text contains no
        digits.
    """
    # ``\d+`` (rather than ``\d{1,3}``) keeps an ungrouped number such as "1500"
    # in one piece; with ``\d{1,3}`` it would be split into "150" and "0".
    price_numbers = re.findall(r'\d+(?:,\d{3})*', prices)

    # Drop the thousands separators before converting each amount.
    price_floats = [float(p.replace(',', '')) for p in price_numbers]

    # Start from 0.0 so the result is a float even when nothing was found.
    return sum(price_floats, 0.0)

# Upper limit (exclusive) for the summed amounts of a listing.
MAX_PRICE_CZK = 20000

# Map card position -> price string for every listing whose total is below the
# limit. A total of 0 means no number could be parsed from the text, so such a
# listing is skipped rather than being reported as cheaper than the limit.
filtered_prices = {
    index: p
    for index, p in enumerate(prices)
    if 0 < convert_price_to_float(str(p)) < MAX_PRICE_CZK
}

# Positions of the accepted cards; the following cells use these indices to
# pick the matching links, addresses, layout info and descriptions.
index_ok = list(filtered_prices.keys())
print(index_ok)

# Price strings of the accepted listings, prefixed for display.
prices_yes = [f"💰 {price}" for price in filtered_prices.values()]
prices_yes


[2, 3, 5, 7, 12]


['💰 CZK 13,000  + CZK 4,000',
 '💰 CZK 15,000  + CZK 4,000',
 '💰 CZK 14,950  + CZK 3,750',
 '💰 CZK 15,000  + CZK 3,158',
 '💰 CZK 16,000  + CZK 3,000']

In [8]:
# for i in prices:
#     price_numbers = re.findall(r'\d{1,3}(?:,\d{3})*', i)
#     print(price_numbers)


### **Getting links 🔗**

In [9]:
# HINT: a space inside a class attribute separates several class names; in a CSS
# selector each of them is written with a leading dot (e.g. "p.one.two").
links = []
for apt in apt_list:
    link = apt.select_one("a")
    # Exactly one entry per card keeps positions aligned with `index_ok`. A card
    # without an anchor (or without an href) gets a placeholder instead of
    # silently reusing the previous card's URL, or raising NameError when it
    # happens on the very first card.
    link_ok = link.get("href", "N/A") if link is not None else "N/A"
    links.append(link_ok)
links = [f"🔗 {link}" for link in links]
links_yes = [links[p] for p in index_ok]
links_yes

['🔗 https://www.bezrealitky.com/properties-flats-houses/649487-nabidka-pronajem-bytu-nechanicka-praha',
 '🔗 https://www.bezrealitky.com/properties-flats-houses/616419-nabidka-pronajem-bytu-nechanicka-praha',
 '🔗 https://www.bezrealitky.com/properties-flats-houses/715329-nabidka-pronajem-bytu-28-pluku-praha',
 '🔗 https://www.bezrealitky.com/properties-flats-houses/869031-nabidka-pronajem-bytu-sporicka-hlavni-mesto-praha',
 '🔗 https://www.bezrealitky.com/properties-flats-houses/869025-nabidka-pronajem-bytu-sazovicka-praha']

### **Getting addresses 📍**

In [10]:
# Inspect the address <span> of the first card to learn the markup.
address = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "h2.PropertyCard_propertyCardHeadline___diKI.mt-md-0.mt-4.mb-0 "
    "a span.PropertyCard_propertyCardAddress__hNqyR.text-subheadline.text-truncate"
)
address

[<span class="PropertyCard_propertyCardAddress__hNqyR text-subheadline text-truncate" style="-webkit-line-clamp:1">Zlešická, Prague - Chodov</span>]

In [11]:
# Collect one address string per card, then keep those selected by `index_ok`.
addresses = []
for apt in apt_list:
    address = apt.select("div.PropertyCard_propertyCardContent__osPAM h2.PropertyCard_propertyCardHeadline___diKI.mt-md-0.mt-4.mb-0 a span.PropertyCard_propertyCardAddress__hNqyR.text-subheadline.text-truncate")
    # A card without an address span gets a placeholder instead of raising
    # IndexError; get_text() always yields a string, whereas ``.string`` is
    # None when the span contains nested markup.
    address_ok = address[0].get_text() if address else "N/A"
    addresses.append(address_ok)
addresses = [f"📍 {address}" for address in addresses]
addresses_yes = [addresses[p] for p in index_ok]
addresses_yes

['📍 Nechanická, Prague - Kamýk',
 '📍 Nechanická, Prague - Kamýk',
 '📍 28. pluku, Prague - Vršovice',
 '📍 Spořická, Prague - Dolní Chabry',
 '📍 Sazovická, Prague - Zličín']

### **Getting flat configurations 🕵️‍♂️**

In [12]:
# Inspect the first card: the feature-list items and the description paragraph.
info = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "ul.FeaturesList_featuresList__75Wet.featuresList.mt-3 li"
)
config = apt_list[0].select(
    "div.PropertyCard_propertyCardContent__osPAM "
    "p.mt-2.mt-md-3.mb-0.text-caption.text-truncate-multiple"
)
# Icon placed in front of each layout/area line built in the next code cell.
info2_image = "📐"
config

[<p class="mt-2 mt-md-3 mb-0 text-caption text-truncate-multiple" style="-webkit-line-clamp:2">Public transport 2 minutes of walking • Partially equipped • Lift • Balcony 5 m² • Cellar 4 m²</p>]

### Info (info_flats and info_squares)

In [13]:

# Per-card layout and floor-area text, plus the combined [icon, layout, area] rows.
info_flats = []
info_squares = []
info = []

for apt in apt_list:
    info_flat_art = apt.select("div.PropertyCard_propertyCardContent__osPAM ul.FeaturesList_featuresList__75Wet.featuresList.mt-3 li span")
    info_square_art = apt.select("div.PropertyCard_propertyCardContent__osPAM ul.FeaturesList_featuresList__75Wet.featuresList.mt-3 li")

    # Judging by the rendered output, the second <span> holds the layout
    # (e.g. "2+kk") and the second <li> the floor area (e.g. "58 m²").
    # A missing element becomes a placeholder so that EVERY card contributes
    # exactly one row. Appending only when the element exists, and then reading
    # the lists with [-1], would either repeat the previous card's values or
    # skip a row, and `info` would no longer line up with `index_ok`.
    info_flat = info_flat_art[1].text if len(info_flat_art) > 1 else "N/A"
    if len(info_square_art) > 1:
        # '\xa0' is a non-breaking space; replace it with a plain one.
        info_square_ok = info_square_art[1].text.replace('\xa0', ' ')
    else:
        info_square_ok = "N/A"

    info_flats.append(info_flat)
    info_squares.append(info_square_ok)
    info.append([info2_image, info_flat, info_square_ok])

# One display line per card, then only the cards selected by `index_ok`.
concat_info = [' '.join(inner) for inner in info]
info_yes = [concat_info[p] for p in index_ok]
info_yes

['📐 2+kk 58 m²',
 '📐 2+kk 45 m²',
 '📐 Studio 32 m²',
 '📐 Studio 39 m²',
 '📐 Studio 40 m²']

### Configs

In [14]:
# Collect the description paragraph of every card, then keep those selected by
# `index_ok`.
configs = []
for apt in apt_list:
    config = apt.select("div.PropertyCard_propertyCardContent__osPAM p.mt-2.mt-md-3.mb-0.text-caption.text-truncate-multiple")
    # A card without a description paragraph gets a placeholder instead of
    # raising IndexError, so `configs` keeps one entry per card.
    config_ok = config[0].text if config else "N/A"
    configs.append(config_ok)
configs_good = [f"🤔 {con}" for con in configs]
configs_yes = [configs_good[p] for p in index_ok]
configs_yes

['🤔 Public transport 3 minutes of walking • Equipped • Parking',
 '🤔 Public transport 5 minutes of walking • Equipped • Parking',
 '🤔 Public transport 1 minute of walking • Partially equipped • Lift • Balcony 6 m² • Cellar 2 m²',
 '🤔 Partially equipped • Parking • Cellar 2.7 m²',
 '🤔 Equipped • Lift • Cellar 3.5 m² • Loggia 7 m²']

### Divider

In [15]:
# Separator line for the message.
# NOTE(review): `divider` is assigned again, with a different value, at the top
# of the cell that assembles the message, so this value is not the one used there.
divider = "┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅┅"

## **Concatenating 💯**

In [16]:
# Current date formatted as DD-MM-YY; it is interpolated into the message header.
today = f"{dt.today():%d-%m-%y}"
today

'24-10-24'

In [17]:
# Separator printed after every listing, and the headline of the message.
divider = "\n" + "─" * 50 + "\n"  # Create a more pronounced divider
header = f"🏠 NEW Flats from {today} under 20K CZK/month:\n" + "\n"  # Header for the output

# Collect the pieces of the message: the header first, then one group of five
# lines plus a divider for every selected listing.
flat = [header]
for i in range(len(info_yes)):
    flat.extend([
        f"Address: {addresses_yes[i]}\n",
        f"Info: {info_yes[i]}\n",
        f"Config: {configs_yes[i]}\n",
        f"Price: {prices_yes[i]}\n",
        f"Link: {links_yes[i]}\n",
        divider,
    ])

# From here on `flat` is the finished message text (a single string).
flat = "".join(flat)

# Show the message and confirm its type.
print(flat)
print(type(flat))

🏠 NEW Flats from 24-10-24 under 20K CZK/month:

Address: 📍 Nechanická, Prague - Kamýk
Info: 📐 2+kk 58 m²
Config: 🤔 Public transport 3 minutes of walking • Equipped • Parking
Price: 💰 CZK 13,000  + CZK 4,000
Link: 🔗 https://www.bezrealitky.com/properties-flats-houses/649487-nabidka-pronajem-bytu-nechanicka-praha

──────────────────────────────────────────────────
Address: 📍 Nechanická, Prague - Kamýk
Info: 📐 2+kk 45 m²
Config: 🤔 Public transport 5 minutes of walking • Equipped • Parking
Price: 💰 CZK 15,000  + CZK 4,000
Link: 🔗 https://www.bezrealitky.com/properties-flats-houses/616419-nabidka-pronajem-bytu-nechanicka-praha

──────────────────────────────────────────────────
Address: 📍 28. pluku, Prague - Vršovice
Info: 📐 Studio 32 m²
Config: 🤔 Public transport 1 minute of walking • Partially equipped • Lift • Balcony 6 m² • Cellar 2 m²
Price: 💰 CZK 14,950  + CZK 3,750
Link: 🔗 https://www.bezrealitky.com/properties-flats-houses/715329-nabidka-pronajem-bytu-28-pluku-praha

───────────────

## **Sending an email 📧**

In [18]:
# sender_email = "glebon126@email.cz"
# receiver_email = "allieformankova@gmail.com"
# password = os.environ["SMTP_PASSWORD"]  # never keep a real password in the notebook



# message = MIMEMultipart()
# message["From"] = sender_email
# message["To"] = receiver_email
# message["Subject"] = "BezRealitky NEW Flats"

# # Add body to email
# body = flat
# message.attach(MIMEText(body, 'plain'))

# # Send email
# try:
#     with smtplib.SMTP("smtp.seznam.cz", 587) as server:
#         server.starttls()  # Secure the connection
#         server.login(sender_email, password)  # Login with email and password
#         server.send_message(message)  # Send the email
#     print("Email sent successfully!")
# except Exception as e:
#     print(f"Error occurred: {e}")

# # # Schedule the email to be sent daily at 8:00 AM
# # schedule.every().day.at("13:19").do(send_email)

# # Keep the script running to check the schedule
# # while True:
# #     schedule.run_pending()
# #     time.sleep(60)

In [None]:
# SMTP account settings used by send_email().
sender_email = "glebon126@email.cz"
receiver_email = "glebon126@gmail.com"
# The password is read from the SMTP_PASSWORD environment variable so that no
# secret is stored in the notebook (or in its version history from now on).
# NOTE: a password that was ever saved in a shared notebook should be treated
# as leaked and changed.
password = os.environ.get("SMTP_PASSWORD", "")
if not password:
    print("SMTP_PASSWORD is not set; the SMTP login will fail until it is provided.")

def send_email():
    """Send the prepared listing text as a plain-text e-mail.

    Reads the module-level ``sender_email``, ``receiver_email``, ``password``
    and ``flat`` (the message body) at call time. Any failure is printed rather
    than raised, so a failed delivery does not propagate to the caller.

    NOTE(review): ``flat`` is whatever the notebook last computed; this function
    does not re-run the scraping, so repeated calls send the same text unless
    ``flat`` is rebuilt in between.
    """
    # Create message
    message = MIMEMultipart()
    message["From"] = sender_email
    message["To"] = receiver_email
    message["Subject"] = "BezRealitky NEW Flats"

    # Add body to email
    body = flat
    message.attach(MIMEText(body, 'plain'))

    # A default context verifies the server certificate and host name; calling
    # starttls() without a context does not perform that verification.
    tls_context = ssl.create_default_context()

    # Send email
    try:
        # The timeout prevents an unresponsive server from blocking forever.
        with smtplib.SMTP("smtp.seznam.cz", 587, timeout=30) as server:
            server.starttls(context=tls_context)  # Secure the connection
            server.login(sender_email, password)  # Login with email and password
            server.send_message(message)  # Send the email
        print("Email sent successfully!")
    except Exception as e:
        # Deliberately broad: report the problem and return normally.
        print(f"Error occurred: {e}")

# Schedule the email to be sent daily at 00:29.
# The job is tagged and any earlier job with the same tag is removed first, so
# re-running this cell does not register a second job (and send duplicates).
schedule.clear("daily-email")
schedule.every().day.at("00:29").do(send_email).tag("daily-email")

# Keep the cell running to check the schedule once a minute. The loop never
# ends on its own; interrupt the kernel to stop it.
try:
    while True:
        schedule.run_pending()
        time.sleep(60)
except KeyboardInterrupt:
    # Interrupting is the expected way to stop, so finish without a traceback.
    print("Scheduler stopped.")

In [None]:
# def job():
#     max_price = float(input("Enter the maximum price: "))
#     recipient_email = input("Enter your email address: ")
#     apartments = scrape_apartments(max_price)
#     if apartments:
#         send_email(apartments, recipient_email)
#         print(f"Email sent to {recipient_email} with {len(apartments)} listings.")
#     else:
#         print("No apartments found within the specified price range.")


# if __name__ == "__main__":
#     scheduler = BlockingScheduler()
#     scheduler.add_job(job, 'interval', days=1)
#     try:
#         print("Starting the scheduler...")
#         scheduler.start()
#     except (KeyboardInterrupt, SystemExit):
#         pass