In [3]:
import sys
from pathlib import Path
import requests
from typing import List, Any
import pandas as pd
import re
import pymongo
import os
from dotenv import load_dotenv

load_dotenv()

# Ensure project root is in sys.path
project_root = Path().resolve()
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

In [21]:
# Function to extract and compose URLs
def extract_urls(id, estate):
    seo = estate.get("seo", {})
    locality = seo.get("locality", "")
    name = estate.get("name", "")
    type = get_type_from_name(name)
    if locality and name and id:
        return f"https://www.sreality.cz/detail/prodej/byt/{type}/{locality}/{id}"
    return ""

def fetch_new_listings(url: str) -> List[dict]:
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Accept': 'application/json',
        'Content-Type': 'application/json',
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    # throw error
    response.raise_for_status()

def get_type_from_name(name):
    # Targets 1+1, 2+kk and similar paterns
    pattern = r"\b(\d+\+\S+)\b"
    try:
        type = re.findall(pattern, name)[0]
    except IndexError:
        type = "Unknown"
    return type

In [28]:
# Define the Sreality API URL
url = "https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&czk_price_summary_order2=0%7C3000000&locality_district_id=72&locality_region_id=14&per_page=20"

# Testing fetch_new_listings function with the URL parameter
data = fetch_new_listings(url)
print("Result Size:",  data["result_size"])

current_date = pd.to_datetime("today").strftime("%Y-%m-%dT%H:%M:%S")

# Extract estates details
estates = data["_embedded"]["estates"]
# Create a list of dictionaries to store estate data
estate_data = []
for estate in estates:
    id = estate.get("hash_id", "")
    estate_info = {
        "id": id,
        "name": estate["name"],
        "locality": estate["locality"],
        "price": estate["price"],
        "features": estate.get("labelsAll", [])[0],
        "url": extract_urls(id, estate),
        "scraped": current_date,
    }
    estate_data.append(estate_info)

# Create a DataFrame from the estate data
df_estates = pd.DataFrame(estate_data)
df_estates

Result Size: 10


Unnamed: 0,id,name,locality,price,features,url,scraped
0,4195112268,Prodej bytu 1+1 24 m²,"Cejl, Brno - Zábrdovice",2990000,"[personal, brick, elevator]",https://www.sreality.cz/detail/prodej/byt/1+1/...,2024-08-06T22:58:09
1,3065926988,Prodej bytu 1+kk 16 m²,"Drobného, Brno",2750000,"[personal, brick, cellar]",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09
2,1716331852,Prodej bytu 1+kk 22 m²,"Vlhká, Brno - Zábrdovice",2999000,"[personal, brick, partly_furnished]",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09
3,115565900,Prodej bytu 1+kk 27 m²,"Novoměstská, Brno - Řečkovice",2850000,"[personal, after_reconstruction, panel, cellar...",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09
4,484336972,Prodej bytu 1+kk 27 m²,"Terezy Novákové, Brno - Řečkovice",2590000,"[personal, terrace, brick, cellar]",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09
5,3834922316,Dražba bytu 1+1 27 m²,"Grmelova, Brno - Štýřice",2300000,"[state, brick]",https://www.sreality.cz/detail/prodej/byt/1+1/...,2024-08-06T22:58:09
6,196363596,Dražba bytu 1+1 26 m²,"Gallašova, Brno - Štýřice",2470000,"[state, brick]",https://www.sreality.cz/detail/prodej/byt/1+1/...,2024-08-06T22:58:09
7,1405510988,Prodej bytu 1+kk 22 m²,"Hostislavova, Brno - Žebětín",2590000,"[personal, brick, parking_lots, not_furnished]",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09
8,3314615628,Prodej bytu 1+kk 22 m²,"Cejl, Brno - Zábrdovice",2390000,"[personal, brick, elevator, furnished]",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09
9,1577313612,Prodej bytu 1+kk 23 m²,"Brno - Kníničky, okres Brno-město",2910000,"[new_building, personal, terrace, brick, eleva...",https://www.sreality.cz/detail/prodej/byt/1+kk...,2024-08-06T22:58:09


In [23]:
# save data into json file
df_estates.to_json("data.json", orient="records")

In [24]:
client = pymongo.MongoClient(os.getenv("DB_URI"))

db = client["sreality"]

for config in db["config"].find({"enabled": True}):
    print(config)

    

{'_id': 1, 'subject': 'Brno', 'recipients': 'ryxwaer@gmail.com', 'url': 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&czk_price_summary_order2=0%7C3000000&locality_district_id=72&locality_region_id=14&per_page=20&tms=1722505898806', 'enabled': True}


In [27]:
import requests
from typing import List

def fetch_new_listings2(url: str) -> List[dict]:
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Accept': 'application/json',
        'Content-Type': 'application/json',
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    # throw error
    response.raise_for_status()

# Usage example
url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&czk_price_summary_order2=0%7C3000000&locality_district_id=72&locality_region_id=14&per_page=20'
listings = fetch_new_listings2(url)
# save data into json file
df_estates = pd.DataFrame(listings["_embedded"]["estates"])
df_estates.to_json("data.json", orient="records")

In [7]:
def compose_email_body(new_listings):
    formatted_listings = "".join(
        [f"<a href='{listing['url']}' style='text-decoration: none; color: inherit;' target='_blank'>"
         f"<div style='margin-bottom: 20px; padding: 20px; border: 1px solid #e0e0e0; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); transition: box-shadow 0.3s ease-in-out;'>"
         f"<h2 style='margin: 0 0 10px; font-size: 1.5em; color: #2c3e50;'>{listing['name']}</h2>"
         f"<p style='margin: 10px 0; color: #27ae60; font-weight: bold; font-size: 1.2em;'>{listing['price']} CZK</p>"
         f"<p style='margin: 5px 0; font-size: 1em; color: #555;'><strong>Locality:</strong> {listing['locality']}</p>"
         f"<p style='margin: 5px 0; font-size: 1em; color: #555;'><strong>Features:</strong> {listing['features']}</p>"
         f"</div>"
         f"</a>"
         for _, listing in new_listings.iterrows()]
    )
    return f"<html><body style='font-family: Arial, sans-serif; line-height: 1.6;'>{formatted_listings}</body></html>"

new_listings = [
    {"name": "Listing 1", "price": 1000, "url": "http://example.com/1", "locality": "Locality 1", "features": "Feature 1"},
    {"name": "Listing 2", "price": 2000, "url": "http://example.com/2", "locality": "Locality 2", "features": "Feature 2"}
]

new_listings = pd.DataFrame(new_listings)

print(compose_email_body(new_listings))

<html><body style='font-family: Arial, sans-serif; line-height: 1.6;'><a href='http://example.com/1' style='text-decoration: none; color: inherit;' target='_blank'><div style='margin-bottom: 20px; padding: 20px; border: 1px solid #e0e0e0; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); transition: box-shadow 0.3s ease-in-out;'><h2 style='margin: 0 0 10px; font-size: 1.5em; color: #2c3e50;'>Listing 1</h2><p style='margin: 10px 0; color: #27ae60; font-weight: bold; font-size: 1.2em;'>1000 CZK</p><p style='margin: 5px 0; font-size: 1em; color: #555;'><strong>Locality:</strong> Locality 1</p><p style='margin: 5px 0; font-size: 1em; color: #555;'><strong>Features:</strong> Feature 1</p></div></a><a href='http://example.com/2' style='text-decoration: none; color: inherit;' target='_blank'><div style='margin-bottom: 20px; padding: 20px; border: 1px solid #e0e0e0; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); transition: box-shadow 0.3s ease-in-out;'><h2 style='margin: 