Fetching data about houses from the Sreality API

In [None]:
import requests
import csv
import json

# Download every page of the house-for-sale listing from the Sreality API.
# `houses` is kept as a list of pages (each page is a list of listing dicts)
# because the later cells iterate over it page by page.
houses = []
total_pages = 454  # number of listing pages to request, 40 listings per page
request_timeout = 30  # seconds; without a timeout a stalled connection blocks forever

for page in range(1, total_pages + 1):
    url = f"https://www.sreality.cz/api/cs/v2/estates?category_main_cb=2&category_type_cb=1&per_page=40&page={page}"
    try:
        response = requests.get(url, timeout=request_timeout)
    except requests.RequestException as exc:
        # A transient network failure on one page must not discard the pages
        # that were already downloaded.
        print(f"Chyba při načítání stránky {page}: {exc}")
        continue

    if response.status_code == 200:
        try:
            data = response.json()
        except ValueError as exc:
            # The body was not valid JSON; report it and move on.
            print(f"Chyba při načítání stránky {page}: {exc}")
            continue
        houses_page = data.get('_embedded', {}).get('estates', [])
        houses.append(houses_page)
        print(f"Načtena stránka {page}")
    else:
        print(f"Chyba při načítání stránky {page} (status code: {response.status_code})")

# `len(houses)` would be the number of pages, so count the listings themselves.
total_houses = sum(len(page_houses) for page_houses in houses)
print(f"Celkem načteno {total_houses} domů.")

Save all data to a JSON file

In [None]:
import os

# Create the output directory if needed. `exist_ok=True` replaces the separate
# existence check, so there is no gap between checking and creating.
os.makedirs("../data", exist_ok=True)

# Persist the raw listing pages so the download does not have to be repeated.
with open('../data/houses.json', 'w') as f:
    json.dump(houses, f, indent=4)

Extract only the needed data from the JSON

In [None]:
import asyncio
import aiohttp

async def fetch_house(session, house, count, max_retries=5, retry_delay=1):
    """Fetch the detail of one listing and flatten it into a 17-column row.

    Args:
        session: Client session exposing ``get(url)`` as an async context
            manager (an ``aiohttp.ClientSession`` in this notebook).
        house: Listing dict from the estates list endpoint; must contain
            ``hash_id``.
        count: Sequence number of the listing, printed as progress output.
        max_retries: Maximum number of request attempts before giving up.
        retry_delay: Seconds to wait between attempts.

    Returns:
        A list of 17 values in the column order of the CSV export. Every
        value is ``""`` when the listing is gone (HTTP 404/410) or could not
        be fetched within ``max_retries`` attempts; individual fields missing
        from the response are also ``""``.
    """
    hash_id = house["hash_id"]
    url = f"https://www.sreality.cz/api/cs/v2/estates/{hash_id}"
    column_count = 17

    detail = None
    for attempt in range(1, max_retries + 1):
        try:
            async with session.get(url) as response:
                # Check the status before decoding: an error page is not
                # guaranteed to be JSON, and a removed listing never recovers.
                if response.status in (404, 410):
                    return [""] * column_count
                if response.status == 200:
                    payload = await response.json()
                    detail = payload["recommendations_data"]
                    break
        except Exception:
            # Best-effort fetch: any request or decoding failure is retried,
            # but only up to max_retries so one bad listing cannot hang the run.
            pass
        if attempt < max_retries:
            await asyncio.sleep(retry_delay)

    if detail is None:
        print(f"Nepodařilo se načíst detail domu {hash_id} ani po {max_retries} pokusech")
        return [""] * column_count

    print(count)

    # Row building sits outside the retry loop on purpose: a missing field is
    # deterministic, so retrying the request would never fix it.
    gps = house.get("gps") or {}
    label_groups = house.get("labelsAll") or [[]]
    labels = label_groups[0]
    label_columns = (
        "garage",
        "new_building",
        "furnished",
        "cellar",
        "parking_lots",
        "after_reconstruction",
        "balcony",
        "terrace",
    )

    return [
        house.get("price", ""),
        gps.get("lat", ""),
        gps.get("lon", ""),
        detail.get("usable_area", ""),
        detail.get("estate_area", ""),
        *[1 if label in labels else 0 for label in label_columns],
        detail.get("locality_region_id", ""),
        detail.get("locality_district_id", ""),
        detail.get("room_count_cb", ""),
        detail.get("building_condition", ""),
    ]

async def main():
    """Fetch the detail row of every downloaded listing concurrently.

    Walks the pages stored in the module-level ``houses`` list, schedules one
    ``fetch_house`` call per listing (numbered from 1 for progress output)
    and waits for all of them on a single shared client session.

    Returns:
        The list of rows produced by ``fetch_house``, in listing order.
    """
    async with aiohttp.ClientSession() as session:
        # Flatten the page-by-page structure into a single stream of listings.
        listings = (listing for page_listings in houses for listing in page_listings)
        pending = [
            fetch_house(session, listing, position)
            for position, listing in enumerate(listings, start=1)
        ]

        results = await asyncio.gather(*pending)

    print(f"Počet vyčištěných domů: {len(results)}")
    print("Domy jsou vyčištěny")
    return results

# Top-level `await` works here because the notebook kernel already runs an
# event loop; in a plain Python script this would need `asyncio.run(main())`.
cleaned_houses = await main()


Save all extracted data to a CSV file

In [None]:
# Column names, in the same order as the rows returned by fetch_house.
header = ["price", "lat", "lon","usable_area", "land_area", "garage", "new", "furnished", "cellar", "parkingLots", "reconstructed", "balcon", "terrace","region","district","room_count","condition"]

# Open the file once and write the header plus all rows in a single pass,
# instead of reopening it in append mode for every row.
with open("../data/houses.csv","w",newline='') as f:
    csvwriter = csv.writer(f)
    csvwriter.writerow(header)
    csvwriter.writerows(cleaned_houses)