In [None]:
import requests
import json
import re
import pandas as pd
from datetime import datetime
import concurrent.futures

# Date stamp for this run; it names the input file here and is reused for the
# output file name when the results are saved.
today_date = datetime.now().strftime('%Y-%m-%d')

# CSV holding the listing ids to download. To re-process an earlier run, set
# load_name to that day's file instead, e.g. '2023-07-02_urls.csv'.
load_name = f'{today_date}_urls.csv'

def get_data(url, timeout=30):
    """Fetch ``url`` with HTTP GET and return its decoded JSON body.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled connection
            would block its worker thread forever.

    Returns:
        The parsed JSON payload, as produced by ``Response.json()``.

    Raises:
        requests.exceptions.RequestException: On connection problems, on a
            timeout, or when the server answers with a 4xx/5xx status.
        ValueError: If the response body is not valid JSON.
    """
    r = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors so an error response is never collected as
    # if it were real data.
    r.raise_for_status()
    return r.json()

# Read the listing ids and turn each one into its detail-API address.
df = pd.read_csv(load_name)
urls = ["https://www.sreality.cz/api/cs/v2/estates/" + str(estate_id) for estate_id in df['url_id']]

# Downloaded JSON payloads, appended in the order the requests finish.
data_list = []

# Download through a two-worker thread pool. A request that raises is reported
# and skipped so the remaining URLs are still processed.
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    # Remember which URL each future belongs to, for error reporting.
    future_to_url = {}
    for url in urls:
        future_to_url[executor.submit(get_data, url)] = url

    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            data = future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            data_list.append(data)


In [None]:
def _flatten_estate(payload):
    """Flatten one downloaded estate payload into a single-level dict.

    Keys are added in this order:
      * 'url_id' - the 'href' of payload['_links']['self'], when that link
        is present (None if the link has no 'href')
      * one key per entry of payload['items'], named by the entry's 'name'
        and holding the entry's 'value'
      * 'Latitude' / 'Longitude' - 'lat' / 'lon' of payload['map']
        (None when the coordinate is absent)
      * one key per entry of payload['poi'], named by the entry's 'name'
        and holding the entry's 'distance'

    Sections missing from the payload are skipped. An 'items' entry lacking
    'name' or 'value', or a 'poi' entry lacking 'name' or 'distance', raises
    KeyError. Entries that share a name overwrite each other (last one wins).
    """
    row = {}

    # Address of the listing itself, taken from the payload's self link.
    if '_links' in payload and 'self' in payload['_links']:
        row['url_id'] = payload['_links']['self'].get('href')

    # Listing attributes: each named item becomes its own key.
    if 'items' in payload:
        row.update({item['name']: item['value'] for item in payload['items']})

    # Map coordinates.
    if 'map' in payload:
        row['Latitude'] = payload['map'].get('lat')
        row['Longitude'] = payload['map'].get('lon')

    # Points of interest: every distinct poi name becomes its own key, so the
    # resulting table gets one column per poi name.
    if 'poi' in payload:
        row.update({poi['name']: poi['distance'] for poi in payload['poi']})

    return row


# One flat record per downloaded payload.
dict_list = [_flatten_estate(payload) for payload in data_list]

# convert the list of dictionaries to a DataFrame
df = pd.DataFrame(dict_list)

df.head()


In [12]:
# Save the flattened listings to a CSV named after the run date.
filename = f'{today_date}_data.csv'
# index=False keeps the DataFrame's row index out of the file.
df.to_csv(path_or_buf=filename, index=False)