In [38]:
import requests
import pandas as pd
import time

# Seminar - APIs and real-life coding

## Task 1: Requesting API
### 1a. Create a function requesting data from sreality

```python
base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb=1&category_type_cb=1&locality_region_id=10&per_page60&page={}'.format(i)

r = requests.get(base_url)
d = r.json()
```

* function should parametrize: 
    * `category_main_cb` - `{'flat':1, 'house':2, 'land':3 }`
    * `category_type_cb` - `{'sell':1,'rent':2}`
    * `locality_region_id` - `{'Praha':10,'Brno':14}`
    * `page` parameter
* use string inputs for `category_main_cb` and `category_type_cb`
* test the validity of inputs
* include try/except clause to handle errors
* function should return JSON data in python types
* do not forget to sleep each request at least 0.5s

In [44]:
import requests
import time

def request_sreality_data(category_main, category_type, locality_region, page):
    category_main_cb = {'flat': 1, 'house': 2, 'land': 3}
    category_type_cb = {'sell': 1, 'rent': 2}
    locality_region_id = {'Praha': 10, 'Brno': 14}

    if category_main not in category_main_cb:
        raise ValueError("Invalid category_main value. Valid options are 'flat', 'house', and 'land'.")

    if category_type not in category_type_cb:
        raise ValueError("Invalid category_type value. Valid options are 'sell' and 'rent'.")

    if locality_region not in locality_region_id:
        raise ValueError("Invalid locality_region value. Valid options are 'Praha' and 'Brno'.")

    base_url = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={}&category_type_cb={}&locality_region_id={}&per_page=60&page={}'.format(
        category_main_cb[category_main], category_type_cb[category_type], locality_region_id[locality_region], page
    )

    try:
        time.sleep(0.5)
        r = requests.get(base_url)
        r.raise_for_status()
        data = r.json()
        return data
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while making the request: {e}")
        return None

# Example usage:
data = request_sreality_data('flat', 'sell', 'Praha', 1)
print(data)

{'meta_description': '5836 realit v nabídce prodej bytů Praha. Vyberte si novou nemovitost na sreality.cz s hledáním na mapě a velkými náhledy fotografií nabízených bytů.', 'result_size': 5836, '_embedded': {'estates': [{'labelsReleased': [[], []], 'has_panorama': 0, 'labels': [], 'is_auction': False, 'labelsAll': [['new_building', 'personal', 'balcony', 'terrace', 'cellar', 'elevator', 'garage'], ['playground', 'small_shop', 'candy_shop', 'vet', 'tavern', 'theater', 'movies', 'natural_attraction', 'sightseeing', 'drugstore', 'tram', 'bus_public_transport', 'school', 'sports', 'medic', 'shop', 'atm', 'kindergarten', 'restaurant', 'post_office', 'train', 'metro']], 'seo': {'category_main_cb': 1, 'category_sub_cb': 8, 'category_type_cb': 1, 'locality': 'praha-smichov-na-hrebenkach'}, 'exclusively_at_rk': 0, 'category': 1, 'has_floor_plan': 1, '_embedded': {'favourite': {'is_favourite': False, '_links': {'self': {'profile': '/favourite/doc', 'href': '/cs/v2/favourite/3389986636', 'title':

In [40]:
def request_sreality(page, category_main='flat', category_type='sell', locality_region='Praha'):
    
    time.sleep(0.5)

    category_mains = {'flat':1, 'house':2, 'land':3 }
    category_types = {'sell':1,'rent':2}
    region_mapping = {'Praha':10, 'Brno':14}
    
    if category_main not in category_mains:
        raise Exception(f'Unknown category main {category_main}')
    
    if category_type not in category_types:
        raise Exception(f'Unknown category type {category_type}')
    
    if locality_region not in region_mapping:
        raise Exception(f'Unknown locality region {locality_region}')
    
    url_template = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main_cb}&category_type_cb={category_type_cb}&locality_region_id={locality_region_id}&per_page60&page={page}'
    
    try:
        url = url_template.format(
            category_main_cb=category_mains[category_main],
            category_type_cb=category_types[category_type],
            locality_region_id=region_mapping[locality_region],
            page=page
        )

        r = requests.get(url)

        return r.json()
    except Exception as e:
        print(e)
d = request_sreality(0)
d.keys()


dict_keys(['meta_description', 'result_size', '_embedded', 'filterLabels', 'title', 'filter', '_links', 'locality', 'locality_dativ', 'logged_in', 'per_page', 'category_instrumental', 'page', 'filterLabels2'])

In [41]:
import requests
import time

def request_sreality_data(category_main_cb, category_type_cb, locality_region_id, page):
    # check validity of inputs
    valid_categories = {'flat':1, 'house':2, 'land':3}
    valid_types = {'sell':1, 'rent':2}
    valid_regions = {'Praha':10, 'Brno':14}
    if category_main_cb not in valid_categories:
        raise ValueError(f"Invalid category_main_cb '{category_main_cb}'. Valid options are {valid_categories.keys()}")
    if category_type_cb not in valid_types:
        raise ValueError(f"Invalid category_type_cb '{category_type_cb}'. Valid options are {valid_types.keys()}")
    if locality_region_id not in valid_regions:
        raise ValueError(f"Invalid locality_region_id '{locality_region_id}'. Valid options are {valid_regions.keys()}")
    
    # make request and handle errors
    base_url = f'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={valid_categories[category_main_cb]}&category_type_cb={valid_types[category_type_cb]}&locality_region_id={valid_regions[locality_region_id]}&per_page=60&page={page}'
    try:
        r = requests.get(base_url)
        r.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error: {e}")
        return None
    
    # sleep for at least 0.5 seconds between requests
    time.sleep(0.5)
    
    # return JSON data
    return r.json()

In [42]:
d=request_sreality_data('flat','sell','Praha',0)
d.keys()

dict_keys(['meta_description', 'result_size', '_embedded', 'filterLabels', 'title', 'filter', '_links', 'locality', 'locality_dativ', 'logged_in', 'per_page', 'category_instrumental', 'page', 'filterLabels2'])

### 1b. Create a function converting sreality json data into pandas dataframe

In [46]:
def sreality_json_to_df(sreality_data):
    return pd.DataFrame(sreality_data['_embedded']['estates'])
sreality_json_to_df(d)

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[], []]",0,[],False,"[[personal, after_reconstruction, balcony, bri...","{'category_main_cb': 1, 'category_sub_cb': 10,...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2027628892,0,29149000,"{'value_raw': 29149000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 5+kk 152 m² (Podkrovní),2453450,"{'lat': 50.09108758339895, 'lon': 14.410173416...",False
1,"[[], []]",0,[],False,"[[personal, panel, elevator], [playground, nat...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2592592972,0,3769000,"{'value_raw': 3769000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 27 m²,0,"{'lat': 50.11257558339895, 'lon': 14.499960416...",False
2,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2891884364,0,5719000,"{'value_raw': 5719000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 41 m²,0,"{'lat': 50.01912358339895, 'lon': 14.617164416...",False
3,"[[furnished], []]",0,[Vybavený],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 10,...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2505213004,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 5+kk 162 m²,0,"{'lat': 50.06036758339895, 'lon': 14.468755416...",True
4,"[[not_furnished], []]",0,[Nevybavený],False,"[[personal, after_reconstruction, brick, cella...","{'category_main_cb': 1, 'category_sub_cb': 3, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,544741196,0,4271000,"{'value_raw': 4271000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+1 37 m²,0,"{'lat': 50.03628958339895, 'lon': 14.418009416...",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,"[[], []]",0,[],False,"[[new_building, personal, balcony, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,684078156,0,9047000,"{'value_raw': 9047000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 56 m²,0,"{'lat': 50.04055458339895, 'lon': 14.341896416...",False
57,"[[collective, not_furnished], []]",0,"[Družstevní, Nevybavený]",False,"[[collective, panel, elevator, not_furnished],...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,281707596,0,5528000,"{'value_raw': 5528000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 67 m²,0,"{'lat': 50.07931958339895, 'lon': 14.499312416...",True
58,"[[], []]",0,[],False,"[[personal, loggia, brick, cellar, elevator, g...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3236873292,0,13971000,"{'value_raw': 13971000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 117 m²,0,"{'lat': 50.060169583398945, 'lon': 14.35936741...",False
59,"[[not_furnished], []]",0,[Nevybavený],False,"[[personal, panel, not_furnished], [playground...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3480339532,0,4171000,"{'value_raw': 4171000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 42 m²,0,"{'lat': 50.07983458339895, 'lon': 14.318833416...",True


In [29]:
import pandas as pd

def json_to_dataframe(json_data):
    df = pd.DataFrame(json_data['_embedded']['estates'])
    return df

In [30]:
json_to_dataframe(data)

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[collective], []]",0,[Družstevní],False,"[[collective, loggia, panel, cellar, elevator,...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,3036361804,0,6179000,"{'value_raw': 6179000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 80 m²,2447873,"{'lat': 49.998739069203815, 'lon': 14.43395993...",False
1,"[[], []]",0,[],False,"[[personal, brick, cellar, parking_lots, not_f...","{'category_main_cb': 1, 'category_sub_cb': 9, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2355541580,0,18669000,"{'value_raw': 18669000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+1 179 m² (Mezonet),0,"{'lat': 50.02736106920381, 'lon': 14.435021930...",False
2,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3270218828,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 67 m²,0,"{'lat': 50.05026806920381, 'lon': 14.424481930...",False
3,"[[], []]",0,[],False,"[[personal, after_reconstruction, balcony, bri...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2917897292,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 139 m²,0,"{'lat': 50.05026806920381, 'lon': 14.424481930...",False
4,"[[], []]",0,[],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 5, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3706426444,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+1 91 m²,0,"{'lat': 50.05026806920381, 'lon': 14.424481930...",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,"[[], []]",0,[],False,"[[personal, balcony, loggia, brick, cellar, el...","{'category_main_cb': 1, 'category_sub_cb': 8, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,506103628,0,20480000,"{'value_raw': 20480000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 4+kk 186 m²,0,"{'lat': 50.09045206920381, 'lon': 14.408879930...",False
57,"[[], []]",0,[],False,"[[personal, loggia, panel, cellar, elevator, n...","{'category_main_cb': 1, 'category_sub_cb': 5, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1941210700,0,7760000,"{'value_raw': 7760000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+1 79 m²,0,"{'lat': 50.039648069203814, 'lon': 14.52572493...",False
58,"[[], []]",0,[],False,"[[personal, loggia, brick, cellar, not_furnish...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1944839244,0,10778000,"{'value_raw': 10778000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 123 m²,0,"{'lat': 50.04655606920381, 'lon': 14.514462930...",False
59,"[[], [medic]]",0,[Lékař 5 min. pěšky],False,"[[new_building, personal, balcony, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2688874316,0,10707000,"{'value_raw': 10707000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 71 m²,0,"{'lat': 50.09342806920381, 'lon': 14.489923930...",False


In [47]:
def sreality_json_to_df(sreality_data):
    return pd.DataFrame(sreality_data['_embedded']['estates'])

sreality_json_to_df(d)

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[], []]",0,[],False,"[[personal, after_reconstruction, balcony, bri...","{'category_main_cb': 1, 'category_sub_cb': 10,...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2027628892,0,29149000,"{'value_raw': 29149000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 5+kk 152 m² (Podkrovní),2453450,"{'lat': 50.09108758339895, 'lon': 14.410173416...",False
1,"[[], []]",0,[],False,"[[personal, panel, elevator], [playground, nat...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2592592972,0,3769000,"{'value_raw': 3769000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 27 m²,0,"{'lat': 50.11257558339895, 'lon': 14.499960416...",False
2,"[[], []]",0,[],False,"[[new_building, personal, terrace, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2891884364,0,5719000,"{'value_raw': 5719000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 41 m²,0,"{'lat': 50.01912358339895, 'lon': 14.617164416...",False
3,"[[furnished], []]",0,[Vybavený],False,"[[personal, after_reconstruction, brick, eleva...","{'category_main_cb': 1, 'category_sub_cb': 10,...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2505213004,0,0,"{'value_raw': 0, 'unit': '', 'name': 'Celková ...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 5+kk 162 m²,0,"{'lat': 50.06036758339895, 'lon': 14.468755416...",True
4,"[[not_furnished], []]",0,[Nevybavený],False,"[[personal, after_reconstruction, brick, cella...","{'category_main_cb': 1, 'category_sub_cb': 3, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,544741196,0,4271000,"{'value_raw': 4271000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+1 37 m²,0,"{'lat': 50.03628958339895, 'lon': 14.418009416...",False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,"[[], []]",0,[],False,"[[new_building, personal, balcony, brick, cell...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,684078156,0,9047000,"{'value_raw': 9047000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 56 m²,0,"{'lat': 50.04055458339895, 'lon': 14.341896416...",False
57,"[[collective, not_furnished], []]",0,"[Družstevní, Nevybavený]",False,"[[collective, panel, elevator, not_furnished],...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,281707596,0,5528000,"{'value_raw': 5528000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 67 m²,0,"{'lat': 50.07931958339895, 'lon': 14.499312416...",True
58,"[[], []]",0,[],False,"[[personal, loggia, brick, cellar, elevator, g...","{'category_main_cb': 1, 'category_sub_cb': 7, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3236873292,0,13971000,"{'value_raw': 13971000, 'unit': '', 'name': 'C...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 3+1 117 m²,0,"{'lat': 50.060169583398945, 'lon': 14.35936741...",False
59,"[[not_furnished], []]",0,[Nevybavený],False,"[[personal, panel, not_furnished], [playground...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,3480339532,0,4171000,"{'value_raw': 4171000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 42 m²,0,"{'lat': 50.07983458339895, 'lon': 14.318833416...",True


### 1c. link function `1b` into function `1a`

In [48]:
import requests
import pandas as pd
import time

def request_sreality(page, category_main='flat', category_type='sell', locality_region='Praha'):

    time.sleep(0.5)

    category_mains = {'flat':1, 'house':2, 'land':3 }
    category_types = {'sell':1,'rent':2}
    region_mapping = {'Praha':10, 'Brno':14}

    if category_main not in category_mains:
        raise Exception(f'Unknown category main {category_main}')
    
    if category_type not in category_types:
        raise Exception(f'Unknown category type {category_type}')
    
    if locality_region not in region_mapping:
        raise Exception(f'Unknown locality region {locality_region}')
    
    url_template = 'https://www.sreality.cz/api/cs/v2/estates?category_main_cb={category_main_cb}&category_type_cb={category_type_cb}&locality_region_id={locality_region_id}&per_page60&page={page}'
     
    try:
        url = url_template.format(
            category_main_cb=category_mains[category_main],
            category_type_cb=category_types[category_type],
            locality_region_id=region_mapping[locality_region],
            page=page
        )

        r=requests.get(url)

        return sreality_json_to_df(r.json())
    

        labelsReleased  has_panorama                 labels  is_auction  \
0             [[], []]             0                     []       False   
1             [[], []]             0                     []       False   
2             [[], []]             0                     []       False   
3        [[], [train]]             0  [Vlak 86 metrů pěšky]       False   
4   [[collective], []]             0           [Družstevní]       False   
..                 ...           ...                    ...         ...   
56            [[], []]             0                     []       False   
57            [[], []]             0                     []       False   
58            [[], []]             0                     []       False   
59            [[], []]             0                     []       False   
60            [[], []]             0                     []       False   

                                            labelsAll  \
0   [[new_building, personal, balcony, cel

### 1c. Combining multiple requests into single df

* Function should parametrize:
    * `start_page` and `end_page`
    * request parameters
* construct a list of individual request dfs
* then feed it into `pd.concat` function

In [17]:
list(range(1,10))

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [50]:
def multiple_sreality_requests(start_page,end_page,category_main='flat', category_type='sell', locality_region='Praha'):
    return pd.concat([
        request_sreality(i, category_main=category_main, category_type=category_type, locality_region=locality_region) 
        for i in range(start_page,end_page+1)
    ])

raw = multiple_sreality_requests(0,4)
raw.head()

TypeError: cannot concatenate object of type '<class 'dict'>'; only Series and DataFrame objs are valid

## Task 2: Cleaning data

### 2a. Filter columns
* filter only columns: `['locality', 'price', 'name', 'gps','hash_id','labelsAll','exclusively_at_rk']`
* use `.copy()` to avoid `SettingWithCopyWarning` later


In [54]:
def multiple_sreality_requests(start_page,end_page,category_main='flat', category_type='sell', locality_region='Praha'):
    return pd.concat([
        request_sreality(i, category_main=category_main, category_type=category_type, locality_region=locality_region) 
        for i in range(start_page,end_page+1)
    ])

raw = multiple_sreality_requests(0,4)
raw.head()

Unnamed: 0,labelsReleased,has_panorama,labels,is_auction,labelsAll,seo,exclusively_at_rk,category,has_floor_plan,_embedded,...,hash_id,attractive_offer,price,price_czk,_links,rus,name,region_tip,gps,has_matterport_url
0,"[[new_building, balcony, parking_lots], []]",0,"[Novostavba, Balkon, Parkování]",False,"[[new_building, personal, balcony, cellar, ele...","{'category_main_cb': 1, 'category_sub_cb': 2, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,1225335884,0,6655000,"{'value_raw': 6655000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 1+kk 45 m²,2449079,"{'lat': 50.07450858339895, 'lon': 14.456531416...",False
1,"[[], []]",0,[],False,"[[personal, brick, cellar, not_furnished], []]","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,163661388,0,5966000,"{'value_raw': 5966000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 42 m²,0,"{'lat': 50.04776458339895, 'lon': 14.439760416...",False
2,"[[], []]",0,[],False,"[[personal, brick, cellar, elevator], []]","{'category_main_cb': 1, 'category_sub_cb': 4, ...",0,1,0,"{'favourite': {'is_favourite': False, '_links'...",...,2367013964,0,9689000,"{'value_raw': 9689000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 51 m²,0,"{'lat': 50.09207958339895, 'lon': 14.399234416...",False
3,"[[after_reconstruction], [medic]]",0,"[Po rekonstrukci, Lékař 6 min. pěšky]",False,"[[personal, after_reconstruction, brick, cella...","{'category_main_cb': 1, 'category_sub_cb': 5, ...",0,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,2045167436,0,5719000,"{'value_raw': 5719000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+1 45 m²,0,"{'lat': 50.09399858339895, 'lon': 14.458484416...",False
4,"[[balcony], [train]]",0,"[Balkon, Vlak 4 min. pěšky]",False,"[[personal, balcony, brick, elevator], [vet, s...","{'category_main_cb': 1, 'category_sub_cb': 4, ...",1,1,1,"{'favourite': {'is_favourite': False, '_links'...",...,739587148,0,7025000,"{'value_raw': 7025000, 'unit': '', 'name': 'Ce...",{'dynamicDown': [{'href': 'https://d18-a.sdn.c...,False,Prodej bytu 2+kk 67 m² (Mezonet),0,"{'lat': 49.973812583398946, 'lon': 14.37474641...",True


In [56]:
# Assuming 'raw' is the DataFrame you got from the previous step
filtered_columns = ['locality', 'price', 'name', 'gps', 'hash_id', 'labelsAll', 'exclusively_at_rk']
filtered_df = raw[filtered_columns].copy()

# Display the first few rows of the filtered DataFrame
filtered_df.head()

Unnamed: 0,locality,price,name,gps,hash_id,labelsAll,exclusively_at_rk
0,Praha 3 - Žižkov,6655000,Prodej bytu 1+kk 45 m²,"{'lat': 50.07450858339895, 'lon': 14.456531416...",1225335884,"[[new_building, personal, balcony, cellar, ele...",1
1,Praha 4 - Nusle,5966000,Prodej bytu 2+kk 42 m²,"{'lat': 50.04776458339895, 'lon': 14.439760416...",163661388,"[[personal, brick, cellar, not_furnished], []]",0
2,Praha 6 - Dejvice,9689000,Prodej bytu 2+kk 51 m²,"{'lat': 50.09207958339895, 'lon': 14.399234416...",2367013964,"[[personal, brick, cellar, elevator], []]",0
3,Praha 7 - Holešovice,5719000,Prodej bytu 2+1 45 m²,"{'lat': 50.09399858339895, 'lon': 14.458484416...",2045167436,"[[personal, after_reconstruction, brick, cella...",0
4,Praha 5 - Radotín,7025000,Prodej bytu 2+kk 67 m² (Mezonet),"{'lat': 49.973812583398946, 'lon': 14.37474641...",739587148,"[[personal, balcony, brick, elevator], [vet, s...",1


### 2b: GPS
* Convert dictionary in `gps` column into two columns - `lat` and `lon`
* use apply function on gps column
* Note apply can return multiple columns

In [62]:
# Assuming 'raw' is the DataFrame you got from the multiple_sreality_requests function
gps_columns = raw['gps'].apply(pd.Series)
filtered_df['lat'] = gps_columns['lat']
filtered_df['lon'] = gps_columns['lon']

# Display the first few rows of the DataFrame with the new 'lat' and 'lon' columns
filtered_df.head()

Unnamed: 0,locality,price,name,hash_id,labelsAll,exclusively_at_rk,lat,lon
0,Praha 3 - Žižkov,6655000,Prodej bytu 1+kk 45 m²,1225335884,"[[new_building, personal, balcony, cellar, ele...",1,50.074509,14.456531
1,Praha 4 - Nusle,5966000,Prodej bytu 2+kk 42 m²,163661388,"[[personal, brick, cellar, not_furnished], []]",0,50.047765,14.43976
2,Praha 6 - Dejvice,9689000,Prodej bytu 2+kk 51 m²,2367013964,"[[personal, brick, cellar, elevator], []]",0,50.09208,14.399234
3,Praha 7 - Holešovice,5719000,Prodej bytu 2+1 45 m²,2045167436,"[[personal, after_reconstruction, brick, cella...",0,50.093999,14.458484
4,Praha 5 - Radotín,7025000,Prodej bytu 2+kk 67 m² (Mezonet),739587148,"[[personal, balcony, brick, elevator], [vet, s...",1,49.973813,14.374746


### 2b. Get flat type from name
* Name is always represented by string `Prodej bytu [type of flat] [Area] m^2`
* try picking third word in string
* check meaningfulness using `.value_counts()`

In [66]:
# Assuming 'filtered_df' is the DataFrame you got from the previous steps
filtered_df['flat_type'] = filtered_df['name'].apply(lambda x: x.split()[2])

# Display the distribution of unique flat types
print(filtered_df['flat_type'].value_counts())

1+kk        30
2+kk        29
3+kk        15
3+1         11
2+1          5
4+1          5
4+kk         5
1+1          2
5+kk         1
atypické     1
5+1          1
Name: flat_type, dtype: int64


In [65]:
def pick_third_word(x):
    return x.split(' ')[2]

clean['name'].apply(pick_third_word)

NameError: name 'clean' is not defined

### 2c. Get area from name
* Naive: select the word before last word
* Then try navigating using the index of `'m²'`
* if this also fail, then you will need to use regex

In [69]:
filtered_df['area'] = filtered_df['name'].apply(lambda x: x.split()[-2])
print(filtered_df['area'].head())

def extract_area(s):
    words = s.split()
    try:
        idx = words.index('m²')
        return words[idx - 1]
    except ValueError:
        return None

filtered_df['area'] = filtered_df['name'].apply(extract_area)
print(filtered_df['area'].head())

import re

def extract_area_regex(s):
    match = re.search(r'(\d+)\s*m²', s)
    if match:
        return match.group(1)
    return None

filtered_df['area'] = filtered_df['name'].apply(extract_area_regex)
print(filtered_df['area'].head())

0    45
1    42
2    51
3    45
4    m²
Name: area, dtype: object
0    45
1    42
2    51
3    45
4    67
Name: area, dtype: object
0    45
1    42
2    51
3    45
4    67
Name: area, dtype: object


## Task 3 (Homework): Convert column`labelsAll` into boolean variables

### Task 3a. Get all possible label names
* deal with nested-list structure
* Hint: try sum the whole column to get a nested list of lists.
* Then flatten the nested list (2D to 1D)
* Finally keep only unique elements


In [70]:
import pandas as pd
import numpy as np

### 4b. Test existence of label `cellar` for offers
* again deal with nested list of list structure
* write generic function `test_existence_of_label(offer_labels,label)`

### 4c. Test existence of all possible labels
* use apply returning series with all labels