In [95]:
import json
import pandas as pd
from shapely.geometry import shape

In [96]:
# Read the raw GeoJSON export. Only the parsing needs the file handle, so the
# `with` block is kept as small as possible.
with open('data/lviv.geojson', 'r', encoding='utf-8') as f:
    data = json.load(f)

# List of feature objects; an empty list if the file has no 'features' key.
features = data.get('features', [])

# One row per feature, one column per distinct property key. `or {}` makes a
# feature whose 'properties' is missing or null contribute an all-NaN row
# instead of raising, matching how the flatten cell further down reads
# properties with .get().
df = pd.DataFrame([feature.get('properties') or {} for feature in features])

In [97]:
# Number of features per distinct `amenity` value, most frequent first.
df['amenity'].value_counts()

amenity
bench             2596
parking           1578
waste_basket      1043
parking_space      925
waste_disposal     821
                  ... 
watering_place       1
training             1
stripclub            1
dressing_room        1
brothel              1
Name: count, Length: 101, dtype: int64

In [98]:
# Number of features per distinct `craft` value, most frequent first.
df['craft'].value_counts()

craft
shoemaker                  55
tailor                     36
electronics_repair         26
window_construction        22
key_cutter                 16
electrician                13
optician                    7
dressmaker                  7
brewery                     7
photographic_laboratory     6
watchmaker                  6
jeweller                    5
glaziery                    5
clockmaker                  5
locksmith                   4
photographer                4
handicraft                  3
confectionery               3
roofer                      3
beekeeper                   3
carpenter                   2
upholsterer                 2
blacksmith                  2
oil_mill                    1
sculptor                    1
metal_construction          1
bookbinder                  1
repair                      1
plumber                     1
stonemason                  1
painter                     1
gardener                    1
photo_studio                1
tile

In [99]:
# Number of features per distinct `healthcare` value, most frequent first.
df['healthcare'].value_counts()

healthcare
pharmacy             339
dentist              102
clinic                83
hospital              64
laboratory            36
doctor                32
sample_collection     21
yes                    4
rehabilitation         4
psychotherapist        3
speech_therapist       2
centre                 2
birthing_centre        2
podiatrist             2
blood_donation         1
counselling            1
optometrist            1
Name: count, dtype: int64

In [100]:
# Number of features per distinct `historic` value, most frequent first.
df['historic'].value_counts()

historic
memorial               194
tomb                    53
wayside_cross           53
yes                     24
monument                20
house                   18
ruins                   13
vehicle                 13
railway_car             11
railway                 10
church                   7
building                 6
tower                    4
castle                   4
heritage                 3
citywalls                3
tank                     2
water_well               2
fort                     2
barn                     1
rotunda                  1
city_gate                1
manor                    1
archaeological_site      1
watermill                1
school                   1
chapel                   1
wayside_shrine           1
locomotive               1
cannon                   1
ship                     1
Name: count, dtype: int64

In [101]:
# Number of features per distinct `leisure` value, most frequent first.
df['leisure'].value_counts()

leisure
playground         765
pitch              449
fitness_station    166
park               109
track               65
fitness_centre      49
sports_centre       41
garden              32
stadium             26
picnic_table        23
swimming_pool       22
sauna               14
dog_park             9
firepit              8
bleachers            8
outdoor_seating      7
escape_game          5
dance                4
sports_hall          3
ice_rink             3
indoor_play          3
horse_riding         2
resort               1
nature_reserve       1
common               1
bandstand            1
trampoline_park      1
tanning_salon        1
Name: count, dtype: int64

In [102]:
# Number of features per distinct `office` value, most frequent first.
df['office'].value_counts()

office
company                    109
notary                      75
government                  74
lawyer                      53
insurance                   35
it                          26
estate_agent                19
travel_agent                19
ngo                         17
diplomatic                  14
telecommunication           13
yes                         12
coworking                   11
research                     8
educational_institution      6
architect                    5
advertising_agency           5
association                  5
accountant                   3
foundation                   3
administrative               2
security                     2
water_utility                2
publisher                    2
newspaper                    2
consulting                   2
political_party              2
employment_agency            2
property_management          1
guide                        1
translator                   1
post_office                  1
g

In [103]:
# Number of features per distinct `public_transport` value, most frequent first.
df['public_transport'].value_counts()

public_transport
platform         1373
stop_position     789
station            18
Name: count, dtype: int64

In [104]:
# Number of features per distinct `shop` value, most frequent first.
df['shop'].value_counts()

shop
convenience           779
clothes               386
alcohol               346
general               332
supermarket           212
                     ... 
hairdresser_supply      1
lottery                 1
storage_rental          1
hvac                    1
printer_ink             1
Name: count, Length: 151, dtype: int64

In [105]:
# Number of features per distinct `tourism` value, most frequent first.
df['tourism'].value_counts()

tourism
hotel           110
artwork          98
hostel           65
information      63
museum           61
attraction       51
viewpoint        27
apartment        19
guest_house      11
theme_park        7
gallery           6
picnic_site       6
motel             5
camp_site         2
zoo               2
resort            1
chalet            1
apartments        1
caravan_site      1
aquarium          1
yes               1
Name: count, dtype: int64

In [106]:
# Number of features per distinct `addr:street` value, most frequent first.
df['addr:street'].value_counts()

addr:street
вулиця Тараса Шевченка        103
Зелена вулиця                  75
Городоцька вулиця              54
Стрийська вулиця               51
проспект Червоної Калини       51
                             ... 
Підгірна вулиця                 1
вулиця Віри, Надії, Любові      1
Природна вулиця                 1
вулиця Братів Дужих             1
вулиця Івана Огієнка            1
Name: count, Length: 505, dtype: int64

In [107]:
# Column names, in output order, for the flattened per-feature table.
columns = [
    # geometry and feature/relation type
    'geometry',
    'type',
    # address
    'street',
    # descriptive tags
    'amenity',
    'craft',
    'healthcare',
    'historic',
    'leisure',
    'office',
    'public_transport',
    'shop',
    'tourism',
]

In [108]:
# Flatten every GeoJSON feature into one record holding its geometry (as text)
# and the selected tags, then build the table from those records.
#
# NOTE: this rebinds `df`. The new frame only has the names listed in `columns`,
# so the exploratory cells above that index raw keys such as 'addr:street'
# will not work if re-run after this cell.
rows = []
for feature in features:

    # `or {}` (rather than a .get() default) also covers an explicit
    # `"properties": null`, which GeoJSON permits.
    props = feature.get('properties') or {}

    # Tags contributed by the relations this feature belongs to. All relations
    # are merged into one dict; on a key clash the later relation wins.
    rel_tags = {}
    for rel in props.get('@relations') or []:
        rel_tags.update(rel.get('reltags') or {})

    # shape() needs a geometry mapping with a 'type' entry; a missing or null
    # geometry is recorded as None instead of raising.
    geometry = feature.get('geometry')

    row = {
        # str() of a shapely geometry is its WKT text, e.g. "POINT (24.03 49.82)".
        'geometry': str(shape(geometry)) if geometry else None,
        # These keys fall back to the merged relation tags when the feature
        # itself does not carry them.
        'type': props.get('type', rel_tags.get('type', None)),
        'street': props.get('addr:street', None),
        'amenity': props.get('amenity', rel_tags.get('amenity', None)),
        'craft': props.get('craft', None),
        'healthcare': props.get('healthcare', None),
        'historic': props.get('historic', rel_tags.get('historic', None)),
        'leisure': props.get('leisure', None),
        'office': props.get('office', None),
        'public_transport': props.get('public_transport', rel_tags.get('public_transport', None)),
        'shop': props.get('shop', rel_tags.get('shop', None)),
        'tourism': props.get('tourism', None)
    }
    rows.append(row)

# Passing `columns` fixes the column order even when `rows` is empty.
df = pd.DataFrame(rows, columns=columns)

In [109]:
# Show the flattened table (long frames are abbreviated with "..." rows in the output).
df

Unnamed: 0,geometry,type,street,amenity,craft,healthcare,historic,leisure,office,public_transport,shop,tourism
0,"POLYGON ((24.0276306 49.8432171, 24.0276539 49...",multipolygon,проспект Свободи,,,,,,,,,museum
1,"POLYGON ((24.0336171 49.843526, 24.0336181 49....",multipolygon,Вірменська вулиця,,,,,,,,,museum
2,"POLYGON ((24.0422877 49.8353952, 24.0418367 49...",multipolygon,,place_of_worship,,,,,,,,
3,"POLYGON ((24.0533003 49.8353561, 24.0527459 49...",multipolygon,,,,,memorial,,,,,
4,"POLYGON ((24.0289493 49.842662, 24.0290881 49....",multipolygon,Театральна вулиця,,,,,,,,,museum
...,...,...,...,...,...,...,...,...,...,...,...,...
24979,POINT (24.0341638 49.8268126),public_transport,,,,,,,,stop_area,,
24980,POINT (24.0341882 49.8267575),public_transport,,,,,,,,stop_area,,
24981,POINT (24.0005699 49.8289801),public_transport,,,,,,,,stop_area,,
24982,POINT (24.0017999 49.8447741),public_transport,,,,,,,,stop_area,,


In [110]:
# Select the rows in which every one of the listed columns is missing.
df.loc[
    df[[
        'street',
        'amenity',
        'craft',
        'healthcare',
        'historic',
        'leisure',
        'office',
        'public_transport',
        'shop',
        'tourism',
    ]].isnull().all(axis='columns')
]

Unnamed: 0,geometry,type,street,amenity,craft,healthcare,historic,leisure,office,public_transport,shop,tourism


In [111]:
# Write the flattened table to disk as UTF-8 CSV, without the row index column.
df.to_csv(
    'data/lviv.csv',
    encoding='utf-8',
    index=False,
)