In [146]:
import json
import pandas as pd
from shapely.geometry import shape

In [147]:
# Load the GeoJSON export and collect every feature's properties into a
# DataFrame for exploration.
with open('data/kyiv.geojson', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Work on the parsed dict outside the `with` block so the file is closed as
# soon as it has been read.
features = data.get('features', [])
# GeoJSON allows "properties": null (and a sloppy export may omit the key), so
# fall back to an empty dict instead of raising on such features.
df = pd.DataFrame([(feature.get('properties') or {}) for feature in features])

In [148]:
# Frequency of each `amenity` value; value_counts() ignores NaN by default and
# sorts by descending count.
df['amenity'].value_counts()

amenity
bench             5349
parking           4743
waste_disposal    2377
parking_space     2153
cafe              2075
                  ... 
cemetery             1
dog_toilet           1
funeral_hall         1
loading_dock         1
fixme                1
Name: count, Length: 165, dtype: int64

In [149]:
# Frequency of each `craft` value (NaN excluded, most common first).
df['craft'].value_counts()

craft
shoemaker                     115
electronics_repair             85
tailor                         47
hvac                           33
key_cutter                     27
dressmaker                     26
photographer                   19
yes                            13
watchmaker                     11
photographic_laboratory        10
clockmaker                     10
metal_construction             10
window_construction             9
brewery                         8
handicraft                      8
confectionery                   6
electrician                     5
sawmill                         4
stonemason                      4
jeweller                        4
gardener                        4
print_shop                      3
pottery                         3
bakery                          3
optician                        2
photo_studio                    2
locksmith                       2
builder                         2
printer                         2
key_cutt

In [150]:
# Frequency of each `healthcare` value (NaN excluded, most common first).
df['healthcare'].value_counts()

healthcare
pharmacy              1077
clinic                 354
dentist                342
hospital               141
laboratory             136
doctor                  84
sample_collection       68
yes                     15
centre                   8
psychotherapist          7
alternative              7
blood_donation           5
blood_bank               3
optometrist              3
birthing_centre          2
rehabilitation           2
podiatrist               2
counselling              1
vaccination_centre       1
Name: count, dtype: int64

In [151]:
# Frequency of each `historic` value (NaN excluded, most common first).
# NOTE(review): the counts include the literal values `yes` and `no`; decide
# whether those should be kept as categories downstream.
df['historic'].value_counts()

historic
memorial               1170
aircraft                 98
ruins                    62
building                 53
heritage                 46
monument                 41
wayside_cross            39
vehicle                  25
yes                      20
citywalls                17
cannon                   15
tomb                     14
archaeological_site      11
tank                     10
castle                    5
church                    3
wayside_shrine            3
boundary_stone            2
no                        2
railway                   2
city_gate                 2
monastery                 2
railway_car               2
bicycle                   1
wreck                     1
trade_route               1
ship                      1
tram                      1
battlefield               1
grave                     1
tree                      1
locomotive                1
Name: count, dtype: int64

In [152]:
# Frequency of each `leisure` value (NaN excluded, most common first).
df['leisure'].value_counts()

leisure
playground                      3603
pitch                           2233
park                             676
fitness_station                  335
sports_centre                    199
track                            198
fitness_centre                   194
garden                           146
picnic_table                     118
swimming_pool                     83
dog_park                          53
stadium                           53
sauna                             41
marina                            27
nature_reserve                    27
dance                             25
bleachers                         25
resort                            23
outdoor_seating                   21
fishing                           20
bandstand                         11
sports_hall                       11
escape_game                       10
ice_rink                           8
horse_riding                       8
yes                                7
firepit                       

In [153]:
# Frequency of each `office` value (NaN excluded, most common first).
df['office'].value_counts()

office
company                    542
government                 402
lawyer                     199
notary                     164
yes                         94
research                    88
diplomatic                  83
insurance                   67
ngo                         51
it                          47
telecommunication           37
estate_agent                31
travel_agent                25
coworking                   19
educational_institution     17
advertising_agency          16
newspaper                   14
architect                   13
administrative              10
publisher                    8
financial                    7
energy_supplier              6
consulting                   5
political_party              5
forestry                     5
logistics                    5
property_management          5
post_office                  5
water_utility                5
employment_agency            4
charity                      4
accountant                   4
s

In [154]:
# Frequency of each `public_transport` value (NaN excluded, most common first).
# NOTE(review): the counts include a single stray value `2`, which looks like a
# tagging error in the source data; confirm before relying on this column.
df['public_transport'].value_counts()

public_transport
platform         3189
stop_position    2432
station           132
2                   1
stop_area           1
Name: count, dtype: int64

In [155]:
# Frequency of each `shop` value (NaN excluded, most common first).
# NOTE(review): some values are free-form or combined (e.g. `candy store`,
# `lighting;interior_decoration`), so the column is not a clean enumeration.
df['shop'].value_counts()

shop
convenience                     1406
clothes                          949
supermarket                      774
hairdresser                      683
beauty                           656
                                ... 
lighting;interior_decoration       1
candles                            1
dive                               1
lifestyle_store                    1
candy store                        1
Name: count, Length: 190, dtype: int64

In [156]:
# Frequency of each `tourism` value (NaN excluded, most common first).
df['tourism'].value_counts()

tourism
artwork                 474
attraction              255
information             210
picnic_site             201
museum                  168
hotel                   153
viewpoint               114
hostel                   77
camp_site                49
gallery                  25
theme_park               25
guest_house              16
apartment                13
motel                    12
camp_pitch                7
zoo                       7
yes                       6
caravan_site              4
chalet                    2
no                        2
aquarium                  1
memorial                  1
trail_riding_station      1
Name: count, dtype: int64

In [157]:
# Number of features per `addr:street` value (NaN excluded, most common first).
df['addr:street'].value_counts()

addr:street
Берестейський проспект         159
Велика Васильківська вулиця    113
вулиця Антоновича               93
Оболонський проспект            89
Костянтинівська вулиця          84
                              ... 
вулиця Академіка Книшова         1
Причальна вулиця                 1
Інженерна вулиця                 1
Лисогірська вулиця               1
вулиця Софії Ковалевської        1
Name: count, Length: 974, dtype: int64

In [158]:
# Flatten every GeoJSON feature into one row: address, name, the category tags
# of interest, and the geometry rendered as text via shapely.

# Keys copied verbatim from the feature's properties, falling back to the tags
# of the relations the feature belongs to when the feature lacks the key.
tag_keys = ['name', 'amenity', 'craft', 'healthcare', 'historic', 'leisure', 'office', 'public_transport', 'shop', 'tourism']
# Output column order; derived from tag_keys so the two lists cannot drift apart.
columns = ['street', 'number'] + tag_keys + ['geometry']

rows = []
for feature in features:
    # GeoJSON permits "properties": null, so normalise a missing or null value
    # to an empty dict before reading from it.
    props = feature.get('properties') or {}

    # Merge the tags of all relations listed under '@relations'; when several
    # relations define the same key, the one listed last wins.
    rel_tags = {}
    for rel in props.get('@relations') or []:
        rel_tags.update(rel.get('reltags') or {})

    row = {
        'street': props.get('addr:street'),
        'number': props.get('addr:housenumber'),
    }
    # The feature's own tag takes precedence; the relation tag is used only
    # when the key is absent from the feature's properties.
    for key in tag_keys:
        row[key] = props.get(key, rel_tags.get(key))

    # GeoJSON also permits "geometry": null; shape() cannot build a geometry
    # from that, so store None (NaN in the frame) instead of raising.
    geometry = feature.get('geometry')
    row['geometry'] = str(shape(geometry)) if geometry else None
    rows.append(row)

df = pd.DataFrame(rows, columns=columns)

In [159]:
# Show the flattened frame; pandas renders a truncated head/tail preview.
df

Unnamed: 0,street,number,name,amenity,craft,healthcare,historic,leisure,office,public_transport,shop,tourism,geometry
0,бульвар Тараса Шевченка,14,Жовтий корпус,,,,building,,,,,,"POLYGON ((30.5133456 50.4443557, 30.5133026 50..."
1,проспект Свободи,3-А,,kindergarten,,,,,,,,,"POLYGON ((30.4325994 50.5060648, 30.4327114 50..."
2,вулиця Наталії Ужвій,4-Б,Дошкільний навчальний заклад №775,kindergarten,,,,,,,,,"POLYGON ((30.4375313 50.5020899, 30.4382978 50..."
3,Новомостицька вулиця,3-Д,Дошкільний навчальний заклад №72,kindergarten,,,,,,,,,"POLYGON ((30.4352105 50.5010116, 30.4353344 50..."
4,вулиця Оноре де Бальзака,52-Б,ДЮСШ №18,,,,,sports_centre,,,,,"POLYGON ((30.6056153 50.5149762, 30.6051508 50..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
67903,,,"Ж/к ""Варшавський квартал""",,,,,,,stop_area,,,POINT (30.4196322 50.5052105)
67904,,,Метроград,,,,,,,,mall,,POINT (30.5190581 50.4403359)
67905,,,Площа Галицька,,,,,,,stop_area,,,POINT (30.4877875 50.4463173)
67906,,,Площа Галицька,,,,,,,stop_area,,,POINT (30.4890159 50.4463173)


In [160]:
# Sanity check: select the rows in which the street and every category column
# are all missing. An empty result means each row has at least one of them.
df.loc[
    df[[
        'street',
        'amenity',
        'craft',
        'healthcare',
        'historic',
        'leisure',
        'office',
        'public_transport',
        'shop',
        'tourism',
    ]].isnull().all(axis='columns')
]

Unnamed: 0,street,number,name,amenity,craft,healthcare,historic,leisure,office,public_transport,shop,tourism,geometry


In [161]:
# Write the frame to data/kyiv.csv as UTF-8, omitting the positional index
# column (index=False).
df.to_csv('data/kyiv.csv', index=False, encoding='utf-8')