In [39]:
import pandas as pd
from datasets import load_dataset
import ast

In [32]:
ds = load_dataset("ns2agi/antwerp-osm-navigator")

In [33]:
# only rows with amenity in the tags column dict
ds = ds["train"]
ds = ds.filter(lambda x: "amenity" in x["tags"])

In [34]:
df = ds.to_pandas()

In [37]:
df.head()

Unnamed: 0,id,type,tags,lat,lon
0,26741158,node,"{""amenity"": ""parking_entrance""}",51.183111,4.421245
1,26741174,node,"{""amenity"": ""parking_entrance""}",51.183584,4.422063
2,26978064,node,"{""disused:amenity"": ""ferry_terminal"", ""disused...",51.236584,4.429983
3,35513650,node,"{""amenity"": ""place_of_worship"", ""name"": ""Hoofd...",51.209367,4.423948
4,60574462,node,"{""amenity"": ""parking"", ""created_by"": ""JOSM""}",51.218321,4.428689


In [40]:
# get all unique values in the amenity column
# Ensure tags are parse
df['tags'] = df['tags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)

# Extract amenities
amenities = df['tags'].apply(lambda x: x.get('amenity') if isinstance(x, dict) and 'amenity' in x else None)

# Get unique non-null values
unique_amenities = amenities.dropna().unique()

# Sort and display
unique_amenities = sorted(unique_amenities)
print(unique_amenities)

['animal_breeding', 'animal_training', 'arts_centre', 'atm', 'baby_hatch', 'bank', 'bar', 'bench', 'bicycle_parking', 'bicycle_pooling', 'bicycle_rental', 'bicycle_repair_station', 'biergarten', 'binoculars', 'brothel', 'bureau_de_change', 'bus_station', 'cafe', 'car_rental', 'car_sharing', 'car_wash', 'casino', 'chair', 'charging_station', 'childcare', 'cinema', 'clinic', 'clock', 'coast_radar_station', 'college', 'community_centre', 'compressed_air', 'conference_centre', 'courthouse', 'coworking_space', 'crematorium', 'dentist', 'dive_centre', 'doctors', 'dog_toilet', 'dressing_room', 'drinking_water', 'driving_school', 'dropzone', 'events_centre', 'events_venue', 'exhibition_centre', 'fab_lab', 'fast_food', 'ferry_terminal', 'festival_grounds', 'fire_station', 'food_court', 'fountain', 'fuel', 'grave_yard', 'hospital', 'ice_cream', 'karaoke_box', 'kindergarten', 'letter_box', 'library', 'loading_dock', 'lounger', 'marketplace', 'mobility_hub', 'monastery', 'money_transfer', 'motorcy

In [41]:
medical_amenities = [
    'baby_hatch',
    'clinic',
    'dentist',
    'doctors',
    'hospital',
    'nursing_home',
    'pharmacy',
    'veterinary'
]

In [42]:
# Filter the dataset for medical amenities
medical_amenities_df = df[df['tags'].apply(lambda x: x.get('amenity') in medical_amenities if isinstance(x, dict) else False)]

In [44]:
# Display the filtered dataset
medical_amenities_df.shape

(262, 5)

In [50]:
medical_amenities_df

Unnamed: 0,id,type,tags,lat,lon
24,248701820,node,"{'amenity': 'pharmacy', 'entrance:kerb:height'...",51.219698,4.414930
25,248701898,node,"{'amenity': 'pharmacy', 'healthcare': 'pharmac...",51.224924,4.409755
26,248702004,node,"{'amenity': 'pharmacy', 'healthcare': 'pharmac...",51.226704,4.414979
50,297296979,node,"{'addr:housenumber': '291', 'amenity': 'veteri...",51.193080,4.446140
60,305753470,node,"{'amenity': 'pharmacy', 'created_by': 'Merkaar...",51.185573,4.342406
...,...,...,...,...,...
10291,783100218,way,"{'OnroerendErfgoed:criteria': 'M', 'addr:city'...",,
10445,823163832,way,"{'addr:city': 'Antwerpen', 'addr:housenumber':...",,
11465,982840306,way,"{'addr:city': 'Hoboken', 'addr:housenumber': '...",,
11951,1132949907,way,"{'addr:housenumber': '140', 'addr:street': 'Go...",,


In [54]:
# make an amenity type column
medical_amenities_df['amenity_type'] = medical_amenities_df['tags'].apply(lambda x: x.get('amenity') if isinstance(x, dict) else None)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df['amenity_type'] = medical_amenities_df['tags'].apply(lambda x: x.get('amenity') if isinstance(x, dict) else None)


Unnamed: 0,id,type,tags,lat,lon,amenity_type
0,5489488477,node,"{'amenity': 'baby_hatch', 'operator': 'Moeders...",51.217836,4.434834,baby_hatch
1,5489488477,node,"{'amenity': 'baby_hatch', 'operator': 'Moeders...",51.217836,4.434834,baby_hatch
2,9587905560,node,"{'addr:city': 'Antwerpen', 'addr:housenumber':...",51.197793,4.394461,clinic
3,8529230495,node,"{'amenity': 'clinic', 'email': 'hoboken@gvhv.b...",51.182819,4.351146,clinic
4,7315573548,node,"{'amenity': 'dentist', 'healthcare': 'dentist'...",51.209514,4.399816,dentist
5,7850230585,node,"{'addr:city': 'Deurne', 'addr:housenumber': '2...",51.231989,4.467568,dentist
6,7302181586,node,"{'addr:city': 'Deurne', 'addr:housenumber': '1...",51.195482,4.456668,doctors
7,4575433690,node,"{'addr:housenumber': '30', 'addr:postcode': '2...",51.209815,4.448207,doctors
8,10660224,way,"{'amenity': 'hospital', 'check_date': '2025-03...",,,hospital
9,507879117,way,"{'addr:city': 'Antwerpen', 'addr:housenumber':...",,,hospital


In [62]:
def extract_data(tags, key):
    if isinstance(tags, dict):
        return tags.get(key, None)
    return None

In [68]:
# Extract specific keys from the tags column
medical_amenities_df['website'] = medical_amenities_df['tags'].apply(lambda x: extract_data(x, 'website') or extract_data(x, 'contact:website'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df['website'] = medical_amenities_df['tags'].apply(lambda x: extract_data(x, 'website') or extract_data(x, 'contact:website'))


In [65]:
medical_amenities_df['phone'] = medical_amenities_df['tags'].apply(lambda x: extract_data(x, 'phone') or extract_data(x, 'contact:phone'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df['phone'] = medical_amenities_df['tags'].apply(lambda x: extract_data(x, 'phone') or extract_data(x, 'contact:phone'))


In [69]:
medical_amenities_df['email'] = medical_amenities_df['tags'].apply(lambda x: extract_data(x, 'email') or extract_data(x, 'contact:email'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df['email'] = medical_amenities_df['tags'].apply(lambda x: extract_data(x, 'email') or extract_data(x, 'contact:email'))


In [71]:
for row in medical_amenities_df.iterrows():
    # check if lat and lon columns are not null
    if pd.notnull(row[1]['lat']) and pd.notnull(row[1]['lon']):
        continue
    # extract 'addr:city' and 'addr:postcode' and 'addr:street' and 'addr:housenumber from the tags column
    tags = row[1]['tags']
    if not isinstance(tags, dict):
        # add column is_address_null
     medical_amenities_df.at[row[0], 'is_address_null'] = True
     continue
    # check if all keys are present
    if all(key in tags for key in ['addr:city', 'addr:postcode', 'addr:street', 'addr:housenumber']):
        # add column is_address_null
        medical_amenities_df.at[row[0], 'is_address_null'] = False
        # add address column
        medical_amenities_df.at[row[0], 'address'] = f"{tags['addr:street']} {tags['addr:housenumber']}, {tags['addr:postcode']} {tags['addr:city']}"
    else:
        # add column is_address_null
        medical_amenities_df.at[row[0], 'is_address_null'] = True

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df.at[row[0], 'is_address_null'] = False
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df.at[row[0], 'address'] = f"{tags['addr:street']} {tags['addr:housenumber']}, {tags['addr:postcode']} {tags['addr:city']}"


In [73]:
# amenities where both the address and lat/lon are null
medical_amenities_df[medical_amenities_df['is_address_null'] & medical_amenities_df['lat'].isnull() & medical_amenities_df['lon'].isnull()]

Unnamed: 0,id,type,tags,lat,lon,amenity_type,website,phone,email,is_address_null,address
8390,9715814,way,"{'OnroerendErfgoed:criteria': 'M', 'addr:city'...",,,hospital,https://www.zas.be/locaties/zas-vincentius,+32 3 285 20 00,vincentius@zas.be,True,
8393,10660224,way,"{'amenity': 'hospital', 'check_date': '2025-03...",,,hospital,https://www.zna.be/nl/zna-sint-erasmus,+32 3 270 80 11,,True,
8469,45497265,way,"{'amenity': 'hospital', 'email': 'onthaal.deur...",,,hospital,https://azmonica.be/,+32 033 205 000,onthaal.deurne@azmonica.be,True,
8658,191546420,way,"{'addr:city': 'Wilrijk', 'addr:housenumber': '...",,,doctors,,,,True,
8756,199727677,way,"{'addr:housenumber': '6', 'addr:street': 'Zwan...",,,doctors,,,,True,
8757,200277466,way,"{'addr:city': 'Antwerpen', 'addr:housenumber':...",,,doctors,,,,True,
8758,200277489,way,"{'addr:city': 'Antwerpen', 'addr:housenumber':...",,,doctors,,,,True,
8788,208884448,way,"{'addr:city': 'Wilrijk', 'addr:housenumber': '...",,,pharmacy,,,,True,
9104,228558387,way,"{'amenity': 'hospital', 'email': 'augustinus@z...",,,hospital,https://www.zas.be/locaties/zas-augustinus,+32 3 443 30 11,augustinus@zas.be,True,
9415,371263460,way,"{'addr:housenumber': '1', 'addr:street': 'Moer...",,,doctors,https://www.huisartsenpunta.be/,+32 3 540 54 20,,True,


In [74]:
# rename column 'amenity_type' to 'amenity'
# rename column 'tags' to 'metadata'
medical_amenities_df.rename(columns={'amenity_type': 'amenity', 'tags': 'metadata'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medical_amenities_df.rename(columns={'amenity_type': 'amenity', 'tags': 'metadata'}, inplace=True)


In [75]:
# save to csv
medical_amenities_df.to_csv("medical_amenities.csv", index=False)