# Extract Berlin Libraries from OpenStreetMap (Overpass API)

This notebook extracts all libraries in Berlin from OpenStreetMap using **OSMnx**.

- Research, discover, collect, and evaluate datasets
- Filter tag: `amenity=library`
- Output: Raw GeoDataFrame with OSM attributes (in both .csv and .geojson format)
- CRS: EPSG:4326


In [5]:
# imports

from pathlib import Path            # create the output directory before saving

import geopandas as gpd             # handle geospatial (map) data
import osmnx as ox                  # fetch data from OpenStreetMap


In [6]:
# Function Definition
def extract_osm_libraries(place: str = "Berlin, Germany") -> gpd.GeoDataFrame:
    """
    Download the OSM features tagged ``amenity=library`` for a place.

    Parameters
    ----------
    place : str
        Place query passed to ``ox.features_from_place``.
        Defaults to "Berlin, Germany".

    Returns
    -------
    gpd.GeoDataFrame
        The fetched features in EPSG:4326 (WGS84), with the index reset so
        the OSM identifiers are available as ordinary columns.
    """
    library_filter = {"amenity": "library"}
    return (
        ox.features_from_place(place, library_filter)  # query OSM for matching features
        .to_crs(epsg=4326)                             # express coordinates in WGS84
        .reset_index()                                 # move the OSM IDs out of the index
    )


In [7]:
# Run the extraction for Berlin and preview the first rows
libraries_gdf = extract_osm_libraries(place="Berlin, Germany")
libraries_gdf.head(5)


Unnamed: 0,element,id,geometry,addr:city,addr:country,addr:housenumber,addr:postcode,addr:street,addr:suburb,amenity,...,name:es,name:he,name:ko,name:no,name:pl,name:tr,name:zh,wheelchair:url,year_of_construction,roof:material
0,node,29071031,POINT (13.34751 52.53124),Berlin,DE,33,10559,Perleberger Straße,Moabit,library,...,,,,,,,,,,
1,node,60848456,POINT (13.47084 52.53078),Berlin,DE,14,10369,Anton-Saefkow-Platz,,library,...,,,,,,,,,,
2,node,203557001,POINT (13.53872 52.52816),Berlin,DE,4,12681,Helene-Weigel-Platz,Marzahn,library,...,,,,,,,,,,
3,node,256922190,POINT (13.28719 52.5375),Berlin,DE,18,13627,Halemweg,Charlottenburg-Nord,library,...,,,,,,,,,,
4,node,257708789,POINT (13.20139 52.53613),Berlin,DE,13,13597,Carl-Schurz-Straße,Spandau,library,...,,,,,,,,,,


In [8]:
# Summary statistics for every column, transposed so each column becomes a row
libraries_gdf.describe(include="all").transpose()

Unnamed: 0,count,unique,top,freq,mean,std,min,25%,50%,75%,max
element,151,3,node,110,,,,,,,
id,151.0,,,,3112252880.576159,3865085392.572454,180594.0,260693927.5,1356847676.0,5154027551.5,13366816284.0
geometry,151,151,POINT (13.3475136 52.5312448),1,,,,,,,
addr:city,122,1,Berlin,122,,,,,,,
addr:country,102,1,DE,102,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
name:tr,1,1,Berlin Eyalet Kütüphanesi,1,,,,,,,
name:zh,1,1,柏林国立图书馆,1,,,,,,,
wheelchair:url,1,1,https://staatsbibliothek-berlin.de/vor-ort/men...,1,,,,,,,
year_of_construction,2,2,1903..1914,1,,,,,,,


In [9]:
# Show every column name of the extracted GeoDataFrame

list(libraries_gdf.columns)

['element',
 'id',
 'geometry',
 'addr:city',
 'addr:country',
 'addr:housenumber',
 'addr:postcode',
 'addr:street',
 'addr:suburb',
 'amenity',
 'check_date',
 'check_date:opening_hours',
 'description',
 'name',
 'opening_hours',
 'phone',
 'ref:isil',
 'toilets:wheelchair',
 'website',
 'wheelchair',
 'wheelchair:description',
 'wikidata',
 'wikimedia_commons',
 'wikipedia',
 'contact:phone',
 'internet_access',
 'internet_access:fee',
 'internet_access:operator',
 'contact:email',
 'contact:website',
 'garden:type',
 'note',
 'operator',
 'wheelchair:description:de',
 'email',
 'short_name',
 'name:en',
 'operator:type',
 'operator:wikidata',
 'ref',
 'air_conditioning',
 'access',
 'changing_table',
 'kids_area:indoor',
 'level',
 'source',
 'toilets',
 'toilets:access',
 'alt_name',
 'contact:fax',
 'outdoor_seating',
 'fax',
 'room:group_study',
 'room:study_cabin',
 'service:copy',
 'service:scanner',
 'addr:housename',
 'addr:place',
 'layer',
 'name:etymology:wikidata',
 'na

In [None]:
# Save the extracted libraries in two formats under ../sources
output_path = "../sources/osm_libraries.geojson"
csv_output_path = "../sources/osm_libraries.csv"

# Make sure the target folder exists: on a fresh checkout the writers below
# would otherwise fail because they do not create missing directories.
for target in (output_path, csv_output_path):
    Path(target).parent.mkdir(parents=True, exist_ok=True)

# GeoJSON export: attributes plus geometry
libraries_gdf.to_file(output_path, driver="GeoJSON")

# CSV export: flat table for non-GIS tools. The geometry column is NOT dropped;
# it is written as text (WKT), so it can still be parsed back if needed.
libraries_gdf.to_csv(csv_output_path, index=False)

print(f"Extracted {len(libraries_gdf)} libraries")

  

Extracted 151 libraries
