In [2]:
import osmnx as ox
import geopandas as gpd
import pandas as pd
import os
import osmnx as ox
import time
from shapely.wkt import loads as load_wkt
from requests.exceptions import ReadTimeout

In [4]:
# Study areas, given as place names. The wider administrative areas are used
# instead of the core cities ("Birmingham, United Kingdom",
# "Manchester, United Kingdom", "London, United Kingdom").
cities = [
    "West Midlands, United Kingdom",  # used instead of Birmingham
    "Greater Manchester, United Kingdom",  # used instead of Manchester
    "Greater London, United Kingdom",  # used instead of London
    # Further candidates, currently disabled:
    # "Glasgow, United Kingdom",
    # "Edinburgh, United Kingdom",
    # "Liverpool, United Kingdom",
    # "Bristol, United Kingdom",
    # "Newcastle upon Tyne, United Kingdom",
    # "Leeds, United Kingdom",
    # "Sheffield, United Kingdom",
    # "Nottingham, United Kingdom",
    # "Cardiff, United Kingdom",
    # "Belfast, United Kingdom"
]


# OSM tag groups, one dict per thematic category. Each dict maps an OSM key to
# the list of values to query for that key. The position of a dict in this
# list is the group number that appears in the output file names.
tags_list = [
    # 0: natural tags
    {
        "landuse": ["forest", "wood", "meadow"],
        "natural": [
            "tree", "tree_group", "grassland", "fell", "heath",
            "scrub", "wetland", "moor", "marsh", "wood",
        ],
    },
    # 1: agricultural tags
    {
        "landuse": [
            "allotments", "farmland", "farmyard",
            "orchard", "plant_nursery", "vineyard",
        ],
    },
    # 2: human-managed green spaces tags
    {
        "landuse": [
            "recreation_ground", "village_green", "greenfield", "plant_nursery",
        ],
        "leisure": ["garden", "nature_reserve", "park", "public_garden"],
        "garden:type": ["residential", "community", "botanical", "arboretum"],
    },
    # 3: urban green infrastructure tags
    {
        "man_made": ["bioswale"],
        "roof:material": ["grass", "plants", "roof_greening"],
        "garden:type": [
            "green_wall", "show_garden", "roof_garden", "residential",
        ],
        "natural": ["tree_row", "shrubbery"],
    },
    # 4: grassy sports fields tags
    {
        "sport": [
            "american_football", "archery", "australian_football", "baseball",
            "bowls", "canadian_football", "cricket", "croquet",
            "equestrian", "lacrosse", "golf", "rugby_league",
            "rugby_union", "soccer", "softball", "orienteering",
        ],
        "leisure": ["miniature_golf", "disc_golf_course", "golf_course", "pitch"],
    },
    # 5: building tags
    {
        "building": ["commercial", "industrial", "residential", "apartments"],
    },
    # 6: other, miscellaneous tags (transport, power, extraction, industry)
    {
        "highway": ["motorway", "trunk", "primary", "secondary", "tertiary"],
        "railway": ["rail", "tram", "light_rail"],
        "aeroway": ["runway", "taxiway", "helipad"],
        "power": ["line", "sub_station"],
        "landuse": ["quarry", "landfill"],
        "industrial": ["factory"],
    },
]

def download_osm_data(city_name, tags):
    """Download OSM features for one place, one request per key/value pair.

    Args:
        city_name: Place name passed to ``ox.features_from_place``.
        tags: Mapping of OSM key -> list of values. Every (key, value) pair
            is fetched with its own request.

    Returns:
        A GeoDataFrame holding all fetched features, restricted to a fixed
        set of columns plus the queried tag keys (columns missing from the
        responses are added empty), or ``None`` if nothing was fetched.
    """
    max_attempts = 3
    retry_wait_seconds = 5

    # Only keep essential columns; 'geometry' is appended last below.
    base_columns = ['osm_id', 'osm_way_id', 'name', 'type', 'aeroway', 'amenity',
                    'admin_level', 'barrier', 'boundary', 'building', 'craft', 'geological',
                    'historic', 'land_area', 'landuse', 'leisure', 'man_made', 'military',
                    'natural', 'office', 'place', 'shop', 'sport', 'tourism', 'other_tags']
    # Always keep the keys that were actually queried (e.g. 'highway',
    # 'railway'); otherwise the column that explains why a feature was
    # fetched would be dropped.
    queried_keys = [key for key in tags if key not in base_columns and key != 'geometry']
    columns_to_keep = base_columns + queried_keys + ['geometry']

    gdfs = []
    for key, values in tags.items():
        for value in values:
            query = {key: value}
            for attempt in range(max_attempts):
                try:
                    print(f"Fetching data for {query} in {city_name} (Attempt {attempt+1})")
                    gdf = ox.features_from_place(city_name, tags=query)
                except ReadTimeout:
                    # Only timeouts are retried; wait only if a retry follows.
                    if attempt + 1 < max_attempts:
                        print(f"Timeout for query: {query}. Retrying...")
                        time.sleep(retry_wait_seconds)
                    else:
                        print(f"Timeout for query: {query}. Giving up after {max_attempts} attempts.")
                except ox._errors.InsufficientResponseError:
                    print(f"No data elements in server response for query: {query}. Skipping...")
                    break
                except Exception as e:
                    # Deliberate best-effort: one failing query must not abort
                    # the remaining downloads.
                    print(f"An unexpected error occurred: {e}. Skipping...")
                    break
                else:
                    # A successful request is final even when it is empty;
                    # repeating the identical query would return the same.
                    if not gdf.empty:
                        gdfs.append(gdf)
                    break

    if not gdfs:
        return None

    # Concatenate all fetched data first. ignore_index=True discards the
    # per-response index and replaces it with a fresh 0..n-1 range.
    all_gdf = gpd.GeoDataFrame(pd.concat(gdfs, ignore_index=True))

    # reindex both drops unwanted columns and adds the missing ones (empty).
    return all_gdf.reindex(columns=columns_to_keep, fill_value=None)


# For every city and tag group: load the raw features from a cached CSV (or
# download and cache them), label each feature with the tag key it matched,
# reproject to EPSG:27700 and write the polygon features to a shapefile.
for city_name in cities:
    # File-name friendly form of the place name, e.g. "Greater_London_United_Kingdom".
    file_stem = city_name.replace(',', '').replace(' ', '_')

    for tag_num, tags in enumerate(tags_list):
        # Only tag groups with index >= 6 are processed.
        # NOTE(review): presumably the earlier groups were handled in a
        # previous run - confirm before relying on this skip.
        if tag_num < 6:
            continue
        csv_filename = f"{file_stem}_raw_data_tag{tag_num}.csv"

        if os.path.exists(csv_filename):
            print(f"CSV file already exists for {city_name}. Loading data from CSV.")
            all_gdf = pd.read_csv(csv_filename, dtype={'geometry': 'str'}, low_memory=False)
            # The CSV stores geometries as WKT text; parse them back.
            all_gdf['geometry'] = all_gdf['geometry'].apply(load_wkt)
            all_gdf = gpd.GeoDataFrame(all_gdf, geometry='geometry')
        else:
            print(f"Starting data download for {city_name}")
            all_gdf = download_osm_data(city_name, tags)
            if all_gdf is not None:
                all_gdf.to_csv(csv_filename, index=False)
                print(f"Downloaded data for {city_name} contains {len(all_gdf)} rows")
                print(f"Downloaded data for {city_name} saved to CSV.")
            else:
                print(f"No data was downloaded for {city_name}. Continue with next city if applicable.")

        # download_osm_data returns None when nothing was fetched, so test for
        # None before touching DataFrame attributes.
        if all_gdf is None or all_gdf.empty:
            print(f"No data available to save for {city_name}.")
            continue

        # Label each feature with the tag key whose queried values it carries.
        # Columns are named after tag keys, not tag values. When several keys
        # match, the last key in the group wins.
        all_gdf['general'] = 'other'  # Default for features matching no key
        for tag, values in tags.items():
            if tag in all_gdf.columns:
                all_gdf.loc[all_gdf[tag].isin(values), 'general'] = tag

        # Data reloaded from CSV carries no CRS; it is declared as EPSG:4326.
        if all_gdf.crs is None:
            all_gdf = all_gdf.set_crs("EPSG:4326")

        all_gdf = all_gdf.to_crs("EPSG:27700")

        # Keep areal features only; points and lines are discarded.
        all_gdf = all_gdf[all_gdf.geometry.geom_type.isin(['Polygon', 'MultiPolygon'])]
        output_file_name = f"{file_stem}_GI_tag{tag_num}.shp"
        all_gdf.to_file(output_file_name)
        print(f"Cleaned and saved green infrastructure polygons for {city_name}.")


Starting data download for West Midlands, United Kingdom
Fetching data for {'highway': 'motorway'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'highway': 'trunk'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'highway': 'primary'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'highway': 'secondary'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'highway': 'tertiary'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'railway': 'rail'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'railway': 'tram'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'railway': 'light_rail'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'runway'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'taxiway'} in West Midlands, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'helipad'} in West Midlands, United Kingdom (Attempt 1)

  all_gdf.to_file(output_file_name)


Fetching data for {'highway': 'trunk'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'highway': 'primary'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'highway': 'secondary'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'highway': 'tertiary'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'railway': 'rail'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'railway': 'tram'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'railway': 'light_rail'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'runway'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'taxiway'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'helipad'} in Greater Manchester, United Kingdom (Attempt 1)
Fetching data for {'power': 'line'} in Greater Manchester, United Kingdom (Attempt 1)
Fetchin

  all_gdf.to_file(output_file_name)


Fetching data for {'highway': 'trunk'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'highway': 'primary'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'highway': 'secondary'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'highway': 'tertiary'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'railway': 'rail'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'railway': 'tram'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'railway': 'light_rail'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'runway'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'taxiway'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'aeroway': 'helipad'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'power': 'line'} in Greater London, United Kingdom (Attempt 1)
Fetching data for {'power': 'sub_station'} in Great

  all_gdf.to_file(output_file_name)
