In [47]:
# Processing tools to read from Helsinki WFS API and produce GeoJSON

import json

import pandas as pd
import requests
import pyproj
from shapely.geometry import shape

# Prepare district population data.
# Read the sheet named "2022" of the Excel file exactly once: start at row 8
# (the first 7 rows are skipped) and keep only the first two columns (A and B).
df_pop = pd.read_excel("Hki_vaesto_taulu2.xlsx", sheet_name="2022", skiprows=7, usecols="A:B")

# Rename the columns
df_pop.columns = ["District", "Population"]

# Drop rows where either column is missing
df_pop = df_pop.dropna()

# Add new column with values "MajorDistrict", "District" or "SubDistrict".
# It is based on how many leading spaces there are in the 'District' column
space_map = {0: "MajorDistrict", 2: "District", 5: "SubDistrict"}


def district_type(x):
    """
    Classify a raw 'District' cell by its indentation.

    The number of leading whitespace characters in `x` is looked up in the
    module-level `space_map` and the matching label is returned.

    Raises KeyError (naming the offending label) if the indentation width is
    not a key of `space_map`.
    """
    # str.lstrip() removes leading whitespace, so the length difference is the indent width
    indent = len(x) - len(x.lstrip())
    try:
        return space_map[indent]
    except KeyError:
        raise KeyError(f"Unexpected indentation of {indent} in district label {x!r}") from None


# Classify every row from its indentation while the leading spaces are still present
df_pop["Type"] = df_pop["District"].map(district_type)

# Trim the padding, then cut each label at its first space into code and name
code_and_name = df_pop["District"].str.strip().str.split(" ", n=1, expand=True)

# Attach the two new columns and keep only the final column set, in display order
df_pop = df_pop.assign(
    DistrictCode=code_and_name[0],
    DistrictName=code_and_name[1],
).loc[:, ["DistrictCode", "DistrictName", "Type", "Population"]]

print(df_pop.head(20))

   DistrictCode         DistrictName           Type  Population
0             1  Eteläinen suurpiiri  MajorDistrict    121157.0
1           101           Vironniemi       District     12794.0
2           010          Kruununhaka    SubDistrict      7175.0
3           020               Kluuvi    SubDistrict       687.0
4           080          Katajanokka    SubDistrict      4932.0
5           102           Ullanlinna       District     24636.0
6           030      Kaartinkaupunki    SubDistrict      1061.0
7           050            Punavuori    SubDistrict      9267.0
8           060                 Eira    SubDistrict      1083.0
9           070           Ullanlinna    SubDistrict     10893.0
10          090          Kaivopuisto    SubDistrict       465.0
11          204           Hernesaari    SubDistrict      1181.0
12          520          Suomenlinna    SubDistrict       686.0
13          103          Kampinmalmi       District     43738.0
14          040               Kamppi    

In [48]:
# TODO: remove all rows that do not start with a digit (this step has no code yet)

# Helsinki district division data

Helsinki major district, district and city sub-district data is available
from Helsinki Region Infoshare (HRI) through a WFS API that can return GeoJSON.
The data is available in Finnish and Swedish, in the EPSG:4326 (WGS84) coordinate reference system.

https://hri.fi/data/en_GB/dataset/helsingin-piirijako

Population figures are read from an Excel file published by
[HRI](https://hri.fi/data/en_GB/dataset/helsingin-vaesto-piireittain-ja-osa-alueittain-ian-mukaan-2004-alkaen/resource/2341bfb7-52d8-4816-b2ac-b7c7d63e716a).


In [49]:
# Helper functions
def add_population(feature: dict, district_level=None):
    """
    Look up the population of a district feature from the module-level df_pop.

    The row is matched on feature["properties"]["tunnus"] against the
    "DistrictCode" column. df_pop mixes MajorDistrict, District and SubDistrict
    rows, and codes are not guaranteed to be unique across those levels (for
    example sub-district 204 sits in the same three-digit code space as the
    districts). Pass `district_level` to restrict the match to rows whose
    "Type" column equals it. With the default None every row is considered
    and the first match wins.

    Returns the population as int, or None when no row matches.
    """
    candidates = df_pop[df_pop["DistrictCode"] == feature["properties"]["tunnus"]]
    if district_level is not None:
        candidates = candidates[candidates["Type"] == district_level]
    if candidates.empty:
        return None
    # Convert the population value to a plain int
    return int(candidates["Population"].iloc[0])


def calculate_geojson_polygon_area(feature: dict):
    """
    Return the geodesic area of a GeoJSON feature's geometry as a non-negative int.

    The geometry is parsed with shapely and measured with pyproj on the WGS84
    ellipsoid; the result is truncated to an integer.
    """
    geometry = shape(feature["geometry"])
    wgs84 = pyproj.Geod(ellps="WGS84")
    # geometry_area_perimeter returns (area, perimeter); the area is signed by ring
    # orientation, so only its magnitude is kept.
    # NOTE(review): Geod presumably expects lon/lat coordinate order - confirm the WFS
    # response is not delivered as lat/lon for the requested srsName.
    signed_area, _perimeter = wgs84.geometry_area_perimeter(geometry)
    return int(abs(signed_area))


def cleanup_district_feature(obj):
    """
    Trim a district GeoJSON feature in place and return the same object.

    - drops the top-level "geometry_name" member
    - adds properties["name"] = {"fi": <nimi_fi>, "sv": <nimi_se>}
    - renames "kunta" -> "municipality" and "aluejako" -> "district_type"
    - removes the remaining source-specific properties

    Raises KeyError if "properties", "nimi_fi", "nimi_se", "kunta" or
    "aluejako" is missing. Members that are only removed are optional: a
    feature that lacks one of them is cleaned up without failing.
    """
    properties = obj["properties"]
    obj.pop("geometry_name", None)

    properties["name"] = {
        "fi": properties["nimi_fi"],
        "sv": properties["nimi_se"],
    }

    properties_map = {
        "kunta": "municipality",
        "aluejako": "district_type",
    }
    for old_key, new_key in properties_map.items():
        properties[new_key] = properties.pop(old_key)

    properties_to_remove = [
        "tunnus",
        "nimi_fi",
        "nimi_se",
        "yhtdatanomistaja",
        "kokotunnus",
        "datanomistaja",
        "paivitetty_tietopalveluun",
        "yhtluontipvm",
        "yhtmuokkauspvm",
    ]
    for key in properties_to_remove:
        # A missing remove-only field needs no action
        properties.pop(key, None)
    return obj

In [50]:
# dataset name mapping to file name (major district, district, city sub-district)
district_mapping = {
    "Piirijako_suurpiiri": "HelsinkiMajorDistrict",
    "Piirijako_peruspiiri": "HelsinkiDistrict",
    "Piirijako_osaalue": "HelsinkiSubDistrict",
}

# dataset name mapping to the matching "Type" label in df_pop
population_type_mapping = {
    "Piirijako_suurpiiri": "MajorDistrict",
    "Piirijako_peruspiiri": "District",
    "Piirijako_osaalue": "SubDistrict",
}

for type_name, file_stem in district_mapping.items():
    res = requests.get(
        "https://kartta.hel.fi/ws/geoserver/avoindata/wfs"
        "?service=wfs&version=2.0.0&request=GetFeature"
        f"&typeNames=avoindata:{type_name}&count=1000000"
        "&outputFormat=application/json&srsName=urn:ogc:def:crs:EPSG::4326",
        # Without a timeout a stalled server would hang the notebook forever
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page
    res.raise_for_status()
    # parse json to make sure it is valid json
    data = res.json()

    # District codes are not guaranteed to be unique across division levels, so the
    # population lookup only uses the rows of the level this dataset describes.
    level_rows = df_pop[df_pop["Type"] == population_type_mapping[type_name]]
    population_by_code = {}
    for code, population in zip(level_rows["DistrictCode"], level_rows["Population"]):
        # First occurrence wins if a code is repeated within a level
        population_by_code.setdefault(code, int(population))

    for feature in data["features"]:
        # None when the population table has no row for this code
        feature["properties"]["population"] = population_by_code.get(feature["properties"]["tunnus"])
        feature["properties"]["area_m2"] = calculate_geojson_polygon_area(feature)
        # Mutates the feature in place; must run last because it removes "tunnus"
        cleanup_district_feature(feature)

    fname = f"{file_stem}.json"
    with open(fname, "w", encoding="utf-8") as f:
        json.dump(data, f)
    print(f"* [{fname}](https://proto.fvh.io/urbanage/{fname})")

* [HelsinkiMajorDistrict.json](https://proto.fvh.io/urbanage/HelsinkiMajorDistrict.json)
* [HelsinkiDistrict.json](https://proto.fvh.io/urbanage/HelsinkiDistrict.json)
* [HelsinkiSubDistrict.json](https://proto.fvh.io/urbanage/HelsinkiSubDistrict.json)
