Packages

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
# dates & cleaning
import re
import warnings
warnings.filterwarnings("ignore")
# optional: show full tables in console
pd.set_option("display.max_columns", None)
pd.set_option("display.width", 120)

In [2]:
#
demo_file = "../DATA/Amsterdam Region districts and neigbourhood data.xlsx"
# read file
demo = pd.read_excel(demo_file, skiprows=1)
# check
print("Demographics shape:", demo.shape)
print("\nDemographics columns:")
print(demo.columns)

Demographics shape: (583, 22)

Demographics columns:
Index(['Unnamed: 0', 'Unnamed: 1', 'Unnamed: 2', 'Unnamed: 3', 'Unnamed: 4', 'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7',
       'Unnamed: 8', 'Unnamed: 9', 'Unnamed: 10', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13', 'Unnamed: 14',
       'Unnamed: 15', 'Unnamed: 16', 'Unnamed: 17', 'Unnamed: 18', 'Unnamed: 19', 'Unnamed: 20', 'Unnamed: 21'],
      dtype='str')


In [3]:
def clean_demo_sheet(file, year):
    year = str(year)
    if year == "2025":
        raw = pd.read_excel(demo_file, sheet_name=year, header=None, skiprows=3)
    elif year == "2024":
        raw = pd.read_excel(demo_file, sheet_name=year, header=None, skiprows=1)
    else:
        raw = pd.read_excel(demo_file, sheet_name=year, header=None)

    top = raw.iloc[0]      # group titles
    bottom = raw.iloc[1]   # column titles

    columns = []

    for t, b in zip(top, bottom):

        t = str(t).strip()
        b = str(b).strip()

        if b == "#" or b == "nan":
            columns.append(t)

        elif t == "nan":
            columns.append(b)

        else:
            columns.append(f"{t} {b}")

    # actual data starts row 2
    df = raw.iloc[2:].copy()
    df.columns = columns
    df = df.reset_index(drop=True)

    # remove empty rows
    df = df[df.iloc[:,0].notna()]
    return df


In [4]:
df2025 = clean_demo_sheet(demo_file, 2025)
df2024 = clean_demo_sheet(demo_file, 2024)
df2023 = clean_demo_sheet(demo_file, 2023)
df2022 = clean_demo_sheet(demo_file, 2022)
df2021 = clean_demo_sheet(demo_file, 2021)
df2020 = clean_demo_sheet(demo_file, 2020)

In [5]:
df2025.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,873338,433550,439788,123066,109208,317606,210029,113429,10779,5914,474875,256596,104309,113970,18,21295,65685,.,2025
1,Burgwallen-Oude Zijde,District,WK036300,4520,2485,2040,205,780,2140,945,445,35,20,3180,2155,780,245,14,35,130,.,2025
2,Kop Zeedijk,Neighbourhood,BU03630000,1115,650,465,40,145,555,275,100,5,5,770,520,200,50,14,.,50,1012,2025
3,Oude Kerk e.o.,Neighbourhood,BU03630001,695,390,305,20,150,340,125,70,5,0,510,350,130,30,14,.,15,1012,2025
4,Burgwallen Oost,Neighbourhood,BU03630002,1650,860,790,90,285,770,325,180,15,5,1145,760,285,105,14,25,40,1012,2025


In [6]:
df2024.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,931298,460766,470532,122836,124148,343274,214560,126480,9797,6021,515181,283997,114010,117174,18,21200,67885,.,2024
1,Haarlemmerbuurt,District,WK0363AA,9180,4570,4610,785,915,3090,2530,1865,55,65,5765,3505,1395,870,16,135,650,.,2024
2,Planciusbuurt-Noord,Neighbourhood,BU0363AA01,345,185,160,20,25,165,100,35,0,5,260,190,45,20,13,.,15,1013,2024
3,Westelijke eilanden,Neighbourhood,BU0363AA02,2895,1375,1520,275,270,920,815,610,20,20,1835,1150,400,295,16,60,170,1013,2024
4,Westerdokseiland,Neighbourhood,BU0363AA03,1850,945,900,205,150,540,530,420,15,5,1000,450,335,220,18,25,145,1013,2024


In [7]:
df2023.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,918117,454273,463844,124270,120404,335216,215053,123174,9673,5977,505705,277385,111234,117086,18,22555.0,67055,.,2023
1,Haarlemmerbuurt,District,WK0363AA,9135,4565,4570,810,855,3055,2640,1775,70,65,5695,3410,1405,875,16,155.0,630,.,2023
2,Planciusbuurt-Noord,Neighbourhood,BU0363AA01,345,180,160,15,25,180,90,35,5,5,255,185,50,20,13,,10,1013,2023
3,Westelijke eilanden,Neighbourhood,BU0363AA02,2910,1395,1520,290,260,910,870,585,25,15,1840,1140,400,295,16,55.0,160,1013,2023
4,Westerdokseiland,Neighbourhood,BU0363AA03,1825,925,900,210,140,515,550,410,15,10,990,445,330,215,18,30.0,145,1013,2023


In [8]:
df2022.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,882633,437582,445051,121642,115117,320787,209094,115993,9657,5788,484574,265133,105998,113443,18,21590,68325,.,2022
1,Burgwallen-Oude Zijde,District,WK036300,4550,2515,2030,205,865,2100,920,445,25,20,3245,2230,770,245,14,35,125,.,2022
2,Kop Zeedijk,Neighbourhood,BU03630000,1135,640,495,45,155,565,265,100,5,5,825,575,195,55,14,10,45,1012,2022
3,Oude Kerk e.o.,Neighbourhood,BU03630001,705,400,305,20,170,325,115,70,5,5,515,355,135,30,14,.,15,1012,2022
4,Burgwallen Oost,Neighbourhood,BU03630002,1630,880,755,85,335,720,325,165,10,10,1135,770,265,100,14,20,45,1012,2022


In [9]:
df2021.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,873338,433550,439788,123066,109208,317606,210029,113429,10779,5914,474875,256596,104309,113970,18,21295,65685,.,2021
1,Burgwallen-Oude Zijde,District,WK036300,4520,2485,2040,205,780,2140,945,445,35,20,3180,2155,780,245,14,35,130,.,2021
2,Kop Zeedijk,Neighbourhood,BU03630000,1115,650,465,40,145,555,275,100,5,5,770,520,200,50,14,.,50,1012,2021
3,Oude Kerk e.o.,Neighbourhood,BU03630001,695,390,305,20,150,340,125,70,5,0,510,350,130,30,14,.,15,1012,2021
4,Burgwallen Oost,Neighbourhood,BU03630002,1650,860,790,90,285,770,325,180,15,5,1145,760,285,105,14,25,40,1012,2021


In [10]:
df2020.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,872757,432879,439878,125119,108636,317667,210376,110959,10345,5763,475368,258962,101708,114698,18,20660,70580,.,2020
1,Burgwallen-Oude Zijde,District,WK036300,4465,2465,2000,220,695,2190,940,425,30,10,3195,2230,730,240,14,30,135,.,2020
2,Kop Zeedijk,Neighbourhood,BU03630000,1100,625,475,50,125,575,275,75,10,5,760,500,205,50,14,.,55,1012,2020
3,Oude Kerk e.o.,Neighbourhood,BU03630001,730,410,315,30,130,370,130,70,10,0,540,385,120,35,13,.,15,1012,2020
4,Burgwallen Oost,Neighbourhood,BU03630002,1615,860,755,90,255,780,310,175,15,5,1155,810,255,95,14,15,45,1012,2020


In [26]:
demographics = pd.concat([df2020, df2021, df2022, df2023, df2024, df2025], ignore_index=True)

In [27]:
demographics.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year
0,Amsterdam,Municipality,GM0363,872757,432879,439878,125119,108636,317667,210376,110959,10345,5763,475368,258962,101708,114698,18,20660,70580,.,2020
1,Burgwallen-Oude Zijde,District,WK036300,4465,2465,2000,220,695,2190,940,425,30,10,3195,2230,730,240,14,30,135,.,2020
2,Kop Zeedijk,Neighbourhood,BU03630000,1100,625,475,50,125,575,275,75,10,5,760,500,205,50,14,.,55,1012,2020
3,Oude Kerk e.o.,Neighbourhood,BU03630001,730,410,315,30,130,370,130,70,10,0,540,385,120,35,13,.,15,1012,2020
4,Burgwallen Oost,Neighbourhood,BU03630002,1615,860,755,90,255,780,310,175,15,5,1155,810,255,95,14,15,45,1012,2020


In [28]:
demographics.shape

(3577, 22)

In [29]:
text_cols = [
    "Districts & Neigbourhoods",
    "Type of region",
    "Regioncode",
    "Year"
]

num_cols = [c for c in demographics.columns if c not in text_cols]

for col in num_cols:
    demographics[col] = (
        demographics[col]
        .astype(str)
        .str.replace(r'(?<=\d),(?=\d)', '.', regex=True)
        .pipe(pd.to_numeric, errors="coerce")
    )


In [None]:
demographics = demographics[
    demographics["Type of region"].isin(["Neighbourhood", "District"])
]

In [31]:
demographics["Type of region"].unique()

<ArrowStringArray>
['District', 'Neighbourhood']
Length: 2, dtype: str

In [32]:
demographics[num_cols] = demographics[num_cols].fillna(0)

Education Inicators

In [33]:
demographics["children_0_15"] = demographics["Population| Age group |0 tot 15 year"]
demographics["youth_15_25"] = demographics["Population| Age group |15 tot 25 year"]
demographics["families_children"] = demographics["Households with children"]


*Care demand* *INDiCATORS*

In [34]:
demographics["elderly_65_plus"] = demographics["Population| Age group |65+ year"]
demographics["single_households_ind"] = demographics["Single-person households"]
demographics["youth_care_users"] = demographics["Care|Young people receiving youth care"]
demographics["wmo_users"] = demographics["Healthcare|Wmo clients"]

YOUTH CARE INDICATORS 

In [35]:
demographics["youth_care_rate"] = demographics["youth_care_users"] / demographics["Total population"] * 1000
demographics["wmo_rate"] = demographics["wmo_users"] / demographics["Total population"] * 1000


In [36]:
# sort properly
demographics = demographics.sort_values(["Districts & Neigbourhoods", "Year"])

cols = [
    "children_0_15",
    "youth_15_25",
    "elderly_65_plus",
    "families_children",
    "single_households_ind"
]

# growth per district
for c in cols:
    demographics[c + "_growth"] = (
        demographics
        .groupby("Districts & Neigbourhoods")[c]
        .pct_change()
    )


In [41]:
demographics.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year,children_0_15,youth_15_25,families_children,elderly_65_plus,single_households_ind,youth_care_users,wmo_users,youth_care_rate,wmo_rate,children_0_15_growth,youth_15_25_growth,elderly_65_plus_growth,families_children_growth,single_households_ind_growth
530,AMC,Neighbourhood,BU03639205,35.0,25.0,5.0,0.0,0.0,15.0,20.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,1.0,0.0,20.0,1105.0,2020,0.0,0.0,0.0,0.0,5.0,0.0,20.0,0.0,571.428571,,,,,
1110,AMC,Neighbourhood,BU03639205,30.0,25.0,5.0,0.0,0.0,10.0,20.0,0.0,0.0,0.0,25.0,30.0,0.0,0.0,1.0,0.0,0.0,1105.0,2021,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,,,,,5.0
1690,AMC,Neighbourhood,BU03639205,30.0,20.0,5.0,0.0,0.0,15.0,15.0,0.0,0.0,0.0,30.0,30.0,0.0,0.0,1.0,0.0,10.0,1105.0,2022,0.0,0.0,0.0,0.0,30.0,0.0,10.0,0.0,333.333333,,,,,0.0
2316,AMC,Neighbourhood,BU0363TA06,30.0,25.0,0.0,0.0,0.0,15.0,15.0,0.0,0.0,0.0,30.0,30.0,0.0,0.0,1.0,0.0,10.0,1105.0,2023,0.0,0.0,0.0,0.0,30.0,0.0,10.0,0.0,333.333333,,,,,0.0
2944,AMC,Neighbourhood,BU0363TA06,35.0,25.0,10.0,0.0,0.0,15.0,15.0,0.0,0.0,0.0,35.0,35.0,0.0,0.0,1.0,0.0,0.0,1105.0,2024,0.0,0.0,0.0,0.0,35.0,0.0,0.0,0.0,0.0,,,,,0.166667


In [39]:
demographics.groupby("Year")["Districts & Neigbourhoods"].nunique()

Year
2020    573
2021    573
2022    573
2023    624
2024    624
2025    573
Name: Districts & Neigbourhoods, dtype: int64

In [43]:
demographics[["Districts & Neigbourhoods","Year","children_0_15","children_0_15_growth"]].head(20)


Unnamed: 0,Districts & Neigbourhoods,Year,children_0_15,children_0_15_growth
530,AMC,2020,0.0,
1110,AMC,2021,0.0,
1690,AMC,2022,0.0,
2316,AMC,2023,0.0,
2944,AMC,2024,0.0,
3527,AMC,2025,0.0,
255,Aalsmeerwegbuurt Oost,2020,245.0,
835,Aalsmeerwegbuurt Oost,2021,220.0,-0.102041
1415,Aalsmeerwegbuurt Oost,2022,210.0,-0.045455
3252,Aalsmeerwegbuurt Oost,2025,220.0,0.047619


In [40]:
demographics.shape

(3566, 36)

In [None]:
demographics.to_csv("../data_clean/demographics.csv", index=False)

In [25]:
demographics.info()

<class 'pandas.DataFrame'>
Index: 3566 entries, 530 to 3067
Data columns (total 36 columns):
 #   Column                                  Non-Null Count  Dtype  
---  ------                                  --------------  -----  
 0   Districts & Neigbourhoods               3566 non-null   str    
 1   Type of region                          3566 non-null   str    
 2   Regioncode                              3566 non-null   str    
 3   Total population                        3566 non-null   float64
 4   Man                                     3566 non-null   float64
 5   Woman                                   3566 non-null   float64
 6   Population| Age group |0 tot 15 year    3566 non-null   float64
 7   Population| Age group |15 tot 25 year   3566 non-null   float64
 8   Population| Age group |25 tot 45 year   3566 non-null   float64
 9   Population| Age group |45 tot 65 year   3566 non-null   float64
 10  Population| Age group |65+ year         3566 non-null   float64
 11  Birth

In [45]:
demographics.head()

Unnamed: 0,Districts & Neigbourhoods,Type of region,Regioncode,Total population,Man,Woman,Population| Age group |0 tot 15 year,Population| Age group |15 tot 25 year,Population| Age group |25 tot 45 year,Population| Age group |45 tot 65 year,Population| Age group |65+ year,Births,Deaths,Total households,Single-person households,Households without children,Households with children,Average household size,Care|Young people receiving youth care,Healthcare|Wmo clients,ZIPcode,Year,children_0_15,youth_15_25,families_children,elderly_65_plus,single_households_ind,youth_care_users,wmo_users,youth_care_rate,wmo_rate,children_0_15_growth,youth_15_25_growth,elderly_65_plus_growth,families_children_growth,single_households_ind_growth
530,AMC,Neighbourhood,BU03639205,35.0,25.0,5.0,0.0,0.0,15.0,20.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,1.0,0.0,20.0,1105.0,2020,0.0,0.0,0.0,0.0,5.0,0.0,20.0,0.0,571.428571,,,,,
1110,AMC,Neighbourhood,BU03639205,30.0,25.0,5.0,0.0,0.0,10.0,20.0,0.0,0.0,0.0,25.0,30.0,0.0,0.0,1.0,0.0,0.0,1105.0,2021,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,,,,,5.0
1690,AMC,Neighbourhood,BU03639205,30.0,20.0,5.0,0.0,0.0,15.0,15.0,0.0,0.0,0.0,30.0,30.0,0.0,0.0,1.0,0.0,10.0,1105.0,2022,0.0,0.0,0.0,0.0,30.0,0.0,10.0,0.0,333.333333,,,,,0.0
2316,AMC,Neighbourhood,BU0363TA06,30.0,25.0,0.0,0.0,0.0,15.0,15.0,0.0,0.0,0.0,30.0,30.0,0.0,0.0,1.0,0.0,10.0,1105.0,2023,0.0,0.0,0.0,0.0,30.0,0.0,10.0,0.0,333.333333,,,,,0.0
2944,AMC,Neighbourhood,BU0363TA06,35.0,25.0,10.0,0.0,0.0,15.0,15.0,0.0,0.0,0.0,35.0,35.0,0.0,0.0,1.0,0.0,0.0,1105.0,2024,0.0,0.0,0.0,0.0,35.0,0.0,0.0,0.0,0.0,,,,,0.166667
