# Download Eurostat data via API


If not yet available, install this eurostat api client https://pypi.org/project/eurostat
```!pip install eurostat```


In [11]:
import eurostat
import pandas as pd

In [12]:
# list datasets in the theme Cities and greater cities
toc_df = eurostat.get_toc_df()
cities_toc = eurostat.subset_toc_df(toc_df, "cities and greater cities")
cities_toc

Unnamed: 0,title,code,type,last update of data,last table structure change,data start,data end
2179,Economy and finance - cities and greater cities,URB_CECFI,dataset,2025-09-03T23:00:00+0200,2025-01-08T11:00:00+0100,1991.0,2022.0
2181,Education - cities and greater cities,URB_CEDUC,dataset,2025-09-03T23:00:00+0200,2025-06-24T23:00:00+0200,1990.0,2024.0
2183,Fertility and mortality - cities and greater c...,URB_CFERMOR,dataset,2025-09-03T23:00:00+0200,2025-09-03T23:00:00+0200,1990.0,2024.0
2185,Living conditions - cities and greater cities,URB_CLIVCON,dataset,2025-09-03T23:00:00+0200,2025-01-08T11:00:00+0100,1989.0,2024.0
2187,Labour market - cities and greater cities,URB_CLMA,dataset,2025-09-03T23:00:00+0200,2025-09-03T23:00:00+0200,1989.0,2024.0
2189,Population on 1 January by age groups and sex ...,URB_CPOP1,dataset,2025-09-03T23:00:00+0200,2025-06-24T23:00:00+0200,1989.0,2024.0
2191,Population by citizenship and country of birth...,URB_CPOPCB,dataset,2025-09-03T23:00:00+0200,2025-06-24T23:00:00+0200,1990.0,2024.0
2193,Culture and tourism - cities and greater cities,URB_CTOUR,dataset,2025-09-03T23:00:00+0200,2025-09-03T23:00:00+0200,1990.0,2024.0
2196,Culture and tourism - cities and greater cities,URB_CTOUR$DV_168,dataset,2025-09-03T23:00:00+0200,2025-09-03T23:00:00+0200,,
2211,Culture and tourism - cities and greater cities,URB_CTOUR$DV_169,dataset,2025-09-03T23:00:00+0200,2025-09-03T23:00:00+0200,,


In [13]:
# which fields?
pars = eurostat.get_pars('URB_CPOP1')
pars

['freq', 'indic_ur', 'cities']

In [14]:
# list all variables in all tables
for table in cities_toc.code:
    variables = eurostat.get_dic(table, "indic_ur", full=False)
    df =  pd.DataFrame(variables, columns =['Variable code', 'Description'])
    # df.to_csv("eurostat_urb_variables.csv", sep=";")
    print(table)
    print(df)

URB_CECFI
  Variable code                                       Description
0       EC2021V                                     All companies
1       EC2039V  Number of  local units - all economic activities
URB_CEDUC
   Variable code                                        Description
0        TE1001V       Number of children 0-4 in day care or school
1        TE1039V  Share of early leavers from education and trai...
2        TE1040V  Share of early leavers from education and trai...
3        TE1041V  Share of early leavers from education and trai...
4        TE1026V  Students in higher education (ISCED level 5-8 ...
5        TE1027V  Students in higher education (ISCED level 5-8 ...
6        TE1028V  Students in higher education (ISCED level 5-8 ...
7        TE2025V  Persons (aged 25-64) with ISCED level 0, 1or 2...
8        TE2028V  Persons (aged 25-64) with ISCED level 3 or 4 a...
9        TE2031V  Persons aged 25-64 with ISCED level 5, 6, 7 or...
10       TE1001I  Number of Childr

In [15]:
# load list of selected variables
urb_variables = pd.read_csv("./../../data/city_features_collection/eurostat_urb_selected_variables.csv")
urb_variables = urb_variables[urb_variables.Priority == 1]
urb_var_bytable = urb_variables.groupby("Table")["Variable code"].apply(list).reset_index(name="variables")
urb_var_bytable

Unnamed: 0,Table,variables
0,urb_cenv,"[EN1002V, EN1003V, EN1004V, EN3012V, EN3013V, ..."
1,urb_clivcon,"[EC3039V, EC3040V, EC3064V, EC3065V, SA1001V, ..."
2,urb_clma,"[EC1145V, EC1174V, EC1177V, EC1180V]"
3,urb_ctour,[CR1015V]
4,urb_ctran,"[TT1019V, TT1020V, TT1060V, TT1064V, TT1065V, ..."


In [16]:
# download data to dataframe
print(f"Downloading {urb_var_bytable.head(1).Table} data")
filters = {"indic_ur": urb_var_bytable.head(1).variables.item(),
      "startPeriod": 2004,
      "endPeriod": 2021}
df_all = eurostat.get_data_df(urb_var_bytable.head(1).Table.item(), flags=False, filter_pars=filters, verbose=True, reverse_time=False)
for row in urb_var_bytable[1:].itertuples():
    print(f"Downloading {row.Table} data")
    filters = {"indic_ur": row.variables,
          "startPeriod": 2004,
          "endPeriod": 2021}
    df_urb = eurostat.get_data_df(row.Table, flags=False, filter_pars=filters, verbose=True, reverse_time=False)

    if not df_urb.empty and not df_urb.isna().all().all():
      f_all = pd.concat([df_all, df_urb])

Downloading 0    urb_cenv
Name: Table, dtype: object data
Download progress: 100.0%

Downloading urb_clivcon data
Download progress: 100.0%

Downloading urb_clma data


  f_all = pd.concat([df_all, df_urb])


Download progress: 100.0%

Downloading urb_ctour data


  f_all = pd.concat([df_all, df_urb])


Download progress: 100.0%

Downloading urb_ctran data


  f_all = pd.concat([df_all, df_urb])


Download progress: 100.0%



  f_all = pd.concat([df_all, df_urb])


In [17]:
# check table
df_all

Unnamed: 0,freq,indic_ur,cities\TIME_PERIOD,2004,2005,2006,2007,2008,2009,2010,...,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,A,EN1002V,AT001C,5.30,,,,,,,...,,,,,,,,,,
1,A,EN1002V,AT002C,5.13,,,,,,,...,,,,,,,,,,
2,A,EN1002V,AT003C,4.86,,,,,,,...,,,,,,,,,,
3,A,EN1002V,AT004C,4.60,,,,,,,...,,,,,,,,,,
4,A,EN1002V,AT005C,5.25,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5400,A,EN5207V,UK119C,,,,,,,,...,6.72,,,,,,,,,
5401,A,EN5207V,UK121C,,,,,,,,...,1.49,,,,,,,,,
5402,A,EN5207V,UK122C,,,,,,,,...,3.72,,,,,,,,,
5403,A,EN5207V,UK124C,,,,,,,,...,29.07,,,,,,,,,


In [18]:
# drop freq column, rename city column
df_all.rename(columns={"indic_ur" : "indic_code", "cities\TIME_PERIOD" : "urau_code"}, inplace=True)
df_all.drop(["freq"], axis=1, inplace=True)
df_all

Unnamed: 0,indic_code,urau_code,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
0,EN1002V,AT001C,5.30,,,,,,,,,,,,,,,,,
1,EN1002V,AT002C,5.13,,,,,,,,,,,,,,,,,
2,EN1002V,AT003C,4.86,,,,,,,,,,,,,,,,,
3,EN1002V,AT004C,4.60,,,,,,,,,,,,,,,,,
4,EN1002V,AT005C,5.25,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5400,EN5207V,UK119C,,,,,,,,,6.72,,,,,,,,,
5401,EN5207V,UK121C,,,,,,,,,1.49,,,,,,,,,
5402,EN5207V,UK122C,,,,,,,,,3.72,,,,,,,,,
5403,EN5207V,UK124C,,,,,,,,,29.07,,,,,,,,,


In [19]:
# save to csv file
df_all.to_csv("./../../data/city_features_collection/eurostat_urb_data_2004_2021.csv", index=False)