In [2]:
import numpy as np
import pandas as pd
import datetime
import pandas_datareader
import pydst
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
from matplotlib_venn import venn2

# setup data loader with the langauge 'english'\n",
Dst = pydst.Dst(lang='en')

# List all subjects.
Dst.get_subjects()

# List all tables under the subject Education and knowledge which has the id 03\n",
tables = Dst.get_tables(subjects=['03'])

#Choose the table \"HFUDD11: Befolkningens højest fuldførte uddannelse (15-69 år) efter bopælsområde, herkomst, højest fuldførte uddannelse, alder og køn\"\n",
tables[tables.id == 'HFUDD11']

# Get the variables in the dataset HFUDD11\n",
Edd_vars = Dst.get_variables(table_id='HFUDD11')
Edd_vars

# List all the ID of the of variable in the dataset HFUDD11:\n",
Edd_vars = Dst.get_variables(table_id='HFUDD11')
for id in ['BOPOMR','HERKOMST','HFUDD','Tid','ALDER']:
    print(id)
    values = Edd_vars.loc[Edd_vars.id == id,['values']].values[0,0]
    for value in values:      
        print(f' id = {value["id"]}, text = {value["text"]}')

BOPOMR
 id = 000, text = All Denmark
 id = 084, text = Region Hovedstaden
 id = 101, text = Copenhagen
 id = 147, text = Frederiksberg
 id = 155, text = Dragør
 id = 185, text = Tårnby
 id = 165, text = Albertslund
 id = 151, text = Ballerup
 id = 153, text = Brøndby
 id = 157, text = Gentofte
 id = 159, text = Gladsaxe
 id = 161, text = Glostrup
 id = 163, text = Herlev
 id = 167, text = Hvidovre
 id = 169, text = Høje-Taastrup
 id = 183, text = Ishøj
 id = 173, text = Lyngby-Taarbæk
 id = 175, text = Rødovre
 id = 187, text = Vallensbæk
 id = 201, text = Allerød
 id = 240, text = Egedal
 id = 210, text = Fredensborg
 id = 250, text = Frederikssund
 id = 190, text = Furesø
 id = 270, text = Gribskov
 id = 260, text = Halsnæs
 id = 217, text = Helsingør
 id = 219, text = Hillerød
 id = 223, text = Hørsholm
 id = 230, text = Rudersdal
 id = 400, text = Bornholm
 id = 411, text = Christiansø
 id = 085, text = Region Sjælland
 id = 253, text = Greve
 id = 259, text = Køge
 id = 350, text 

In [3]:
# Using the IDs from the previous table we get the data
variables = {'BOPOMR':['*'],'HERKOMST':['*'],'HFUDD':['H10','H20','H30','H40','H50','H60','H70','H80'],'Tid':['*'],'KØN':['*'],'ALDER':['TOT']}
inc_api = Dst.get_data(table_id = 'HFUDD11', variables=variables)
inc_api.head(50)

Unnamed: 0,BOPOMR,HERKOMST,HFUDD,TID,KØN,ALDER,INDHOLD
0,Haderslev,Persons of Danish origin,H80 PhD programs,2008,Women,"Age, total",5
1,Haderslev,Persons of Danish origin,H80 PhD programs,2008,Men,"Age, total",15
2,Haderslev,Persons of Danish origin,H80 PhD programs,2008,Total,"Age, total",20
3,Billund,Total,H10 Primary education,2008,Women,"Age, total",3565
4,Billund,Total,H10 Primary education,2008,Men,"Age, total",3489
5,Billund,Total,H10 Primary education,2008,Total,"Age, total",7054
6,Langeland,Persons of Danish origin,H40 Short cycle higher education,2008,Women,"Age, total",63
7,Langeland,Persons of Danish origin,H40 Short cycle higher education,2008,Men,"Age, total",148
8,Langeland,Persons of Danish origin,H40 Short cycle higher education,2008,Total,"Age, total",211
9,Langeland,Persons of Danish origin,H50 Vocational bachelors educations,2008,Women,"Age, total",567


In [4]:
#Drop colum ALDER and KØN
drop_these = ['ALDER','KØN']
inc_api.drop(drop_these ,axis=1,inplace=True)
inc_api.head(50)

Unnamed: 0,BOPOMR,HERKOMST,HFUDD,TID,INDHOLD
0,Haderslev,Persons of Danish origin,H80 PhD programs,2008,5
1,Haderslev,Persons of Danish origin,H80 PhD programs,2008,15
2,Haderslev,Persons of Danish origin,H80 PhD programs,2008,20
3,Billund,Total,H10 Primary education,2008,3565
4,Billund,Total,H10 Primary education,2008,3489
5,Billund,Total,H10 Primary education,2008,7054
6,Langeland,Persons of Danish origin,H40 Short cycle higher education,2008,63
7,Langeland,Persons of Danish origin,H40 Short cycle higher education,2008,148
8,Langeland,Persons of Danish origin,H40 Short cycle higher education,2008,211
9,Langeland,Persons of Danish origin,H50 Vocational bachelors educations,2008,567


In [5]:
#Translate the names of the variable to english
inc_api.rename(columns={'BOPOMR':'municipality','HERKOMST':'origin','TID':'year','HFUDD':'education','INDHOLD':'number of individuals'},inplace=True)
inc_api.head(5)

Unnamed: 0,municipality,origin,education,year,number of individuals
0,Haderslev,Persons of Danish origin,H80 PhD programs,2008,5
1,Haderslev,Persons of Danish origin,H80 PhD programs,2008,15
2,Haderslev,Persons of Danish origin,H80 PhD programs,2008,20
3,Billund,Total,H10 Primary education,2008,3565
4,Billund,Total,H10 Primary education,2008,3489
