# Problem set 3 extra assignment using Statistics Denmark's API

In [1]:
def load_reqs(installs=False):
    """Optionally install the third-party packages this notebook uses.

    Args:
        installs (bool): When True, pip-install every required package.
            When False (the default) the function does nothing.

    Returns:
        None
    """
    if not installs:
        return

    # Function-scope imports keep this cell runnable on its own.
    import subprocess
    import sys

    requirements = [
        'numpy',
        'matplotlib',
        'pandas',
        'ipywidgets',
        'git+https://github.com/elben10/pydst',
    ]
    for requirement in requirements:
        # Running pip through sys.executable installs into the interpreter
        # that runs this kernel, which a bare `pip` on PATH need not do.
        result = subprocess.run([sys.executable, '-m', 'pip', 'install', requirement])
        if result.returncode != 0:
            # Keep going so one failing package does not block the others.
            print(f'pip install {requirement} failed with exit code {result.returncode}')
    # No import is done here: an import inside this function would only bind
    # a local name and would not be visible to the rest of the notebook.

In [2]:
load_reqs(installs =False) #Set to True to install modules
import matplotlib.pyplot as plt
import pandas as pd
import ipywidgets as widgets
import pydst

# matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever name is available.
_whitegrid_style = 'seaborn-v0_8-whitegrid'
if _whitegrid_style not in plt.style.available:
    _whitegrid_style = 'seaborn-whitegrid'
plt.style.use(_whitegrid_style)

In [3]:
table_id = 'NAH1' # id of the table that the cells below request through the API

In [4]:
Dst = pydst.Dst(lang='en') # set up the pydst data loader with lang='en' (English)

In [5]:
Dst.get_data(table_id = table_id, variables=None) # inspect the table without selecting any variables (variables=None)

Unnamed: 0,TRANSAKT,PRISENHED,TID,INDHOLD
0,P.1 Output,Current prices,1966,144582


In [6]:
# ['*'] asks for every value of a variable
variables = {name: ['*'] for name in ('TRANSAKT', 'PRISENHED', 'TID')}

# fetch the table and order it by transaction, price unit and time
api = (
    Dst.get_data(table_id=table_id, variables=variables)
    .sort_values(by=['TRANSAKT', 'PRISENHED', 'TID'])
)
api # display the table

Unnamed: 0,TRANSAKT,PRISENHED,TID,INDHOLD
1620,D.21 Taxes on products,"2010-prices, chained values",1966,98704
3120,D.21 Taxes on products,"2010-prices, chained values",1967,113353
1260,D.21 Taxes on products,"2010-prices, chained values",1968,121454
1320,D.21 Taxes on products,"2010-prices, chained values",1969,128779
1680,D.21 Taxes on products,"2010-prices, chained values",1970,131471
...,...,...,...,...
1259,Uses,Current prices,2017,5157887
2219,Uses,Current prices,2018,5391323
119,Uses,Current prices,2019,5576012
1619,Uses,Current prices,2020,5453676


In [7]:
def subset(df, price):
    '''
    Subset and reshape the api table for plotting

    Args:
        df (PandasDataframe): api dataframe with TRANSAKT, PRISENHED (price unit), TID (time), INDHOLD (values)
        price (string): Selected PRISENHED (price unit)
    Returns:
        dataframe with one row per TID, one column per renamed TRANSAKT and a 'year' column
    '''

    # Short column labels for the TRANSAKT values that are kept
    short_names = {
        'P.1 Output': 'Y',
        'P.3 Final consumption expenditure': 'C',
        'P.3 Government consumption expenditure': 'G',
        'P.5 Gross capital formation': 'I',
        'P.6 Export of goods and services': 'X',
        'P.7 Import of goods and services': 'M',
    }

    # keep only rows with the chosen price unit and a TRANSAKT listed above
    keep = (df["PRISENHED"] == price) & df["TRANSAKT"].isin(list(short_names))

    # one row per TID, one column per TRANSAKT, then apply the short labels
    wide = (
        df[keep]
        .pivot(index='TID', columns='TRANSAKT', values='INDHOLD')
        .rename(columns=short_names)
    )

    wide['year'] = wide.index # copy the TID index into a regular column
    return wide.reset_index(drop=True) # replace the index with 0..(number of rows - 1)

In [8]:
def _plot_timeseries(dataframe, variable, years, price_var):
    '''
    Plot one variable against year for the chosen price unit and year range

    Args:
        dataframe (PandasDataframe): api dataframe, passed on to subset()
        variable (string): column of the subset table to plot
        years (sequence): (first year, last year), both inclusive
        price_var (string): Selected PRISENHED (price unit)
    '''
    first_year, last_year = years[0], years[1]

    # subset for the price unit and make every column numeric
    table = subset(dataframe, price_var).apply(pd.to_numeric)

    fig, ax = plt.subplots(dpi=100)

    # rows whose year lies inside the selected range (both ends included)
    in_range = table['year'].between(first_year, last_year)
    ax.plot(table.loc[in_range, 'year'], table.loc[in_range, variable])

    # one tick every second year, rotated tick labels
    ax.set_xticks(list(range(first_year, last_year + 1, 2)))
    ax.set_ylabel(f'{variable}: {price_var}') # y label follows the selected variable and price unit
    ax.set_xticklabels(ax.get_xticks(), rotation = 50)
def plot_timeseries(dataframe):
    '''
    Show an interactive time-series plot driven by ipywidgets controls

    Builds dropdowns for the price unit and the variable plus a year-range
    slider, and connects them to _plot_timeseries.

    Args:
        dataframe (PandasDataframe): api dataframe with TRANSAKT, PRISENHED (price unit), TID (time), INDHOLD (values)
    '''
    # Take the slider bounds from the data so that every available year can be
    # selected; fall back to the previous fixed bounds if TID cannot be read.
    try:
        tid = pd.to_numeric(dataframe['TID'])
        first_year, last_year = int(tid.min()), int(tid.max())
    except (KeyError, TypeError, ValueError):
        first_year, last_year = 1966, 2018

    # Start the selection at 1980 when the data reaches that far back.
    default_start = min(max(1980, first_year), last_year)

    widgets.interact(
        _plot_timeseries,
        dataframe=widgets.fixed(dataframe),
        price_var=widgets.Dropdown(
            description='Price variable',
            options=['Current prices', '2010-prices, chained values'],
            value='2010-prices, chained values'),
        variable=widgets.Dropdown(
            description='variable',
            options=['Y', 'C', 'G', 'I', 'X', 'M'],
            value='Y'),
        years=widgets.IntRangeSlider(
            description="years",
            min=first_year,
            max=last_year,
            value=[default_start, last_year],
            continuous_update=False,
        ),
    )

In [9]:
plot_timeseries(api)

interactive(children=(Dropdown(description='variable', options=('Y', 'C', 'G', 'I', 'X', 'M'), value='Y'), Int…