# Data Project: Inflation in Denmark 2015-2023

In this project, we will analyze the development in Danish Consumer Prices for the period January 2015 to March 2023 for three subcategories of commodities. These categories are: 'Food and non-alcoholic beverages', 'Housing, water, electricity, gas and other fuels' and 'Transport'. We will construct a time-series for each category containing the development in its Consumer Price Index.  
The goal is to make an environment that makes it easy to compare the development in and across the three time-series.

In [1]:
#Installing the DST API wrapper. There is no need to run this code if the API is already installed.

# %pip install git+https://github.com/alemartinello/dstapi

In [2]:
# Import the libraries and modules used in this notebook.

import numpy as np
import pandas as pd
import dataproject as dp
import ipywidgets as widgets
from scipy.stats import norm

from dstapi import DstApi # install with `pip install git+https://github.com/alemartinello/dstapi`

import matplotlib.pyplot as plt
# Global plot styling: dashed, semi-transparent black grid lines and a larger font.
plt.rcParams.update({
    "axes.grid": True,
    "grid.color": "black",
    "grid.alpha": "0.25",
    "grid.linestyle": "--",
    'font.size': 14,
})

The function `date_conv` from the `dataproject` module (imported as `dp`) generates a datetime component from the string values for time in our dataset. It will be used later in this notebook.

# Reading data

In [3]:
# Create a DstApi object for the DST table 'PRIS111'
# (consumer price index by commodity group, unit and time).
PRIS111 = DstApi('PRIS111') 

In [4]:
# Overview of the table: one row per variable with its number of values and
# its first/last value.
# NOTE(review): the name `PriceIndices` holds the table summary here and is
# later rebound to the downloaded data, so the cells must be run in order.
PriceIndices = PRIS111.tablesummary(language='en')
display(PriceIndices)

Table PRIS111: Consumer price index by commodity group, unit and time
Last update: 2023-04-11T08:00:00


Unnamed: 0,variable name,# values,First value,First value label,Last value,Last value label,Time variable
0,VAREGR,384,000000,"00 Consumer price index, total",151000,15.1 Overall Consumer Price Index excl. energy,False
1,ENHED,3,100,Index,300,Percentage change compared to same month the y...,False
2,Tid,267,2001M01,2001M01,2023M03,2023M03,True


In [5]:
# Show the available values for each variable listed in the table summary.
for var_name in PriceIndices['variable name']:
    print(f'{var_name}:')
    display(PRIS111.variable_levels(var_name, language='en'))

VAREGR:


Unnamed: 0,id,text
0,000000,"00 Consumer price index, total"
1,010000,01 Food and non-alcoholic beverages
2,011000,01.1 Food
3,011100,01.1.1 Bread and cereals
4,011110,01.1.1.1 Rice
...,...,...
379,127030,12.7.0.3 Funeral services
380,131000,13.1 Goods (total)
381,132000,13.2 Services (total)
382,141000,14.1 Overall Consumer Price Index - excl. ener...


ENHED:


Unnamed: 0,id,text
0,100,Index
1,200,Percentage change compared to previous month (...
2,300,Percentage change compared to same month the y...


Tid:


Unnamed: 0,id,text
0,2001M01,2001M01
1,2001M02,2001M02
2,2001M03,2001M03
3,2001M04,2001M04
4,2001M05,2001M05
...,...,...
262,2022M11,2022M11
263,2022M12,2022M12
264,2023M01,2023M01
265,2023M02,2023M02


In [6]:
# Define the default parameter dictionary for PRIS111; as the output shows, it
# selects every value ('*') of every variable.
# NOTE(review): `_define_base_params` has a leading underscore, i.e. it is a
# private method of DstApi by Python convention and may change between versions.
# `params` is only displayed here; the query itself uses the hand-written
# dictionary `params_PRIS111` defined further down.
params = PRIS111._define_base_params(language='en')
params

{'table': 'pris111',
 'format': 'BULK',
 'lang': 'en',
 'variables': [{'code': 'VAREGR', 'values': ['*']},
  {'code': 'ENHED', 'values': ['*']},
  {'code': 'Tid', 'values': ['*']}]}

In [7]:
# Query parameters for PRIS111, restricted to the data used in this project.
params_PRIS111 = {
    'table': 'pris111',
    'format': 'BULK',
    'lang': 'en',
    'variables': [
        # Commodity groups: 01 Food and non-alcoholic beverages,
        # 04 Housing, water, electricity, gas and other fuels, 07 Transport.
        {'code': 'VAREGR', 'values': ['010000', '040000', '070000']},
        # Unit 100 is the index itself (not the percentage changes).
        {'code': 'ENHED', 'values': ['100']},
        # Every month after December 2014, i.e. from 2015M01 onwards.
        {'code': 'Tid', 'values': ['>2014M12']},
    ],
}

In [8]:
# Load the data from PRIS111 that matches the selected parameters.
# NOTE: this rebinds `PriceIndices` from the table summary to the actual data.

PriceIndices = PRIS111.get_data(params=params_PRIS111)
PriceIndices.head(10)

Unnamed: 0,VAREGR,ENHED,TID,INDHOLD
0,01 Food and non-alcoholic beverages,Index,2015M01,99.5
1,"04. Housing, water, electricity, gas and other...",Index,2015M01,99.1
2,07. Transport,Index,2015M01,97.8
3,01 Food and non-alcoholic beverages,Index,2015M02,99.2
4,"04. Housing, water, electricity, gas and other...",Index,2015M02,100.3
5,07. Transport,Index,2015M02,99.8
6,01 Food and non-alcoholic beverages,Index,2015M03,99.7
7,"04. Housing, water, electricity, gas and other...",Index,2015M03,100.2
8,07. Transport,Index,2015M03,100.1
9,01 Food and non-alcoholic beverages,Index,2015M04,99.8


# Cleaning data

In [9]:
# Give the columns descriptive English names.
rename_dict = {
    'VAREGR': 'Category',
    'TID': 'Time',
    'INDHOLD': 'PriceIndex',
}

# Rebind to the renamed frame instead of mutating in place.
PriceIndices = PriceIndices.rename(columns=rename_dict)
PriceIndices.head(10)

Unnamed: 0,Category,ENHED,Time,PriceIndex
0,01 Food and non-alcoholic beverages,Index,2015M01,99.5
1,"04. Housing, water, electricity, gas and other...",Index,2015M01,99.1
2,07. Transport,Index,2015M01,97.8
3,01 Food and non-alcoholic beverages,Index,2015M02,99.2
4,"04. Housing, water, electricity, gas and other...",Index,2015M02,100.3
5,07. Transport,Index,2015M02,99.8
6,01 Food and non-alcoholic beverages,Index,2015M03,99.7
7,"04. Housing, water, electricity, gas and other...",Index,2015M03,100.2
8,07. Transport,Index,2015M03,100.1
9,01 Food and non-alcoholic beverages,Index,2015M04,99.8


In [10]:
# Drop the redundant 'ENHED' column: only the 'Index' unit was requested, so
# the column holds the same value in every row.
# The column is checked for explicitly instead of wrapping the drop in a bare
# `except:`, which would also hide unrelated errors (e.g. `PriceIndices` not
# being defined) behind the 'Done already!' message.

if 'ENHED' in PriceIndices.columns:
    PriceIndices = PriceIndices.drop(columns='ENHED')
else:
    print('Done already!')

PriceIndices



Unnamed: 0,Category,Time,PriceIndex
0,01 Food and non-alcoholic beverages,2015M01,99.5
1,"04. Housing, water, electricity, gas and other...",2015M01,99.1
2,07. Transport,2015M01,97.8
3,01 Food and non-alcoholic beverages,2015M02,99.2
4,"04. Housing, water, electricity, gas and other...",2015M02,100.3
...,...,...,...
292,"04. Housing, water, electricity, gas and other...",2023M02,120.3
293,07. Transport,2023M02,118.9
294,01 Food and non-alcoholic beverages,2023M03,128.3
295,"04. Housing, water, electricity, gas and other...",2023M03,119.5


In [11]:
# Sort the dataset by category and, within each category, by time. The 'Time'
# strings have the form 'YYYYMmm' with zero-padded months, so sorting them as
# text also sorts them chronologically.

try:
    PriceIndices = PriceIndices.sort_values(by=['Category', 'Time', 'PriceIndex'])
except KeyError:
    # sort_values raises KeyError when a sort column is missing. Only that case
    # is caught, so other errors are not reported with a misleading message.
    print('Time component has been dropped from PriceIndices. Please run this code from top to bottom!')

PriceIndices.head(5)

Unnamed: 0,Category,Time,PriceIndex
0,01 Food and non-alcoholic beverages,2015M01,99.5
3,01 Food and non-alcoholic beverages,2015M02,99.2
6,01 Food and non-alcoholic beverages,2015M03,99.7
9,01 Food and non-alcoholic beverages,2015M04,99.8
12,01 Food and non-alcoholic beverages,2015M05,100.0


In [12]:
# Inspect column dtypes and non-null counts before converting the datatypes.
PriceIndices.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 297 entries, 0 to 296
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Category    297 non-null    object
 1   Time        297 non-null    object
 2   PriceIndex  297 non-null    object
dtypes: object(3)
memory usage: 9.3+ KB


In [13]:
# Convert the columns to proper datatypes (they are all loaded as `object`).
# Columns are accessed with brackets, and the presence of 'Time' is checked
# explicitly rather than with a bare `except:`, so real conversion errors are
# not silently swallowed.

PriceIndices['Category'] = PriceIndices['Category'].astype('string')

if 'Time' in PriceIndices.columns:
    PriceIndices['Time'] = PriceIndices['Time'].astype('string')
else:
    print('Time component has been dropped from PriceIndices. Please run this code from top to bottom!')

PriceIndices['PriceIndex'] = PriceIndices['PriceIndex'].astype(float)



In [14]:
# Removes the two leading digits from the unique category values
# (e.g. '01 Food and non-alcoholic beverages' -> 'Food and non-alcoholic beverages').

# unique() returns the labels in order of first appearance, so the order of
# cat_oldNames follows the current row order of PriceIndices.
cat_oldNames = PriceIndices.Category.unique()
cat_newNames = ['Food and non-alcoholic beverages', 'Housing, water, electricity, gas and other fuels', 'Transport']

# NOTE(review): presumably the helper pairs old and new names by position and
# modifies PriceIndices in place (its result is not assigned) - confirm in the
# dataproject module. If so, cat_newNames must stay in the same order as
# cat_oldNames.
dp.DigitRemoveFromCategory(PriceIndices, cat_oldNames, cat_newNames)

PriceIndices


Unnamed: 0,Category,Time,PriceIndex
0,Food and non-alcoholic beverages,2015M01,99.5
3,Food and non-alcoholic beverages,2015M02,99.2
6,Food and non-alcoholic beverages,2015M03,99.7
9,Food and non-alcoholic beverages,2015M04,99.8
12,Food and non-alcoholic beverages,2015M05,100.0
...,...,...,...
284,Transport,2022M11,119.0
287,Transport,2022M12,117.3
290,Transport,2023M01,118.4
293,Transport,2023M02,118.9


In [15]:
# Build the 'Date' column by applying dp.date_conv to every row (axis=1).
# dp.date_conv presumably reads each row's 'Time' string - confirm in the
# dataproject module. The column is checked explicitly instead of using a bare
# `except:`, so a genuine failure inside date_conv raises rather than being
# reported as a missing 'Time' column.
if 'Time' in PriceIndices.columns:
    PriceIndices['Date'] = PriceIndices.apply(dp.date_conv, axis=1)
else:
    print('Time component has been dropped from PriceIndices. Please run this code from top to bottom!')


In [16]:
# Renumber the rows 0..n-1 after sorting; drop=True discards the old index
# rather than keeping it as a column.

PriceIndices = PriceIndices.reset_index(drop=True)
PriceIndices.head(5)

Unnamed: 0,Category,Time,PriceIndex,Date
0,Food and non-alcoholic beverages,2015M01,99.5,2015-01-01
1,Food and non-alcoholic beverages,2015M02,99.2,2015-02-01
2,Food and non-alcoholic beverages,2015M03,99.7,2015-03-01
3,Food and non-alcoholic beverages,2015M04,99.8,2015-04-01
4,Food and non-alcoholic beverages,2015M05,100.0,2015-05-01


In [17]:
# Drop the old 'Time' column now that 'Date' holds the same information.
# An explicit column check replaces the bare `except:` so that unrelated errors
# are not reported as 'Done already!'.

if 'Time' in PriceIndices.columns:
    PriceIndices = PriceIndices.drop(columns='Time')
else:
    print('Done already!')

display(PriceIndices)

Unnamed: 0,Category,PriceIndex,Date
0,Food and non-alcoholic beverages,99.5,2015-01-01
1,Food and non-alcoholic beverages,99.2,2015-02-01
2,Food and non-alcoholic beverages,99.7,2015-03-01
3,Food and non-alcoholic beverages,99.8,2015-04-01
4,Food and non-alcoholic beverages,100.0,2015-05-01
...,...,...,...
292,Transport,119.0,2022-11-01
293,Transport,117.3,2022-12-01
294,Transport,118.4,2023-01-01
295,Transport,118.9,2023-02-01


# Making an interactive plot

In [18]:
# Define plot function 

def plot_e(df, category): 
    """Plot the price index over time for one category.

    Args:
        df: DataFrame with the columns 'Category', 'Date' and 'PriceIndex'.
        category: Value of the 'Category' column whose rows are plotted.

    Returns:
        None. The figure is drawn as a side effect; nothing is returned, so
        `widgets.interact` has no extra value to display.
    """
    is_selected = df['Category'] == category
    ax = df.loc[is_selected, :].plot(x='Date', y='PriceIndex', style='-', legend=False)
    # Label the figure so it is clear which series is shown (the legend is off).
    ax.set_title(category)
    ax.set_ylabel('Consumer price index')

In [19]:
# Construct the interactive plot: a dropdown selects the category and plot_e
# redraws the figure for the chosen value.

category_dropdown = widgets.Dropdown(
    description='Category',
    options=PriceIndices.Category.unique(),
    value='Food and non-alcoholic beverages',
)

# The DataFrame is passed as a fixed (non-widget) argument; the trailing
# semicolon suppresses the repr of the returned object.
widgets.interact(plot_e, df=widgets.fixed(PriceIndices), category=category_dropdown);

interactive(children=(Dropdown(description='Category', options=('Food and non-alcoholic beverages', 'Housing, …

# Conclusion