# 6 of 15 central calculations

The first year course, Descriptive Economics A, presents 15 calculations for doing basic descriptive statistics.
This project shows how to conveniently apply these calcuations on selected data from Statistics Denmark to easily see the many calculation done on a short time series. We have selected six of these 15 calculations to describe the change in expenditure and income in the public sector (OFF3).

\begin{align}
    \text{Absolute change: }& x_t - x_{t-1} \\
    \text{Average absolute change: }& \frac{x_n - x_0}{n} \\
    \text{Percentage change: }& \left(\frac{x_t}{x_{t-1}}-1\right)\times 100 \\
    \text{Average percentage change: }& \left[\left(\frac{x_n}{x_0}\right)^{\frac{1}{n}}-1\right]\times 100 \\
    \text{Change in percentage points: }& \text{pct.-points}_t - \text{pct. points}_{t-1} \\
    \text{Simple index: }& \frac{x_t}{x_0}\times 100 \\
\end{align}

## Importing modules

In [52]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pydst
import pprint as pp
import ipywidgets as widgets
from ipywidgets import interact
dst = pydst.Dst(lang='en')

## Collecting data

In [53]:
variables = dst.get_variables(table_id = 'OFF3') # collecting table OFF3 from Statistics Denmark
OFF3 = dst.get_data(table_id = 'OFF3', variables={'UI':['1.8','1.13','1.16','1.17','2.13','2.16','2.17','2.18',
                                                        '2.19'], # sub-categories
                                                  'Tid':['*'], # all time
                                                  'SEKTOR':['TOTAL']}) # total public sector
OFF3.sort_values(by = 'TID', inplace = True)

## Cleaning data

In [54]:
del OFF3['SEKTOR'] # deleting irrelevant variable
names = OFF3['UI'][0:9] # generating names variable based on different sub-categories

# Renaming variables
columns_dict = {}
columns_dict['UI'] = 'Variable'
columns_dict['TID'] = 'Time'
columns_dict['INDHOLD'] = 'Absolute level'
OFF3.rename(columns = columns_dict,inplace=True)

In [55]:
# Initialize empty dictionary
rename_dict = {}
# List of wanted names
wanted_names = ['1.2: Capital accumulation',
                '1.3: Capital expenses',
                '1.4: Current and capital expenditure (1+3)',
                '1.1: Current expenditure',
                '2.1: Current revenue',
                '2.2: Capital revenue',
                '2.3: Current plus capital revenue (1+2)',
                '2.4: Currents surplus=Gross saving (2.1-1.1)',
                '2.5: Overall surplus=Net lending/borrowing (2.3-1.4)']
# Create rename dictionary for variable names
for name, wantedname in zip(names,wanted_names):
    rename_dict[name] = wantedname
# Rename the variables
for key,value in rename_dict.items():
    OFF3.Variable.replace(key, value, inplace=True)

### Create subsetting booleans

In [56]:
# Initializes list of lists for subsetting
Ilist = [[] for eachlist in range(9)]

In [57]:
# List i in Ilist is the true/false boolean for name i in wanted_names
for number, name in enumerate(wanted_names):
    Ilist[number] = OFF3['Variable']==name

In [58]:
names = ['capital_accumulation', 'capital_expenses', 'current_and_capital_expenditure',
         'current_expenditure', 'current_revenue', 'capital_revenue',
         'current_plus_capital_revenue', 'current_surplus', 'overall_surplus']
for i, name in enumerate(names):
    namespace = locals()
    namespace[name] = (OFF3[Ilist[i]])

In [59]:
dataframes = [capital_accumulation, capital_expenses, current_and_capital_expenditure,
              current_expenditure, current_revenue, capital_revenue,
              current_plus_capital_revenue, current_surplus, overall_surplus]

# Resetting index
for i in dataframes:
    i.reset_index(drop = True, inplace = True)

## Calculations

We can now make each calculation, (1)-(6), on every data frame as a new variable.

In [60]:
for i in dataframes:
    
    # Absolute change
    i['Absolute change'] = i['Absolute level'].diff()
    
    # Average absolute change
    i['Average absolute change'] = i['Absolute level'].mean()
    
    # Percentage change
    i['Percentage change'] = i['Absolute level'].pct_change()*100
    
    # Average percentage change
    i['Average percentage change'] = ((i['Absolute level'].iloc[-1]/i['Absolute level'][0])**(1/len(i['Absolute level']))-1)*100
    
    # Change in percentage points
    i['Change in percentage points'] = i['Percentage change'].diff()
    
    # Simple index
    i['Simple index'] = np.nan # Create 'Simple index'-variable with NaNs
    
    for j in range(0, len(i['Absolute level'])):
        i['Simple index'][j] = i['Absolute level'].iloc[j]/i['Absolute level'].iloc[0]*100

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the document

## Plotting the data

In [75]:
rows = ['Absolute level', 'Absolute change', 'Average absolute change', 'Percentage change',
        'Average percentage change', 'Change in percentage points', 'Simple index']

data_names = ['Capital accumulation', 'Capital expenses', 'Current and capital expenditure',
             'Current expenditure', 'Current revenue', 'Capital revenue', 
             'Current plus capital revenue', 'Current surplus', 'Overall surplus']

inputoutput = {'Capital accumulation':0, 'Capital expenses':1, 'Current and capital expenditure':2,
             'Current expenditure':3, 'Current revenue':4, 'Capital revenue':5, 
             'Current plus capital revenue':6, 'Current surplus':7, 'Overall surplus':8}

def view2(x, df):
    if x == 'Average absolute change':
        result = print(f'The {x.lower()} of {df.lower()} is {dataframes[inputoutput[df]][x][3]:.0f} m. DKK per year.')
    elif x == 'Average percentage change':
        result = print(f'The {x.lower()} of {df.lower()} is {dataframes[inputoutput[df]][x][3]:.2f} percent per year.')
    else:
        plt.plot(dataframes[inputoutput[df]]['Time'], dataframes[inputoutput[df]][x])
        if x == 'Percentage change':
            plt.ylabel(f'{x} (%)')
        elif x == 'Change in percentage points':
            plt.ylabel(f'{x} (%-points)')
        else:
            plt.ylabel(f'{x} (m. DKK)')
        plt.xlabel(f'Year')
        plt.title(f'Figure: {df}')
        result = plt.show()
    return result
x1 = widgets.Dropdown(options=rows, description='Calculation')
df1 = widgets.Dropdown(options=data_names,value='Capital accumulation', description='Data set')
interact(view2, x=x1,df=df1)

interactive(children=(Dropdown(description='Calculation', options=('Absolute level', 'Absolute change', 'Avera…

<function __main__.view2(x, df)>