# Data project

# Imports

In [121]:
import pandas as pd
import matplotlib.pyplot as plt
import ipywidgets as widgets
import bqplot as bq
from IPython.display import display
import plotly.graph_objects as go

# autoreload modules when code is run
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Import data sets

We get the data from statistikbanken.dk/NAN1. Even though the two data sets come from the same source, we could not export them as a single file, so we combine them manually in Python. We import two files with Danish data from 2000 to 2023: the first file contains the components of the national accounts, and the second contains the level of employment. Since our files use Danish notation, we tell pandas which symbols are used as the field separator (`;`) and the decimal mark (`,`). Lastly, we combine the two files.

In [122]:
# Both exports use Danish formatting: ';' separates fields and ',' is the decimal mark
csv_options = {'sep': ';', 'decimal': ','}
df1 = pd.read_csv('National.csv', **csv_options)
df2 = pd.read_csv('Employ.csv', **csv_options)

# Keep the period label plus the national accounts components from the first file,
# and only the employment series from the second file
national_columns = ['Date', 'Y', 'M', 'X', 'C', 'G', 'I']
df1_selected = df1[national_columns]
df2_selected = df2[['N']]

# Place the two selections side by side; rows are matched on their index.
# NOTE(review): this presumes both files list the same quarters in the same order - confirm
df = pd.concat([df1_selected, df2_selected], axis=1)


# Data exploration

First we change the format of the 'Date' column from quarter labels such as '2000K2' to decimal years such as '2000.25'.
We then construct a new variable, NX (net exports), as X minus M, and make an interactive figure to illustrate the different components of the data set.

In [123]:
# We only parse 'Date' while it still holds the raw quarter labels (e.g. '2000K2'),
# so that re-running this cell does not fail on the already converted float column
if not pd.api.types.is_numeric_dtype(df['Date']):
    # We replace 'K' with 'Q' in 'Date' column
    df['Date'] = df['Date'].str.replace('K', 'Q')

    # We convert 'Date' column to PeriodIndex
    df['Date'] = pd.PeriodIndex(df['Date'], freq='Q')

    # We convert 'Date' column to year with fractional part representing the quarter
    df['Date'] = df['Date'].map(lambda x: x.year + (x.quarter - 1) / 4)

# We create a new column by subtracting 'M' from 'X' which gives net export 'NX'
df['NX'] = df['X'] - df['M']

# We create a dropdown menu with the variable names
dropdown = widgets.Dropdown(options=['Y', 'C', 'G', 'I', 'NX', 'N'])

# We create a dictionary that maps the original variable names to the new names
name_dict = {'Y': 'GDP', 'C': 'Consumption', 'G': 'Public consumption', 'I': 'Investment', 'NX': 'Net exports', 'N': 'Employment level'}

# We create a dictionary for the y-axis labels
ylabel_dict = {'Y': 'Billions in DKK', 'C': 'Billions in DKK', 'G': 'Billions in DKK', 'I': 'Billions in DKK', 'NX': 'Billions in DKK', 'N': 'Thousands of people'}

# We create scales for the x and y axes
x_sc = bq.LinearScale(min=2000, max=2024)
y_sc = bq.LinearScale()

# We create a line mark (the y values are filled in by update_figure)
line = bq.Lines(x=df['Date'], y=[], scales={'x': x_sc, 'y': y_sc})

# We create axes
x_ax = bq.Axis(scale=x_sc, label='Year')
y_ax = bq.Axis(scale=y_sc, label='', orientation='vertical')

# We create the figure. It gets its own name so that the dropdown callback stays bound
# to this bqplot figure even though the name `fig` is reused for another figure later on.
components_fig = bq.Figure(marks=[line], axes=[x_ax, y_ax], title='', layout=widgets.Layout(width='600px', height='500px'))

# We create a function to update the figure
def update_figure(change):
    """Redraw the figure for the variable currently selected in the dropdown.

    Updates the line's y values, the figure title and the y-axis label.

    Parameters
    ----------
    change : dict or None
        The change notification passed by ``dropdown.observe``. It is not used;
        the selected variable is read directly from ``dropdown.value``, which is
        why the function can also be called with ``None`` to initialise the figure.
    """
    line.y = df[dropdown.value]
    # The title is a trait of the bqplot Figure itself; `layout` is only the
    # ipywidgets Layout (width/height etc.), so a title set there is never drawn.
    components_fig.title = f'{name_dict[dropdown.value]} development from 2000 to 2023'
    y_ax.label = ylabel_dict[dropdown.value]

# The following line updates the figure as the dropdown value changes
dropdown.observe(update_figure, 'value')

# We display the dropdown and the figure
display(dropdown)
display(components_fig)

# We initialize the figure
update_figure(None)

Dropdown(options=('Y', 'C', 'G', 'I', 'NX', 'N'), value='Y')

Figure(axes=[Axis(scale=LinearScale(max=2024.0, min=2000.0)), Axis(orientation='vertical', scale=LinearScale()…

It is clear that there is a lot of seasonality in our data. Therefore we decide to take a centred rolling mean of each variable over five quarters. This means that each value becomes the average of itself and the two values on each side of it; the first two and last two quarters, which lack a full window, are dropped.

In [124]:
# Number of quarters in each rolling window
# NOTE(review): a centred 5-quarter window counts one season twice (t-2 and t+2) - confirm this is intended
window_size = 5

# The rolling mean is only applied to the numeric series, so 'Date' is set aside first
df_without_date = df.drop(columns='Date')

# Centred rolling mean of every series, with 'Date' re-attached as the last column;
# rows containing NaN (e.g. the edges without a full window) are removed
df_rolling = (
    df_without_date
    .rolling(window=window_size, center=True)
    .mean()
    .assign(Date=df['Date'])
    .dropna()
)

# Analysis

We calculate the growth rates of all the components and assign these new values to a new data set. 

In [125]:
# Every column except 'Date' is a series we want a growth rate for
value_columns = [name for name in df_rolling.columns if name != 'Date']

# Make sure those series are numeric; anything unparsable becomes NaN
df_rolling = df_rolling.assign(
    **{name: pd.to_numeric(df_rolling[name], errors='coerce') for name in value_columns}
)

# Period-over-period growth in percent, one '<name>_growth' column per series
# NOTE(review): a percentage change of NX is hard to interpret when NX is near zero or negative - confirm
growth_rates = (
    df_rolling[value_columns]
    .pct_change(fill_method=None)
    .mul(100)
    .add_suffix('_growth')
)

# 'Date' first, followed by the growth rates; rows with NaN (e.g. the first period) are dropped
df_growth = pd.concat([df_rolling[['Date']], growth_rates], axis=1).dropna()

We make an interactive figure that shows the growth rate of each component alongside the growth rate of GDP. To make the figure readable, we group the growth rates by year and take the average.

In [126]:
# We convert 'Date' to integer to get the year (the fractional quarter part is truncated)
df_growth['Year'] = df_growth['Date'].astype(int)

# We group by 'Year' and calculate the mean
df_growth_yearly = df_growth.groupby('Year').mean().reset_index()

# We create a bar chart for 'Y_growth', which stays visible as the reference series
fig = go.Figure(data=[
    go.Bar(name='Y_growth', x=df_growth_yearly['Year'], y=df_growth_yearly['Y_growth'])
])

# We add traces for the other variables. Only the first one starts out visible, so that
# the initial view matches the dropdown's initial selection and the initial title.
variables = ['C_growth', 'G_growth', 'I_growth', 'NX_growth', 'N_growth']
for i, variable in enumerate(variables):
    fig.add_trace(
        go.Bar(name=variable, x=df_growth_yearly['Year'], y=df_growth_yearly[variable], visible=(i == 0))
    )

# We create a dropdown menu: each button shows 'Y_growth' (trace 0) together with
# exactly one of the other variables (trace i + 1) and hides the rest
buttons = []
for i, variable in enumerate(variables):
    visibility = [True] + [False]*i + [True] + [False]*(len(variables)-i-1)
    buttons.append(dict(label=variable, method='update', args=[{'visible': visibility}, {'title': variable}]))

fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=buttons,
        )
    ],
    title=variables[0],
    xaxis_title='Year',
    yaxis_title='Average quarterly growth rate (%)',
)

# We show the figure
fig.show()

From our analysis we are able to conclude that:
* Private consumption is very procyclical as the growth rates of C and Y are positive and negative in the same periods. 
* Public consumption is harder to classify than private consumption. Standard fiscal policy prescriptions say that G should be countercyclical to GDP growth. On the other hand, the government can spend more during booms. 
* Investment is also procyclical; however, at low growth rates it is harder to draw a conclusion. 
* From our plot it is impossible to draw any conclusions regarding the pro- or countercyclical nature of net exports. A reason for this could be that net exports depend on the economic activity of the rest of the world. 
* The level of employment is very procyclical. 