# Uncertainty and the real economy - Notebook

This code recreates the uncertainty indices used in Bess et al. (2020). The authors would like to thank Rastin Matin for his code relating to the LDA model. If you use this code, please cite:

Bess, M., Grenestam, E., Pedersen, J. and Tang-Andersen Martinello, A. (2020). [Uncertainty and the real economy: Evidence from Denmark.](https://www.nationalbanken.dk/en/publications/Pages/2020/11/Working-Paper-Uncertainty-and-the-real-economy-Evidence-from-Denmark.aspx) Working paper 165, Danmarks Nationalbank. 

# Indices
The block below recreates the indices used in the paper. The output is exported as a CSV file to `data/indices` (the default location). All paths can be set by the user in `input_params.json`.

In [None]:
from src.fui.cluster import ClusterTree
import lemmy
from src.fui.lda import LDA
from src.fui.utils import main_directory, dump_pickle, dump_csv, params
from src.fui.ldatools import preprocess, optimize_topics, create_dictionary, merge_documents_and_topics
from src.fui.ldatools import jsd_measure, create_corpus, save_models, load_model, print_topics, parse_topic_labels
from src.fui.indices import LDAIndexer, BloomIndexer, uncertainty_count
from src.fui.preprocessing import parse_for_lda, load_parsed_data
import pandas as pd

# Number of topics shared by the LDA training step, the topic-label file and
# the index builders below. Defined once so the steps cannot drift apart.
num_topics = 90

# Parse news articles and save to HDF5
parse_for_lda()

# Count uncertainty words in parsed articles
uncertainty_count()

# Import Danish lemmatizer
lemmatizer = lemmy.load("da")

# Create a dictionary and BoW corpus
my_lda = LDA(lemmatizer, test_share=0.0, test=False)
create_dictionary(my_lda, load_bigrams=True)
create_corpus(my_lda)

# Train the LDA model.
# The LDA object created above is `my_lda`; it is the instance that must be
# passed on to the helper functions in the rest of this cell.
my_lda.lda_models, coherence_scores = optimize_topics(
    my_lda, topics_to_optimize=num_topics, plot=False
)
save_models(my_lda, params)

# Export top words to table (see table A1)
labels = parse_topic_labels('labels', num_topics)
word_list = print_topics(my_lda, topn=30, unique_sort=False)
df = pd.DataFrame(word_list)
# Replace each column name with its label; labels are keyed by the
# string form of the column name.
df = df.rename(columns={col: labels[str(col)] for col in df.columns})
dft = df.transpose()
dft = dft.reset_index()
# Join columns 1..19 of each row (position 0 is the former index holding the
# label) into one comma-separated string.
dft['text'] = dft.iloc[:, 1:20].apply(lambda x: ', '.join(x), axis=1)
# `to_latex` writes straight to the file when a path is given and returns
# None, so there is nothing useful to bind to a variable here.
dft.to_latex("top_words_table.tex", columns=['index', 'text'])

# Get topics from articles and save to HDF5
merge_documents_and_topics(my_lda)

# Build main index
main_idx = LDAIndexer(name='ep_all')
idx = main_idx.build(num_topics=num_topics, topics=['EP'], topic_thold=0.5, frq='Q')

# Plot index (see figure 1)
main_idx.plot_index(plot_bloom=True, plot_vix=True)

# Build the broad index
broad_idx = LDAIndexer(name='broad')
broad_idx.build(num_topics=num_topics, topics=['broad'], topic_thold=0.5, frq='Q')

# Build our Danish version of the Baker et al. (2016) index
bloom_idx = BloomIndexer(name='bloom')
bloom_idx.build(logic='EandPandU', bloom_dict_name='bloom', extend=False)

# Plots

## Figure 2

```python
# Figure 2: stacked bar chart of the four largest components of the main
# index per date. Relies on `idx`, `pd` and `params` from the Indices cell.
import codecs
import json
import os

import seaborn as sns
import matplotlib.pyplot as plt

# Long format: one row per (date, variable) pair.
df_long = pd.melt(idx.reset_index(), id_vars='date')
# For every date keep the four rows with the largest values.
df_top = df_long.groupby('date').apply(lambda grp: grp.nlargest(4, 'value'))
# NOTE(review): `df` is not defined in this cell; it resolves to whatever
# DataFrame named `df` exists in the notebook scope (the top-words table if
# the Indices cell was run first), and `df_bottom` is not used further down
# in this cell -- confirm whether `idx` was intended here.
bottom = (df.iloc[:, 1:].shape[1] - 3)
df_bottom = df_long.groupby('date').apply(lambda grp: grp.nsmallest(bottom, 'value').sum())
df_top = df_top.pivot(index='date', columns='variable', values='value')

# Drop the aggregate 'idx' column, label dates as year-month and rescale.
df_top = df_top.drop(['idx'], axis=1)
df_top.index = df_top.index.strftime('%Y-%m')
df_top = df_top * 1000

# Move the last column to the front of the column order.
cols = df_top.columns.tolist()
cols = cols[-1:] + cols[:-1]

df_top = df_top[cols]

# Load the human-readable topic labels used for the legend.
label_path = os.path.join(params().paths['topic_labels'], 'labels90.json')
with codecs.open(label_path, 'r', encoding='utf-8-sig') as f:
    labels = json.load(f)

nb_colors = ['#017bd1', '#92229c', '#c43d21', '#df9337', '#afd247', '#86bff4',
             '#caa8e5', '#eeb7ba', '#666666', '#f4d495', '#d5eb90', '#c1c1c2',
             "#8cffda", "#b89e97", "#91c499", "#ffa987", "#b5d99c"]

df_top.plot.bar(stacked=True, figsize=(15, 7), color=nb_colors, align='center', width=0.7)

ax = plt.gca()
# Show only every fifth x tick label; blank out the others.
ax.set_xticklabels([t if not i % 5 else "" for i, t in enumerate(ax.get_xticklabels())])
h, l = ax.get_legend_handles_labels()
# Translate the legend entries into topic labels and place the legend
# below the axes.
ax.legend([labels[str(x)] for x in l], loc='upper center', bbox_to_anchor=(0.5, -0.19),
          fancybox=True, shadow=False, ncol=5)
ax.tick_params(axis='both', which='major', labelsize=14)
ax.set_ylabel("Raw index value", fontsize='large')
plt.xticks(rotation=45)
plt.tight_layout()
```

## Figure 3

```python
# Figure 3: heatmap of the components of the main index over time.
# Relies on `idx` and `params` from the Indices cell.
import codecs
import json
import os

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# Load the human-readable topic labels used for the y axis.
label_path = os.path.join(params().paths['topic_labels'], 'labels90.json')
with codecs.open(label_path, 'r', encoding='utf-8-sig') as f:
    labels = json.load(f)

# Drop the aggregate 'idx' column and transpose so that the remaining
# columns become rows and the dates become columns.
dft = idx.drop(columns=['idx']).transpose()
dft.columns = dft.columns.strftime('%Y-%m')

# Custom colormap interpolated through three RGB anchor colours,
# quantised into 200 levels.
nbcm = LinearSegmentedColormap.from_list(
    'nbcm',
    [(0/255, 123/255, 209/255), (244/255, 212/255, 149/255), (196/255, 61/255, 33/255)],
    N=200,
)

fig, ax = plt.subplots(1, 1, figsize=(15, 8))

# xticklabels=3 labels every third column only.
ax = sns.heatmap(dft, cmap=nbcm, linewidths=0, annot=False, xticklabels=3)
ax.set_yticklabels([labels[str(i)] for i in dft.index], rotation=0)

plt.tight_layout()
fig.autofmt_xdate()
```

## Figure A3

```python
from src.fui.cluster import ClusterTree

# Figure A3: build a ClusterTree with argument 90 and the cosine metric,
# then draw its dendrogram.
cl90 = ClusterTree(90, metric='cosine')

# Annotation is switched off here; the cluster labels were added manually.
dendrogram_output = cl90.dendrogram(colors=12, annotate=False)
fig, ax, R = dendrogram_output
```