In this notebook, I plot the raw daily topics alongside their sentiment- and uncertainty-adjusted counterparts to see how they differ from each other.

In [1]:
import pandas as pd

# Load the daily topics from a CSV file
daily_topics = pd.read_csv('../topics/daily_topics.csv', encoding='utf-8')
# Convert year, month, and day into a single date column
daily_topics['date'] = pd.to_datetime(daily_topics[['year','month','day']])
daily_topics.drop(columns=['year', 'month', 'day'], inplace=True)
# Now, set 'date' as index
daily_topics.set_index('date', inplace=True)

# Load the daily BPW-adjusted topics from a CSV file
bpw_adjusted_daily_topics = pd.read_csv('sentiment_adjusted_daily_topics.csv', encoding='utf-8').iloc[:, 1:]
# Convert year, month, and day into a single date column
bpw_adjusted_daily_topics['date'] = pd.to_datetime(bpw_adjusted_daily_topics[['year','month','day']])
bpw_adjusted_daily_topics.drop(columns=['year', 'month', 'day'], inplace=True)
# Now, set 'date' as index
bpw_adjusted_daily_topics.set_index('date', inplace=True)

# Load the daily SentiWS-adjusted topics from a CSV file
sentiws_adjusted_daily_topics = pd.read_csv('sentiment_adjusted_daily_topics_SentiWS.csv', encoding='utf-8').iloc[:, 1:]
# Convert year, month, and day into a single date column
sentiws_adjusted_daily_topics['date'] = pd.to_datetime(sentiws_adjusted_daily_topics[['year','month','day']])
sentiws_adjusted_daily_topics.drop(columns=['year', 'month', 'day'], inplace=True)
# Now, set 'date' as index
sentiws_adjusted_daily_topics.set_index('date', inplace=True)

# Load the daily uncertainty-adjusted topics from a CSV file
uncertainty_adjusted_daily_topics = pd.read_csv('../uncertainty/uncertainty_adjusted_daily_topics.csv', encoding='utf-8').iloc[:, 1:]
# Convert year, month, and day into a single date column
uncertainty_adjusted_daily_topics['date'] = pd.to_datetime(uncertainty_adjusted_daily_topics[['year','month','day']])
uncertainty_adjusted_daily_topics.drop(columns=['year', 'month', 'day'], inplace=True)
# Now, set 'date' as index
uncertainty_adjusted_daily_topics.set_index('date', inplace=True)

# Load the daily BCC-adjusted topics from a CSV file
bcc_adjusted_daily_topics = pd.read_csv('sign_adjusted_daily_topics_format.csv', encoding='utf-8')
# Convert year, month, and day into a single date column
bcc_adjusted_daily_topics['date'] = pd.to_datetime(bcc_adjusted_daily_topics[['year','month','day']])
bcc_adjusted_daily_topics.drop(columns=['year', 'month', 'day'], inplace=True)
# Now, set 'date' as index
bcc_adjusted_daily_topics.set_index('date', inplace=True)

I compute 180-day rolling mean for each of the series, standardize them, and then, for each of the selected topics, plot the original topic, the BPW-adjusted, the SentiWS-adjusted, and the Uncerainty-adjusted series together.

In [2]:
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# 1) Compute 180‑day rolling means for all four series
daily_topics_rm = daily_topics.rolling(window=180).mean()
bpw_rm          = bpw_adjusted_daily_topics.rolling(window=180).mean()
sentiws_rm      = sentiws_adjusted_daily_topics.rolling(window=180).mean()
uncertainty_rm  = uncertainty_adjusted_daily_topics.rolling(window=180).mean()

# 2) Prepare output directory
outdir = 'selected_topics_plots_BPW_SentiWS_Unc_standardized'
os.makedirs(outdir, exist_ok=True)

# 3) Define recession windows for shading
#recessions = [
#    ("1992-01-01", "1993-12-31"),
#    ("2001-01-01", "2001-12-31"),
#    ("2008-01-01", "2009-12-31"),
#    ("2011-01-01", "2013-12-31")
#]

recessions = [
    ("1992-05-01", "1993-07-31"),  # Post-reunification recession
    ("1995-04-01", "1995-09-30"),  # Mid-1990s slowdown
    ("1998-08-31", "1999-02-28"),  # Asian/Russian crisis slowdown
    ("2001-03-01", "2002-01-31"),  # Dot-com recession
    ("2002-10-01", "2003-04-30"),  # Early-2000s stagnation
    ("2008-04-30", "2009-05-31"),  # Great Recession
    ("2011-09-30", "2012-02-28"),  # European sovereign debt crisis (phase 1)
    ("2012-09-30", "2012-12-31"),  # European sovereign debt crisis (phase 2)
]

# 4) Topics to plot
selected_topics = [29, 50, 120]

for idx in selected_topics:
    
    # Flip sign of the original 180-day rolling topic
    orig = -daily_topics_rm.iloc[:, idx]
    bpw = bpw_rm.iloc[:, idx]
    sentiws = sentiws_rm.iloc[:, idx]
    uncertainty = -uncertainty_rm.iloc[:, idx]
    
    # Standardize each series (z-score)
    orig_z = (orig - orig.mean()) / orig.std()
    bpw_z  = (bpw  - bpw.mean())  / bpw.std()
    sentiws_z  = (sentiws  - sentiws.mean())  / sentiws.std()
    uncertainty_z  = (uncertainty  - uncertainty.mean())  / uncertainty.std()
    
    # Plot all standardized series on a single axis
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(orig_z.index, orig_z, label='Plain topic', color='black')
    ax.plot(bpw_z.index,  bpw_z,  label='Economic lexicon-adjusted topic',   color='tab:blue')
    ax.plot(sentiws_z.index,  sentiws_z,  label='General lexicon-adjusted topic', color='tab:red')
    ax.plot(uncertainty_z.index,  uncertainty_z,  label='Uncertainty-adjusted topic', color='tab:green')
    
    # Add shaded areas for recessions
    for start, end in recessions:
        ax.axvspan(pd.to_datetime(start), pd.to_datetime(end), color='grey', alpha=0.3)
    
    # Labels, legend, and formatting
    ax.set_xlabel('Date')
    ax.set_ylabel('Standardized Topic Proportion')
    ax.legend(loc='upper right')
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.setp(ax.get_xticklabels(), rotation=45)
    
    # Save
    fig.savefig(os.path.join(outdir, f'Topic_{idx}_standardized.png'), bbox_inches='tight')
    plt.savefig(os.path.join(outdir, f'Topic_{idx}_standardized.eps'), format='eps')
    plt.close(fig)

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.


I also add BCC-adjusted series for topics 127, 27, and 11.

In [3]:
import os
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# 1) Compute 180‑day rolling means for all five series
daily_topics_rm = daily_topics.rolling(window=180).mean()
bpw_rm          = bpw_adjusted_daily_topics.rolling(window=180).mean()
sentiws_rm      = sentiws_adjusted_daily_topics.rolling(window=180).mean()
uncertainty_rm  = uncertainty_adjusted_daily_topics.rolling(window=180).mean()
bcc_rm  = bcc_adjusted_daily_topics.rolling(window=180).mean()

# 2) Prepare output directory
outdir = 'selected_topics_plots_BPW_SentiWS_Unc_BCC_standardized'
os.makedirs(outdir, exist_ok=True)

# 3) Define recession windows for shading
#recessions = [
#    ("1992-01-01", "1993-12-31"),
#    ("2001-01-01", "2001-12-31"),
#    ("2008-01-01", "2009-12-31"),
#    ("2011-01-01", "2013-12-31")
#]

recessions = [
    ("1992-05-01", "1993-07-31"),  # Post-reunification recession
    ("1995-04-01", "1995-09-30"),  # Mid-1990s slowdown
    ("1998-08-31", "1999-02-28"),  # Asian/Russian crisis slowdown
    ("2001-03-01", "2002-01-31"),  # Dot-com recession
    ("2002-10-01", "2003-04-30"),  # Early-2000s stagnation
    ("2008-04-30", "2009-05-31"),  # Great Recession
    ("2011-09-30", "2012-02-28"),  # European sovereign debt crisis (phase 1)
    ("2012-09-30", "2012-12-31"),  # European sovereign debt crisis (phase 2)
]

# 4) Topics to plot
selected_topics = [29, 50, 7]
selected_topics_bcc = [127, 27, 11]

for idx, bcc_idx in zip(selected_topics, selected_topics_bcc):
    
    # Flip sign of the original 180-day rolling topic
    orig = -daily_topics_rm.iloc[:, idx]
    bpw = bpw_rm.iloc[:, idx]
    sentiws = sentiws_rm.iloc[:, idx]
    uncertainty = -uncertainty_rm.iloc[:, idx]
    bcc         =  bcc_rm.iloc[:, bcc_idx]
    
    # Standardize each series (z-score)
    orig_z = (orig - orig.mean()) / orig.std()
    bpw_z  = (bpw  - bpw.mean())  / bpw.std()
    sentiws_z  = (sentiws  - sentiws.mean())  / sentiws.std()
    uncertainty_z  = (uncertainty  - uncertainty.mean())  / uncertainty.std()
    bcc_z  = (bcc  - bcc.mean())  / bcc.std()
    
    # Plot all standardized series on a single axis
    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(orig_z.index, orig_z, label='Plain topic', color='black')
    ax.plot(bpw_z.index,  bpw_z,  label='Economic lexicon-adjusted topic',   color='tab:blue')
    ax.plot(sentiws_z.index,  sentiws_z,  label='General lexicon-adjusted topic', color='tab:red')
    ax.plot(uncertainty_z.index,  uncertainty_z,  label='Uncertainty-adjusted topic', color='tab:green')
    ax.plot(bcc_z.index,  bcc_z,  label='BCS-adjusted topic', color='tab:orange')
    
    # Add shaded areas for recessions
    for start, end in recessions:
        ax.axvspan(pd.to_datetime(start), pd.to_datetime(end), color='grey', alpha=0.3)
    
    # Labels, legend, and formatting
    ax.set_xlabel('Date')
    ax.set_ylabel('Standardized Topic Proportion')
    ax.legend(loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0)
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    plt.setp(ax.get_xticklabels(), rotation=45)
    
    # Save
    fig.savefig(os.path.join(outdir, f'Topic_{idx}_standardized.png'), bbox_inches='tight')
    plt.savefig(os.path.join(outdir, f'Topic_{idx}_standardized.eps'), format='eps')
    plt.close(fig)

The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
The PostScript backend does not support transparency; partially transparent artists will be rendered opaque.
