In this notebook, we visualize time series data for both the Loughran-McDonald (LM) sentiment scores and the estimated daily topics.

In [1]:
import os
import csv
from pathlib import Path
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sb

from matplotlib import rc
# Render all figure text through LaTeX (matplotlib's `text.usetex` setting).
# This needs a working LaTeX installation, and LaTeX special characters in
# labels/titles (e.g. underscores) have to be escaped.
rc('text', usetex=True)

Read data

In [2]:
# Load the daily topic table and parse its date column into datetimes so it
# can be used as the merge key / time index later on.
topics_file = 'analysis_topics/daily_topics.csv'
daily_topics = pd.read_csv(topics_file).assign(
    dates_day=lambda frame: pd.to_datetime(frame['dates_day'])
)

In [3]:
# Load the daily LM sentiment table and parse its date column into datetimes.
daily_sentiment = pd.read_excel('sentiment_daily_LM.xlsx').assign(
    dates_day=lambda frame: pd.to_datetime(frame['dates_day'])
)

Merge dataframes

In [4]:
# Inner join: keep only the days that appear in both tables, then use the
# date as the index of the combined frame.
merged_by_day = pd.merge(
    daily_topics,
    daily_sentiment,
    how='inner',
    on='dates_day',
)
daily = merged_by_day.set_index('dates_day')

In [5]:
# Names of the topic columns: 'T0', 'T1', ..., 'T99'.
regressors = ['T' + str(topic_id) for topic_id in range(100)]

In [6]:
# get topic names (first word)

# Collect every CSV row whose first cell starts with 'topic'.
# - Blank lines are returned by csv.reader as empty lists, so they are skipped
#   before row[0] is touched (otherwise an IndexError would abort the cell).
# - newline='' is what the csv module asks for when opening files, so that
#   line breaks inside quoted fields are handled by the reader itself.
with open('analysis_topics/topic_description.csv', 'r',
          encoding='utf-8-sig', newline='') as f:
    topics = [row for row in csv.reader(f)
              if row and row[0].startswith('topic')]

# dictionary mapping dataframe column names to topic names (first words):
# the 'topic' prefix of the first cell is replaced by 'T', and the second
# cell is capitalized, e.g. a row ['topic7', 'markets', ...] -> {'T7': 'Markets'}
topic_dict = {
    'T' + topic[0].replace('topic', ''): topic[1].capitalize()
    for topic in topics
}

In [7]:
# Apply seaborn's 'whitegrid' style to the plots created below.
sb.set_style(style='whitegrid')

Smoothed topics

In [8]:
%%capture

X = daily[regressors + ['sentiment_LM']][30:]

# size of rolling window
window = 30
# information textstring 
textstr = f'{window}-day moving average'
# textbox properties
props = dict(boxstyle='round', facecolor='white', edgecolor='black', alpha=0.5)

# loop through all topics to create one line plot for each
for x in X.columns:
    plt.figure(figsize=(9,6))
    ax = plt.gca()
    # sentiment is not in the topics dictionary, need to branch it differently
    if x == 'sentiment_LM':
        plt.title('LM Sentiment')
        textstr = f'{window}-day moving average'
    else:
        topic_name = topic_dict[x]
        if "_" in topic_name:
            topic_name = topic_name.replace("_", r"\_")
        #plt.title(f'topic: {x} ({topic_name})')
    # setting up the xaxis
    pseudo_dates = X.index
    # smoothen the lines a bit
    line = X[x].rolling(window, center=True).mean()
    # plotting the line
    plt.plot(pseudo_dates, line, label=x, lw=0.9, c='red', ls='-', alpha=0.7)
    # place a text box in upper left in axes coords
    if x == 'sentiment':
        ax.text(0.8, 0.95, textstr, transform=ax.transAxes, fontsize=18,
                verticalalignment='top', bbox=props)
    else:
        ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=18,
                verticalalignment='top', bbox=props)
        
    ax.tick_params(axis='both', labelsize=16)
    # save figure
    plt.savefig(f'plots/topics_smooth/Topic{x}.png')