In [None]:
# == Import libraries for data processing and visualization ==
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import os
import joblib
import pandas as pd
from scipy import stats
from tabulate import tabulate
import textwrap
from matplotlib.lines import Line2D
from gensim.models import Word2Vec
from wordcloud import WordCloud
import random
from scipy.spatial.distance import cosine
import numpy as np
import ast

In [None]:
# === Set Working Directory ===

# --- Set base path to project root ---
# The parent of the current working directory is used as the project root,
# so every path below is relative to wherever the notebook is launched from.
base_path = Path.cwd().parent  # project root
print(f"Project root set to: {base_path}")

# === Define Folder Paths ===

data_c = base_path / "data"
data_results = data_c / 'results'
data_temp = data_c / 'temp'
data_freq = data_c / 'freq'
data_dict = data_c / 'dictionaries'
# The original line used `wd`, a name that is never defined in this notebook
# (NameError on a fresh kernel). The figure folder is anchored at the project
# root instead.
# NOTE(review): assumes figures live in <project root>/fig - confirm.
fig_dir = base_path / 'fig'
data_models = data_c / 'models'

# Plotting cells save into fig_dir; create it up front so savefig does not
# fail on a missing directory (no-op when the folder already exists).
fig_dir.mkdir(parents=True, exist_ok=True)

# === Load data ===

# Scored corpus: semicolon-separated, UTF-8 encoded CSV from the results folder.
ind_stopwords_un_corpus_scored = pd.read_csv(
    data_results / "ind_stopwords_un_corpus_scored.csv",
    sep=';',
    encoding='utf-8'
)

In [None]:
# === Define Global Plotting Settings ===
# Shared constants for the figures in this notebook.

# --- Figure and line basics ---
FIGSIZE = (12, 6)
LINEWIDTH = 2
MARKER = None

# --- Seaborn grid style (applied globally as soon as this cell runs) ---
GRID_STYLE = "whitegrid"
sns.set_style(GRID_STYLE)

# --- X-axis ticks: 1946, then every fifth year from 1950, closing with 2024 ---
TICK_YEARS = [1946, *range(1950, 2025, 5)]
if 2024 not in TICK_YEARS:
    TICK_YEARS.append(2024)
XTICK_ROTATION = 45

# --- Font sizes ---
AXIS_LABEL_FONT_SIZE = 14
TICK_LABEL_FONT_SIZE = 14
LEGEND_FONT_SIZE = 12

# --- Tick appearance ---
TICK_LENGTH = 5
TICK_WIDTH = 2
TICK_COLOR = "black"
TICK_DIRECTION = "out"

# --- Primary y-axis (y1): fixed range for the emotionality score ---
Y_RANGE_EMOTIONALITY = (0.5, 1.1)

# --- Secondary y-axis (y2) ---
AX_SET_YLIM = 0
AX_SET_Y_MARGIN = 0
SECOND_Y_AXIS_BOTTOM = 0

# --- Spines and grid lines ---
SPINE_COLOR = "darkgrey"
SPINE_WIDTH = 1.5
GRID_COLOR = "darkgrey"
GRID_WIDTH = 1.0

# --- Legend placement ---
LEGEND_LOC = "upper center"
LEGEND_BBOX = (0.5, -0.15)
LEGEND_NCOL = 3

# --- Layout rectangle handed to tight_layout ---
TIGHT_LAYOUT_RECT = [0, 0, 1, 1]

# --- Keyword arguments for sns.despine ---
DESPINE_SETTINGS = {"left": True, "right": True, "top": True, "bottom": False}

# --- Text properties for the note printed below a figure ---
FIG_NOTE_FONT = {"ha": "center", "fontsize": 14, "fontstyle": "italic"}

# --- Colour palette ('#F69B2D' is currently disabled and not part of the list) ---
CUSTOM_COLORS = [
    "#004260",
    "#50822E",
    "#f2c80c",
    "#E4003A",
    "#D95F02",
]

### Figure: Emotionality over time (Individual Stopwords List)

In [None]:
# Figure: mean emotionality score per year (left axis) together with the
# number of scored rows per year (right axis), saved as a PNG in fig_dir.

# Mean score per year
score_over_time = ind_stopwords_un_corpus_scored.groupby('year')['score'].mean()

# Number of rows per year, ordered chronologically
overall_sample_size_per_year = ind_stopwords_un_corpus_scored['year'].value_counts().sort_index()

# Total reported in the figure note. The original cell used `overall_total`
# without defining it (NameError on a fresh kernel). Summing the per-year
# counts keeps the note consistent with the sample-size line drawn below;
# value_counts() skips rows with a missing year by default.
overall_total = int(overall_sample_size_per_year.sum())

fig, ax1 = plt.subplots(figsize=FIGSIZE)

# --- Primary axis: emotionality score ---
x = score_over_time.index
y = score_over_time.values

sns.lineplot(
    x=x,
    y=y,
    marker=MARKER,
    color=CUSTOM_COLORS[0],
    linewidth=LINEWIDTH,
    ax=ax1
)

ax1.set_xlabel("")
ax1.set_ylabel("Emotionality Score", fontsize=AXIS_LABEL_FONT_SIZE)
ax1.set_ylim(Y_RANGE_EMOTIONALITY)
ax1.margins(y=AX_SET_Y_MARGIN)

# Fixed x-range with the shared year ticks
ax1.set_xlim(1946, 2024)
ax1.set_xticks(TICK_YEARS)
ax1.set_xticklabels(TICK_YEARS, rotation=XTICK_ROTATION, fontsize=TICK_LABEL_FONT_SIZE)

ax1.tick_params(
    axis='y', which='both', left=True, right=False,
    direction=TICK_DIRECTION, length=TICK_LENGTH, width=TICK_WIDTH,
    color=TICK_COLOR, labelsize=TICK_LABEL_FONT_SIZE
)
ax1.tick_params(
    axis='x', which='both', bottom=True, top=False,
    direction=TICK_DIRECTION, length=TICK_LENGTH, width=TICK_WIDTH,
    color=TICK_COLOR, labelsize=TICK_LABEL_FONT_SIZE
)

# Style the left/bottom spines and hide the top/right ones on the primary axis
ax1.spines['left'].set_color(SPINE_COLOR)
ax1.spines['left'].set_linewidth(SPINE_WIDTH)
ax1.spines['bottom'].set_color(SPINE_COLOR)
ax1.spines['bottom'].set_linewidth(SPINE_WIDTH)
ax1.spines['top'].set_visible(False)
ax1.spines['right'].set_visible(False)

ax1.yaxis.grid(True, color=GRID_COLOR, linewidth=GRID_WIDTH, zorder=0)
ax1.xaxis.grid(True, color=GRID_COLOR, linewidth=GRID_WIDTH, zorder=0)

# --- Secondary axis: sample size per year ---
ax2 = ax1.twinx()
x2 = overall_sample_size_per_year.index
y2 = overall_sample_size_per_year.values

sns.lineplot(
    x=x2,
    y=y2,
    color='darkgrey',
    linewidth=LINEWIDTH,
    ax=ax2
)
ax2.set_ylabel("Sample Size per Year", color='darkgrey', fontsize=AXIS_LABEL_FONT_SIZE)
ax2.tick_params(
    axis='y', colors='darkgrey', direction=TICK_DIRECTION,
    length=TICK_LENGTH, width=TICK_WIDTH, labelsize=TICK_LABEL_FONT_SIZE
)
ax2.spines['right'].set_visible(True)
ax2.spines['right'].set_color(SPINE_COLOR)
ax2.spines['right'].set_linewidth(SPINE_WIDTH)
ax2.set_ylim(bottom=SECOND_Y_AXIS_BOTTOM)

# Remove only the top spine; left, right and bottom stay visible
sns.despine(left=False, right=False, top=True, bottom=False)

# --- Figure note below the plot ---
note_text = (
    f"Notes: The sample includes {overall_total} speeches."
)
wrapped_text = "\n".join(textwrap.wrap(note_text, width=100))
plt.figtext(0.5, -0.07, wrapped_text, **FIG_NOTE_FONT)

plt.tight_layout(rect=TIGHT_LAYOUT_RECT)
# bbox_inches='tight' keeps the note, which sits below the axes area, in the saved file
plt.savefig(os.path.join(fig_dir, 'emotionality_score_over_time_ind_stopwords.png'), bbox_inches='tight')
plt.show()