# Development of Emotion and Reasoning in the General Speeches of the United Nations: A text-based machine learning approach
## Additional Analysis: Different individualised Stopwords & Different Calculation of Weighted Frequencies - Figures

Ensure that all scripts in the notebook folder and the Additional Analysis folder have been run beforehand.

### Description: 
#### This file creates the following figures

Result Figures
- Emotionality over time (Overall) vs. Emotionality over time (Ind. Stopwords) vs. Emotionality over time (Different Calculation of Weighted Frequencies)

In [None]:
# --- Set base path to project root ---
# Path is imported in this cell so it runs on a fresh kernel; no other visible
# cell imports it. A duplicate import elsewhere is harmless.
from pathlib import Path

# The project root is taken to be the parent of the current working directory.
# NOTE(review): this assumes the notebook is started from a direct sub-folder
# of the project root - confirm against the repository layout.
base_path = Path.cwd().parent  # project root
print(f"Project root set to: {base_path}")

# Data folder and the sub-folder holding the scored corpora read below.
data_c = base_path / "data"
data_results = data_c / 'results'

In [None]:
# === Load data ===
# NOTE(review): os.chdir changes the process working directory. base_path is
# derived from Path.cwd() in the path-setup cell, so re-running that cell after
# this one produces a different base_path - restart the kernel instead.
os.chdir(data_results)


def _read_scored_csv(filename):
    """Read one semicolon-separated, UTF-8 encoded CSV from ``data_results``."""
    csv_path = os.path.join(data_results, filename)
    return pd.read_csv(csv_path, sep=';', encoding='utf-8')


# Three scored corpora, loaded in the same order as before.
un_corpus_scored = _read_scored_csv("un_corpus_scored.csv")
ind_stopwords_un_corpus_scored = _read_scored_csv("ind_stopwords_un_corpus_scored.csv")
removed_lowfreq_words_un_corpus_scored = _read_scored_csv(
    "removed_lowfreq_words_un_corpus_scored.csv"
)

In [None]:
# === Define Global Plotting Settings ===

# Figure geometry and default line appearance used by the plotting cell.
FIGSIZE = (12, 6)            # handed to plt.subplots(figsize=...)
LINEWIDTH, MARKER = 2, None  # handed to sns.lineplot(linewidth=..., marker=...)

# Seaborn style, applied once here at definition time.
GRID_STYLE = "whitegrid"
sns.set_style(GRID_STYLE)

# --- X-axis ticks ---
# First year, then every fifth year from 1950; the final year 2024 is added
# explicitly when the 5-year grid does not already contain it.
TICK_YEARS = [1946, *range(1950, 2025, 5)]
if 2024 not in TICK_YEARS:
    TICK_YEARS += [2024]
XTICK_ROTATION = 45

# --- Font sizes ---
AXIS_LABEL_FONT_SIZE = 14
TICK_LABEL_FONT_SIZE = 14
LEGEND_FONT_SIZE = 12

# --- Tick marks ---
TICK_LENGTH = 5
TICK_WIDTH = 2
TICK_COLOR = "black"
TICK_DIRECTION = "out"

# --- Y-axis range for the primary axis (y1) ---
Y_RANGE_EMOTIONALITY = (0.5, 1.1)

# --- Y-axis settings for the second axis (y2) ---
AX_SET_YLIM = 0
AX_SET_Y_MARGIN = 0
SECOND_Y_AXIS_BOTTOM = 0

# --- Spines and grid lines ---
SPINE_COLOR = "darkgrey"
SPINE_WIDTH = 1.5
GRID_COLOR = "darkgrey"
GRID_WIDTH = 1.0

# --- Legend placement ---
LEGEND_LOC = "upper center"
LEGEND_BBOX = (0.5, -0.15)
LEGEND_NCOL = 3

# --- Layout ---
TIGHT_LAYOUT_RECT = [0, 0, 1, 1]

# --- Despine options ---
DESPINE_SETTINGS = {
    "left": True,
    "right": True,
    "top": True,
    "bottom": False,
}

# --- Figure note text properties ---
FIG_NOTE_FONT = {
    "ha": "center",
    "fontsize": 14,
    "fontstyle": "italic",
}

# --- Color palette ---
# '#F69B2D' is commented out in the original list and stays excluded here.
CUSTOM_COLORS = [
    "#004260",
    "#50822E",
    "#f2c80c",
    "#E4003A",
    "#D95F02",
]

### Figure: Emotionality over time

In [None]:
# === Compute mean emotionality per year for each dataset ===
def _yearly_mean_score(scored_corpus):
    """Return the mean of the 'score' column grouped by 'year'."""
    return scored_corpus.groupby('year')['score'].mean()


mean_all = _yearly_mean_score(un_corpus_scored)
mean_removed_lowfreq = _yearly_mean_score(removed_lowfreq_words_un_corpus_scored)
mean_ind_stopwords = _yearly_mean_score(ind_stopwords_un_corpus_scored)

# === Plot ===
fig, ax = plt.subplots(figsize=FIGSIZE)

# One line per dataset. The order sets the draw order and, through zip(),
# pairs each line with the first three entries of CUSTOM_COLORS.
series_and_labels = (
    (mean_all, "Full Corpus"),
    (mean_removed_lowfreq, "Removed Low-Frequency Words"),
    (mean_ind_stopwords, "Removed Stopwords"),
)
for (yearly_mean, legend_label), line_color in zip(series_and_labels, CUSTOM_COLORS):
    sns.lineplot(
        x=yearly_mean.index,
        y=yearly_mean.values,
        linewidth=LINEWIDTH,
        marker=MARKER,
        color=line_color,
        label=legend_label,
        ax=ax,
    )

# --- Axis labels and ranges ---
ax.set_xlabel("")
ax.set_ylabel("Emotionality Score", fontsize=AXIS_LABEL_FONT_SIZE)
ax.set_ylim(Y_RANGE_EMOTIONALITY)
ax.margins(y=AX_SET_Y_MARGIN)

ax.set_xlim(1946, 2024)
ax.set_xticks(TICK_YEARS)
ax.set_xticklabels(TICK_YEARS, rotation=XTICK_ROTATION, fontsize=TICK_LABEL_FONT_SIZE)

# --- Tick marks: same styling on both axes, drawn on the left/bottom only ---
common_tick_style = dict(
    which='both',
    direction=TICK_DIRECTION,
    length=TICK_LENGTH,
    width=TICK_WIDTH,
    color=TICK_COLOR,
    labelsize=TICK_LABEL_FONT_SIZE,
)
ax.tick_params(axis='y', left=True, right=False, **common_tick_style)
ax.tick_params(axis='x', bottom=True, top=False, **common_tick_style)

# --- Spines: style left/bottom, hide top/right ---
for visible_side in ('left', 'bottom'):
    ax.spines[visible_side].set_color(SPINE_COLOR)
    ax.spines[visible_side].set_linewidth(SPINE_WIDTH)
for hidden_side in ('top', 'right'):
    ax.spines[hidden_side].set_visible(False)

# --- Grid lines on both axes ---
for grid_axis in (ax.yaxis, ax.xaxis):
    grid_axis.grid(True, color=GRID_COLOR, linewidth=GRID_WIDTH, zorder=0)

# --- Legend placed using the LEGEND_* settings ---
ax.legend(
    loc=LEGEND_LOC,
    bbox_to_anchor=LEGEND_BBOX,
    ncol=LEGEND_NCOL,
    frameon=False,
    fontsize=LEGEND_FONT_SIZE,
)

# --- Figure note ---
# NOTE(review): overall_total and fig_dir are not defined in any cell shown in
# this notebook - confirm they are set before this cell is executed.
note_text = f"Notes: The sample includes {overall_total} speeches."
wrapped_text = textwrap.fill(note_text, width=100)
plt.figtext(0.5, -0.07, wrapped_text, **FIG_NOTE_FONT)

# --- Save and display ---
plt.tight_layout(rect=TIGHT_LAYOUT_RECT)
output_path = os.path.join(fig_dir, 'emotionality_score_three_lines.png')
plt.savefig(output_path, bbox_inches='tight')
plt.show()