In [1]:
# == Import libraries for data processing and visualization ==
import matplotlib.pyplot as plt
#from matplotlib.colors import ListedColormap
import seaborn as sns
import os
import joblib
import pandas as pd
from scipy import stats
from tabulate import tabulate
import textwrap
from matplotlib.lines import Line2D

# === Set Working Directory ===

# Project root. Defaults to the author's local folder; set the environment
# variable MASTERARBEIT_WD to run the notebook on another machine without
# editing this cell (replaces the previously commented-out input() prompt).
wd = os.environ.get("MASTERARBEIT_WD", r"C:\Users\sarah\OneDrive\Dokumente\Masterarbeit")

# === Define Folder Paths ===
wd_results = os.path.join(wd, 'results')
data_c = os.path.join(wd, 'data')
data_temp = os.path.join(data_c, 'temp')
data_freq = os.path.join(data_c, 'freq')

# === Load data ===
# The process working directory is switched to the results folder; os.chdir
# raises an OSError (e.g. FileNotFoundError) if that folder does not exist.
os.chdir(wd_results)

# Scored corpus: semicolon-separated CSV, read as UTF-8.
un_corpus_scored = pd.read_csv(
    os.path.join(wd_results, "un_corpus_scored.csv"),
    sep=';',
    encoding='utf-8'
)

In [2]:
# Per-year summary of the scored corpus:
#   avg_score - mean of 'score' within the year, rounded to 3 decimals
#   n         - number of non-missing 'score' values within the year
score_table = (
    un_corpus_scored
    .groupby('year')['score']
    .agg(avg_score='mean', n='count')
    .reset_index()
    .assign(avg_score=lambda tbl: tbl['avg_score'].round(3))
)

# Show every year instead of pandas' truncated default view
with pd.option_context('display.max_rows', None):
    display(score_table)

# Rows holding the maximum / minimum rounded yearly average
# (idxmax / idxmin return the first row when several years tie).
yearly_avg = score_table['avg_score']
highest_year = score_table.loc[yearly_avg.idxmax()]
lowest_year = score_table.loc[yearly_avg.idxmin()]

# The selected row is a mixed-dtype Series, hence the int() cast on 'year'
print(f"Years with the highest average score: {highest_year['avg_score']} in {int(highest_year['year'])}")
print(f"Years with the lowest average score: {lowest_year['avg_score']} in {int(lowest_year['year'])}")

Unnamed: 0,year,avg_score,n
0,1946,0.713,39
1,1947,0.719,39
2,1948,0.713,39
3,1949,0.675,35
4,1950,0.735,44
5,1951,0.792,51
6,1952,0.727,43
7,1953,0.721,44
8,1954,0.685,42
9,1955,0.638,45


Highest average score: 0.937 in 2024
Lowest average score: 0.596 in 1997


In [26]:
# Cut-off for a "notable" year-over-year change. Used by both filters AND both
# headings so the printed text can no longer disagree with the applied filter
# (the headings previously said 0.08 while the filters used 0.05).
DIFF_THRESHOLD = 0.05

# Change of the yearly average versus the previous row of score_table.
# avg_score is rounded to 3 decimals when score_table is built, so the
# difference is rounded to the same precision: this removes floating-point
# noise (e.g. 0.05 represented as 0.04999...) that would otherwise make the
# boundary comparisons below unreliable.
# NOTE(review): diff() compares with the previous ROW; this equals the previous
# calendar year only if no year is missing from the table - confirm.
score_table['diff'] = score_table['avg_score'].diff().round(3)

# Years whose average rose by at least the threshold
increased_years = score_table[score_table['diff'] >= DIFF_THRESHOLD]

# Years whose average fell by at least the threshold
decreased_years = score_table[score_table['diff'] <= -DIFF_THRESHOLD]

# Display increases
print(f"Years with an increase of >= {DIFF_THRESHOLD}:")
with pd.option_context('display.max_rows', None):
    display(increased_years[['year', 'avg_score', 'diff']])

# Display decreases
print(f"Years with a decrease of >= {DIFF_THRESHOLD}:")
with pd.option_context('display.max_rows', None):
    display(decreased_years[['year', 'avg_score', 'diff']])


Years with an increase of >= 0.08:


Unnamed: 0,year,avg_score,diff
4,1950,0.735,0.06
5,1951,0.792,0.057
10,1956,0.762,0.124
14,1960,0.773,0.077
22,1968,0.79,0.051
33,1979,0.753,0.084
52,1998,0.717,0.121
55,2001,0.861,0.176
60,2006,0.746,0.059
62,2008,0.797,0.051


Years with a decrease of >= 0.08:


Unnamed: 0,year,avg_score,diff
6,1952,0.727,-0.065
16,1962,0.727,-0.062
24,1970,0.695,-0.088
29,1975,0.661,-0.105
41,1987,0.709,-0.051
45,1991,0.693,-0.051
54,2000,0.685,-0.053
56,2002,0.804,-0.057
59,2005,0.687,-0.105
72,2018,0.822,-0.065


In [14]:
# Five highest-scoring speeches across the whole corpus
top5 = (
    un_corpus_scored
    .nlargest(n=5, columns='score')
    .loc[:, ['country_name', 'year', 'speech', 'score']]
)
print("Top 5 speeches by score:")
print(top5)

# Visual separator: blank line, 50 '=' characters, blank line
print("", "=" * 50, "", sep="\n")

# Five lowest-scoring speeches across the whole corpus
bottom5 = (
    un_corpus_scored
    .nsmallest(n=5, columns='score')
    .loc[:, ['country_name', 'year', 'speech', 'score']]
)
print("Bottom 5 speeches by score:")
print(bottom5)

Top 5 speeches by score:
                           country_name  year  \
9819                            Eritrea  2002   
10666                             Libya  2023   
3231                              Haiti  2003   
4631   Saint Vincent and the Grenadines  2004   
3384                              Haiti  1994   

                                                  speech     score  
9819   ﻿Let me start by\ncongratulating the President...  1.635238  
10666  I address the General Assembly on behalf of th...  1.624794  
3231   ﻿It is my\nhonour to greet the President on be...  1.624352  
4631   The fifty-ninth session of the General Assembl...  1.559744  
3384    It gives me great pleasure to greet all of yo...  1.517082  


Bottom 5 speeches by score:
      country_name  year                                             speech  \
3712      Slovakia  1995  Mr. President, it is\nundoubtedly a great hono...   
594          Spain  1993  I should like first of all, Mr. President, to\...   

In [16]:
# Closer look at the two most emotional speeches
def print_speech(country: str, year: int, corpus=None) -> None:
    """Print the full text of one speech, identified by country and year.

    Parameters
    ----------
    country : str
        Value to match exactly against the 'country_name' column.
    year : int
        Value to match against the 'year' column.
    corpus : pandas.DataFrame, optional
        Frame with 'country_name', 'year' and 'speech' columns. When omitted,
        the notebook-level ``un_corpus_scored`` frame is used, so existing
        two-argument calls behave as before.

    Returns
    -------
    None
        Output goes to stdout. If several rows match, only the first is
        printed; if none match, a "No speech found" message is printed.
    """
    frame = un_corpus_scored if corpus is None else corpus

    speech_row = frame[
        (frame['country_name'] == country) &
        (frame['year'] == year)
    ]
    if speech_row.empty:
        print(f"No speech found for {country} in {year}.\n")
        return

    text = speech_row.iloc[0]['speech']
    # Some speeches start with a byte-order mark (U+FEFF) that shows up as a
    # stray invisible character in the output; drop it for display only.
    if isinstance(text, str):
        text = text.lstrip('\ufeff')

    print(f"Speech from {country} in {year}:\n")
    print(text)
    print("\n" + "="*50 + "\n")

# Print the full text of the two hand-picked speeches for this cell.
# NOTE(review): the "Top 5 speeches by score" output in this notebook lists
# Eritrea 2002 and Libya 2023 as the highest-scoring speeches, not the two
# below - confirm these hard-coded picks still match the current scores.
print_speech(country='Cameroon', year=2001)
print_speech(country='Sao Tome and Principe', year=2003)

# TODO: fill in the topic of the Cameroon speech (placeholder: XXXXXXXXXXXXXXXXXXXXXXXXXXxxx)

# TODO: fill in the topic of the Sao Tome and Principe speech (placeholder: XXXXXXXXXXXXXXXXXXXXX)

Speech from Cameroon in 2001:

﻿I should like at the outset to express the
profound sympathy and condolences of Cameroon to
the Government and the people of the United States of
America and of the Dominican Republic for the
accident involving the American Airlines Airbus on 12
November in New York.
It was also with great dismay and sadness that we
learned of the natural disaster that struck the brotherly
people of Algeria with such severity. On behalf of the
people and the Government of Cameroon, I would like
to extend to that country our most profound
condolences and solidarity.
Rarely has a session of the General Assembly
been such a focus of international public attention or
aroused the interest of the worldwide media to the
extent that the current session has. The annual session
of the General Assembly is a powerful symbol of the
coming together of nations, the promotion of cultures
and respect for differences and freedoms. This year,
however, a shadow has been cast over our sessio

In [6]:
# Closer look at the two most rational speeches
# NOTE(review): print_speech is also defined in an earlier cell of this
# notebook; this definition replaces it when the cell runs. Consider keeping a
# single definition.
def print_speech(country: str, year: int, corpus=None) -> None:
    """Print the full text of one speech, identified by country and year.

    Parameters
    ----------
    country : str
        Value to match exactly against the 'country_name' column.
    year : int
        Value to match against the 'year' column.
    corpus : pandas.DataFrame, optional
        Frame with 'country_name', 'year' and 'speech' columns. When omitted,
        the notebook-level ``un_corpus_scored`` frame is used, so existing
        two-argument calls behave as before.

    Returns
    -------
    None
        Output goes to stdout. If several rows match, only the first is
        printed; if none match, a "No speech found" message is printed.
    """
    frame = un_corpus_scored if corpus is None else corpus

    speech_row = frame[
        (frame['country_name'] == country) &
        (frame['year'] == year)
    ]
    if speech_row.empty:
        print(f"No speech found for {country} in {year}.\n")
        return

    text = speech_row.iloc[0]['speech']
    # Some speeches start with a byte-order mark (U+FEFF) that shows up as a
    # stray invisible character in the output; drop it for display only.
    if isinstance(text, str):
        text = text.lstrip('\ufeff')

    print(f"Speech from {country} in {year}:\n")
    print(text)
    print("\n" + "="*50 + "\n")

# Print the full text of the two hand-picked speeches for this cell.
# NOTE(review): the "Bottom 5 speeches by score" output in this notebook starts
# with Slovakia 1995 and Spain 1993, not the two below - confirm these
# hard-coded picks still match the current scores.
print_speech(country='Finland', year=1993)
print_speech(country='Italy', year=1996)

# TODO: fill in the topic of the Finland speech (placeholder: XXXXXXXXXXXXXx)

# TODO: fill in the topic of the Italy speech (placeholder: XXXXXXXXXXXXXXXXXX)

Speech from Finland in 1993:

I wish to begin by
congratulating Ambassador Insanally on his election to the
high office of the presidency of the General Assembly. He
can rest assured that the delegation of Finland will render
him its full support in his challenging task.
On behalf of my Government, I also wish to
congratulate and extend a warm welcome to the new
Members among us. Their participation in the work of the
United Nations will further strengthen the Organization.
Just a few years ago, in a promisingly changing
political environment, high hopes were placed in the United
Nations. Some of these hopes were fulfilled, others were
not. Namibia’s independence process is a fine example of
nation-building in which the United Nations played a crucial
role. Eritrea is another. Let these be examples of wise and
patient conciliation and conflict management. Yet another
example is South Africa, where today we see irrevocable
changes taking place, leading towards a non-racial and
democrati

In [18]:
# Keep only rows flagged as permanent Security Council members
# (security_council_permanent equal to 1)
permanent_members = un_corpus_scored.loc[
    un_corpus_scored['security_council_permanent'].eq(1)
]

# NOTE(review): top5 / bottom5 below overwrite the variables of the same name
# created by the all-countries ranking cell.

# Five highest-scoring speeches among the permanent members
top5 = (
    permanent_members
    .nlargest(n=5, columns='score')
    .loc[:, ['country_name', 'year', 'speech', 'score']]
)
print("Top 5 speeches by score (Security Council permanent members):")
print(top5)

# Visual separator: blank line, 50 '=' characters, blank line
print("", "=" * 50, "", sep="\n")

# Five lowest-scoring speeches among the permanent members
bottom5 = (
    permanent_members
    .nsmallest(n=5, columns='score')
    .loc[:, ['country_name', 'year', 'speech', 'score']]
)
print("Bottom 5 speeches by score (Security Council permanent members):")
print(bottom5)

Top 5 speeches by score (Security Council permanent members):
        country_name  year                                             speech  \
5989   United States  2001  ﻿We meet in a Hall devoted to\npeace; in a cit...   
4228   United States  1998  Let me begin by thanking the\nAssembly for its...   
847    United States  2003  ﻿Twenty-four months ago, and\nyesterday in the...   
5650   United States  2017  I welcome everyone to New York. It is a profou...   
10940  United States  2020  It is my profound honor to address the United ...   

          score  
5989   1.404797  
4228   1.394045  
847    1.387432  
5650   1.356830  
10940  1.352940  


Bottom 5 speeches by score (Security Council permanent members):
       country_name  year                                             speech  \
8510         France  1994  France is especially pleased, Mr. President, t...   
2257         Russia  2007  Traditionally, the General Assembly sums up \n...   
8863  United States  1949  Mr. ACHES