# Development of Emotion and Reasoning in the General Speeches of the United Nations: A text-based machine learning approach
## MAIN - File

### Description: 
It automatically runs all notebooks in the correct order, handles optional additional analyses, and can install required Python packages and download necessary resources. All outputs, including figures, tables, and results, are saved to their respective folders.

It runs all notebooks in the correct order. By default, the script installs any required Python packages that are missing; existing packages are never overwritten. To skip this step, set `InstallPackages = False`. The script can also download the resources needed by NLTK and spaCy (tokenizers, taggers, and the `en_core_web_lg` spaCy model); existing resources are not overwritten. To skip the downloads, set `DownloadAdditions = False`.

By default, it will also run the Additional Analysis. If you do not want the Additional Analysis to run, set RUN_ADDITIONAL_ANALYSIS = False.
All figures, tables, and results are saved automatically in the corresponding folders.

##  Installation of required Packages

In [13]:
# Set InstallPackages = False to skip this step entirely.
# Only packages that cannot be found in the current environment are installed;
# packages that are already present are left untouched.

InstallPackages = True

if InstallPackages:
    import importlib
    # Import the submodule explicitly: `import importlib` on its own does not
    # guarantee that `importlib.util` is loaded.
    import importlib.util
    import subprocess
    import sys

    packages = [
        "gensim",
        "joblib",
        "matplotlib",
        "nbconvert",
        "nltk",
        "numpy",
        "pandas",
        "pycountry",
        "rapidfuzz",
        "scipy",
        "seaborn",
        "spacy",
        "tableone",
        "tabulate",
        "tqdm",
    ]

    installed_any = False
    for package in packages:
        if importlib.util.find_spec(package) is None:
            print(f"Installing package: {package}")
            # Call pip through the running interpreter so the package is
            # installed for the same Python that executes this cell.
            subprocess.check_call([sys.executable, "-m", "pip", "install", package])
            installed_any = True
        else:
            print(f"Package already installed: {package}")

    if installed_any:
        # Packages installed while the interpreter is running may not be
        # visible to the import system until its finder caches are reset.
        importlib.invalidate_caches()

# Set DownloadAdditions = False to skip this step.
# Only resources that are missing are downloaded; existing ones are kept.
DownloadAdditions = True

if DownloadAdditions:
    import subprocess
    import sys

    import nltk
    import spacy

    # --- NLTK resources ---
    # Maps each NLTK resource id to the path nltk.data.find() is asked for.
    # NOTE(review): newer NLTK releases may look for differently named
    # resources (e.g. "punkt_tab") - confirm against the installed version.
    nltk_resources = {
        "punkt": "tokenizers/punkt",
        "averaged_perceptron_tagger": "taggers/averaged_perceptron_tagger",
    }
    for pkg, resource_path in nltk_resources.items():
        try:
            nltk.data.find(resource_path)
        except LookupError:
            print(f"Downloading NLTK resource: {pkg}")
            # nltk.download() reports failure through its return value
            # instead of raising, so surface it explicitly.
            if not nltk.download(pkg):
                print(f"WARNING: NLTK resource could not be downloaded: {pkg}")
        else:
            print(f"NLTK resource already exists: {pkg}")

    # --- spaCy model ---
    spacy_model = "en_core_web_lg"
    # Check for the installed model package instead of calling spacy.load(),
    # which would load the whole model into memory just to test for presence.
    if spacy.util.is_package(spacy_model):
        print(f"spaCy model already exists: {spacy_model}")
    else:
        print(f"Downloading spaCy model: {spacy_model}")
        subprocess.check_call([sys.executable, "-m", "spacy", "download", spacy_model])

Package already installed: gensim
Package already installed: joblib
Package already installed: matplotlib
Package already installed: nbconvert
Package already installed: nltk
Package already installed: numpy
Package already installed: pandas
Package already installed: pycountry
Package already installed: rapidfuzz
Package already installed: scipy
Package already installed: seaborn
Package already installed: spacy
Installing package: tableone
Package already installed: tabulate
Package already installed: tqdm
NLTK resource already exists: punkt
NLTK resource already exists: averaged_perceptron_tagger
spaCy model already exists: en_core_web_lg


## Run Notebooks

In [15]:
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
from pathlib import Path

In [23]:

def run_notebook(notebook_path, timeout=20000):
    """
    Execute a single Jupyter notebook from start to finish.

    The notebook file is read as an nbformat version-4 document and run
    with nbconvert's ExecutePreprocessor on the "python3" kernel. The
    notebook's parent directory is passed as the ``path`` resource
    metadata. Progress messages are printed before and after the run.
    The executed notebook is not written back to disk by this function.

    Parameters
    ----------
    notebook_path : str or Path
        Location of the ``.ipynb`` file to execute.
    timeout : int, optional
        Value forwarded to ExecutePreprocessor as ``timeout``
        (default 20000).

    Raises
    ------
    FileNotFoundError
        If ``notebook_path`` does not exist.
    """
    nb_file = Path(notebook_path)
    if not nb_file.exists():
        raise FileNotFoundError(f"Notebook {nb_file} not found.")

    nb_name = nb_file.name
    print(f"Running notebook: {nb_name} ...")

    with nb_file.open("r", encoding="utf-8") as handle:
        notebook = nbformat.read(handle, as_version=4)

    # Hand the notebook's own folder to the preprocessor as the "path" resource.
    resources = {'metadata': {'path': nb_file.parent}}
    executor = ExecutePreprocessor(timeout=timeout, kernel_name="python3")
    executor.preprocess(notebook, resources)

    print(f"Finished notebook: {nb_name}\n")

In [24]:
# Main pipeline, notebook 0: execute the data creation notebook.
run_notebook("notebooks/0_data_creation.ipynb")

Running notebook: 0_data_creation.ipynb ...
Finished notebook: 0_data_creation.ipynb



In [34]:
# Main pipeline, notebook 1: execute the model training / centroids / scoring notebook.
run_notebook("notebooks/1_model_training_centroids_scoring.ipynb")

Running notebook: 1_model_training_centroids_scoring.ipynb ...
Finished notebook: 1_model_training_centroids_scoring.ipynb



In [38]:
# Main pipeline, notebook 2: execute the figures notebook.
run_notebook("notebooks/2_figures.ipynb")

Running notebook: 2_figures.ipynb ...
Finished notebook: 2_figures.ipynb



In [39]:
# Main pipeline, notebook 3: execute the tables notebook.
run_notebook("notebooks/3_tables.ipynb")

Running notebook: 3_tables.ipynb ...
Finished notebook: 3_tables.ipynb



## Additional Analysis

In [54]:
# === Optional: Run Additional Analysis ===
# Set RUN_ADDITIONAL_ANALYSIS = False to skip the additional analysis notebooks.
RUN_ADDITIONAL_ANALYSIS = True

if RUN_ADDITIONAL_ANALYSIS:
    # The notebooks below are executed one after another, in list order,
    # each through run_notebook().
    additional_notebooks = [
        # Different Calculation Weighted Frequencies
        "notebooks/Additional Analysis/Different Calculation Weighted Frequencies/0_data_creation_changed_weighted_freq.ipynb",
        "notebooks/Additional Analysis/Different Calculation Weighted Frequencies/1_model_training_centroids_scoring_changed_weighted_freq.ipynb",

        # Individual Stopwords
        "notebooks/Additional Analysis/Individual Stopwords/0_data_creation_indiv_stopwords.ipynb",
        "notebooks/Additional Analysis/Individual Stopwords/1_model_training_centroids_scoring_indiv_stopwords.ipynb",

        # Figure Comparison Emotionality Score for the different calculations
        "notebooks/Additional Analysis/2_figure_comparison_emotionality_score.ipynb",
    ]

    # Fail fast: check every path before starting, so a missing notebook is
    # reported immediately rather than after the earlier notebooks have run.
    missing_notebooks = [path for path in additional_notebooks if not Path(path).exists()]
    if missing_notebooks:
        raise FileNotFoundError(
            "Additional analysis notebook(s) not found: " + ", ".join(missing_notebooks)
        )

    print("Running Additional Analysis Notebooks...")
    for nb in additional_notebooks:
        run_notebook(nb)

    print("All Notebooks for Additional analysis executed successfully.")

Running Additional Analysis Notebooks...
Running notebook: 0_data_creation_changed_weighted_freq.ipynb ...
Finished notebook: 0_data_creation_changed_weighted_freq.ipynb

Running notebook: 1_model_training_centroids_scoring_changed_weighted_freq.ipynb ...
Finished notebook: 1_model_training_centroids_scoring_changed_weighted_freq.ipynb

Running notebook: 0_data_creation_indiv_stopwords.ipynb ...
Finished notebook: 0_data_creation_indiv_stopwords.ipynb

Running notebook: 1_model_training_centroids_scoring_indiv_stopwords.ipynb ...
Finished notebook: 1_model_training_centroids_scoring_indiv_stopwords.ipynb

Running notebook: 2_figure_comparison_emotionality_score.ipynb ...
Finished notebook: 2_figure_comparison_emotionality_score.ipynb

All Notebooks for Additional analysis executed successfully.
