# Development of Emotion and Reasoning in the General Speeches of the United Nations: A text-based machine learning approach
## Script 3: Tables
### Author: Sarah Franzen

### Instructions BEFORE running this script:
- Make sure you have run Scripts 0-2 completely beforehand; they create the required folder structure and produce the data this script needs

### Description: 
#### This file creates the following figures and tables

Tables
- Summary Statistics of the given variables
- Summary Statistics Emotionality Scoring - per Decade
- Summary Statistics Emotionality Scoring - Categorical Variables
- T-Test for supplementary data on Gender and Position of the Speaker


In [41]:
# == Import libraries for data processing and visualization ==
import joblib
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import seaborn as sns
from scipy import stats
from tabulate import tabulate
import re

# === Set Working Directory ===
# Prompt user to enter working directory path
#wd = input("Please enter your working directory path (e.g., C:\\Users\\sarah\\OneDrive\\Dokumente\\Masterarbeit): ").strip()

# Change to the entered working directory
#try:
   # os.chdir(wd)
    #print(f"Working directory set to: {os.getcwd()}")
#except FileNotFoundError:
   # print("ERROR: The directory you entered does not exist. Please restart and enter a valid path.")
    #exit(1)

# Project root; every other path is derived from it (adjust to your machine)
wd = r"C:\Users\sarah\OneDrive\Dokumente\Masterarbeit"

# === Derive the folder layout from the project root ===
tables_dir = os.path.join(wd, 'tables')
data_c = os.path.join(wd, 'data')
data_results, data_temp, data_freq = (
    os.path.join(data_c, subfolder) for subfolder in ('results', 'temp', 'freq')
)

# === Load the scored corpus ===

# The exports further down use relative file names, so switching into the
# tables folder makes them land there.
os.chdir(tables_dir)
scored_corpus_path = os.path.join(data_results, "un_corpus_scored.csv")
un_corpus_scored = pd.read_csv(scored_corpus_path, sep=';', encoding='utf-8')

# Keep only speeches that have an emotionality score and report how many
# rows were removed by this filter.
rows_before = len(un_corpus_scored)
has_score = un_corpus_scored['score'].notna()
un_corpus_scored = un_corpus_scored[has_score]
print(f"Rows dropped due to missing score: {rows_before - len(un_corpus_scored)}")

Rows dropped due to missing score: 0


## Summary Statistics of the given variables
Create Table and export as tex-file

In [43]:
# Build one 0/1 indicator column per speaker position for the summary tables.
# Rows without a recorded position are kept as missing (<NA>) in every
# indicator rather than being coded as 0.

position_nonmissing = un_corpus_scored['position'].notna()

# Dummies are computed on the rows with a position only, then stretched back
# onto the full index so that they line up with the corpus.
position_dummies = (
    pd.get_dummies(un_corpus_scored.loc[position_nonmissing, 'position'])
    .astype(int)
    .reindex(un_corpus_scored.index)
)

# Mark the rows without a position explicitly as missing and switch to the
# nullable integer dtype so the indicators stay integers despite the gaps.
position_dummies.loc[~position_nonmissing, :] = pd.NA
position_dummies = position_dummies.astype("Int64")

un_corpus_scored = pd.concat([un_corpus_scored, position_dummies], axis=1)

In [44]:
# Descriptive statistics for the numeric variables plus one row per position
# indicator; exported as a styled HTML table and as a LaTeX table.
all_numeric_vars = ['year', 'speech_length_words', 'english_official_language',
                    'security_council_permanent', 'gender_dummy'] + list(position_dummies.columns)

summary_table = pd.DataFrame({
    "Variable": all_numeric_vars,
    "Obs": un_corpus_scored[all_numeric_vars].count().astype(int),
    "Mean": un_corpus_scored[all_numeric_vars].mean().round(3),
    "SD": un_corpus_scored[all_numeric_vars].std().round(3),
    "Min": un_corpus_scored[all_numeric_vars].min(),
    "Max": un_corpus_scored[all_numeric_vars].max()
})

# Section-header row placed above the position indicators; it has no statistics
position_header = pd.DataFrame({
    "Variable": ["Position"],
    "Obs": [""],
    "Mean": [""],
    "SD": [""],
    "Min": [""],
    "Max": [""]
})

# The header goes directly before the first position indicator, i.e. after
# all non-position variables (derived instead of hard-coding the count).
insert_idx = len(all_numeric_vars) - len(position_dummies.columns)
summary_table = pd.concat([summary_table.iloc[:insert_idx],
                           position_header,
                           summary_table.iloc[insert_idx:]]).reset_index(drop=True)

# Display labels; position rows are indented below the "Position" header
var_labels = {
    "year": "Year",
    "speech_length_words": "Number of Words",
    "english_official_language": "English as Official Language (Yes = 1)",
    "security_council_permanent": "Permanent Member Security Council (Yes = 1)",
    "gender_dummy": "Gender (Female = 1)",
    "(Deputy) Minister for Foreign Affairs": "&nbsp;&nbsp;&nbsp;&nbsp;(Deputy) Minister for Foreign Affairs",
    "(Deputy) Prime Minister": "&nbsp;&nbsp;&nbsp;&nbsp;(Deputy) Prime Minister",
    "(Vice-) President": "&nbsp;&nbsp;&nbsp;&nbsp;(Vice-) President",
    "Diplomatic Representative": "&nbsp;&nbsp;&nbsp;&nbsp;Diplomatic Representative",
    "Others": "&nbsp;&nbsp;&nbsp;&nbsp;Others"
}
summary_table['Variable'] = summary_table['Variable'].replace(var_labels)

# Turn the empty header placeholders into proper missing values
numeric_cols = ['Mean','SD','Min','Max']
summary_table[numeric_cols] = summary_table[numeric_cols].replace("", pd.NA)

# Min/Max are whole numbers for every variable in this table
summary_table[['Min', 'Max']] = summary_table[['Min', 'Max']].astype('Int64')

min_width_styles = [
    {'selector': f'td.col{col_idx}', 'props': [('min-width', '80px')]}
    for col_idx in range(1, 6)
]

styled_table = summary_table.style \
    .hide(axis="index") \
    .set_table_styles([
        {'selector': 'th', 'props': [
            ('border-bottom', '3px solid black'), 
            ('color', 'black'),
            ('font-weight', 'bold'),
            ('text-align', 'center'),
            ('background-color', 'white')
        ]},
        {'selector': 'th.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td.col0', 'props': [('text-align', 'left')]},
        *min_width_styles
    ]) \
    .set_properties(**{'text-align': 'center'}, subset=['Obs','Mean','SD','Min','Max']) \
    .format({"Mean": "{:.3f}", "SD": "{:.3f}"}, na_rep="")
# na_rep="" keeps the statistic cells of the "Position" header row empty;
# without it the literal text "<NA>" is written into the HTML.

# --- EXPORT HTML ---
styled_table.to_html("Summary_Statistics_Table.html")

# --- EXPORT LaTeX ---
latex_ready = summary_table.copy()
# Collapse each run of HTML non-breaking spaces into ONE LaTeX indent.
# The group is required: "&nbsp;+" would only repeat the ";" and therefore
# emit one \hspace per entity (4em instead of the 1em used in the other tables).
latex_ready["Variable"] = latex_ready["Variable"].apply(
    lambda x: re.sub(r"(?:&nbsp;)+", r"\\hspace*{1em}", x) if isinstance(x, str) else x
)

latex_table = latex_ready.to_latex(
    index=False,
    na_rep="",
    float_format="%.3f",
    column_format="lrrrrr",
    caption="Summary Statistics",
    label="tab:summary_stats",
    header=["Variable", "Obs", "Mean", "SD", "Min", "Max"],
    bold_rows=False,
    escape=False  
)

# Swap the booktabs rules for plain \hline rules
latex_table = latex_table.replace(
    "\\toprule",
    "\\hline\\hline"
).replace(
    "\\midrule",
    "\\hline"
).replace(
    "\\bottomrule",
    "\\hline\\hline"
)

# Save LaTeX file
with open("Summary_Variables.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)



print(summary_table.to_string(index=False))

                                                     Variable   Obs      Mean        SD  Min   Max
                                                         Year 10952  1993.296    20.186 1946  2024
                                              Number of Words 10952   2913.75  1502.019  423 22003
                       English as Official Language (Yes = 1) 10952     0.239     0.426    0     1
                  Permanent Member Security Council (Yes = 1) 10952     0.035     0.185    0     1
                                          Gender (Female = 1)  4704     0.039     0.193    0     1
                                                     Position            <NA>      <NA> <NA>  <NA>
&nbsp;&nbsp;&nbsp;&nbsp;(Deputy) Minister for Foreign Affairs  6273     0.381     0.486    0     1
              &nbsp;&nbsp;&nbsp;&nbsp;(Deputy) Prime Minister  6273     0.198     0.398    0     1
                    &nbsp;&nbsp;&nbsp;&nbsp;(Vice-) President  6273     0.328      0.47    0     1
          

### Summary Statistics Emotionality Scoring - Subsamples

In [29]:
# Compare the emotionality score of speeches WITH supplementary information
# (gender / position recorded) against speeches WITHOUT it, one row per variable.
test_vars = ['gender_dummy', 'position']
table_labels = {'gender_dummy': 'Gender', 'position': 'Position'}

scores = un_corpus_scored['score']
summary_list = []

for var in test_vars:
    # Split the scores by whether the supplementary variable is recorded
    is_recorded = un_corpus_scored[var].notna()
    group_non_missing = scores[is_recorded]
    group_missing = scores[~is_recorded]

    # Two-sample t-test between the two groups
    t_stat, p_val = stats.ttest_ind(group_non_missing, group_missing, nan_policy='omit')

    summary_list.append({
        'Variable': table_labels[var],
        'Obs (Non-Missing)': len(group_non_missing),
        'Obs (Missing)': len(group_missing),
        'Mean (Non-Missing)': round(group_non_missing.mean(), 3),
        'Mean (Missing)': round(group_missing.mean(), 3),
        't-test': round(t_stat, 3),
        'p-value': round(p_val, 3)
    })

# One row per tested variable
summary_df = pd.DataFrame(summary_list)

# --- Styling ---
header_css = [
    ('border-bottom', '3px solid black'), 
    ('color', 'black'),
    ('font-weight', 'bold'),
    ('text-align', 'center'),
    ('background-color', 'white')
]
cell_formats = {
    "Obs (Non-Missing)": "{:.0f}",
    "Obs (Missing)": "{:.0f}",
    "Mean (Non-Missing)": "{:.3f}",
    "Mean (Missing)": "{:.3f}",
    "t-test": "{:.3f}",
    "p-value": "{:.3f}"
}

styled_table = summary_df.style.hide(axis="index")
styled_table = styled_table.set_table_styles([
    {'selector': 'th', 'props': header_css},
    {'selector': 'th.col0', 'props': [('text-align', 'left')]},
    {'selector': 'td.col0', 'props': [('text-align', 'left')]},
    {'selector': 'td', 'props': [('text-align', 'center')]},
])
styled_table = styled_table.set_properties(**{'text-align': 'center'})
styled_table = styled_table.format(cell_formats, na_rep="-")

# --- Export HTML ---
styled_table.to_html("TTest_Scoring_Gender_Position.html")

# --- Export LaTeX ---
latex_ready = summary_df.copy()

latex_table = latex_ready.to_latex(
    index=False,
    na_rep="",
    float_format="%.3f",
    column_format="lrrrrrr",
    caption="Emotionality Scoring of the Subsamples with T-Tests",
    label="tab:summary_stats_ttest",
    header=["Variable", "Obs (Non-Missing)", "Obs (Missing)", 
            "Mean (Non-Missing)", "Mean (Missing)", "t-test", "p-value"],
    bold_rows=False,
    escape=False
)

# Swap the booktabs rules for plain \hline rules
for booktabs_rule, plain_rule in (
    ("\\toprule", "\\hline\\hline"),
    ("\\midrule", "\\hline"),
    ("\\bottomrule", "\\hline\\hline"),
):
    latex_table = latex_table.replace(booktabs_rule, plain_rule)

with open("TTest_Scoring_Gender_Position.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)

# Echo the table in the notebook output
print(summary_df.to_string(index=False))



Variable  Obs (Non-Missing)  Obs (Missing)  Mean (Non-Missing)  Mean (Missing)  t-test  p-value
  Gender               4704           6248               0.820           0.800   5.136      0.0
Position               6273           4679               0.793           0.829  -9.420      0.0


### Summary Statistics Emotionality Scoring - per Decade

In [10]:
# Assign every speech to a 10-year bin counted from 1946 (1946-1955, 1956-1965, ...)
decade_start = (np.floor((un_corpus_scored['year'] - 1946) / 10) * 10 + 1946).astype(int)
decade_end = decade_start + 9
# The last bin is cut off at 2024 instead of running to its nominal end
decade_end = decade_end.where(decade_end < 2024, 2024)

# Label such as "1946–1955". The separator is written as an escape for the
# en dash: the previous literal was mojibake ("â€“", i.e. the UTF-8 bytes of
# an en dash decoded as cp1252) and ended up verbatim in both exported tables.
un_corpus_scored['Decade'] = decade_start.astype(str) + "\u2013" + decade_end.astype(str)

# Score statistics per decade
decade_summary = (
    un_corpus_scored.groupby('Decade')['score']
    .agg(Obs='count', Mean='mean', SD='std', Min='min', Max='max')
    .reset_index()
)

numeric_cols = ['Mean', 'SD', 'Min', 'Max']
decade_summary[numeric_cols] = decade_summary[numeric_cols].round(3)

min_width_styles = [
    {'selector': f'td.col{col_idx}', 'props': [('min-width', '80px')]}
    for col_idx in range(1, 6)
]

styled_decade_table = (
    decade_summary.style
    .hide(axis="index")
    .set_table_styles([
        {'selector': 'th', 'props': [
            ('border-bottom', '3px solid black'),
            ('color', 'black'),
            ('font-weight', 'bold'),
            ('text-align', 'center'),
            ('background-color', 'white')
        ]},
        {'selector': 'th.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td.col0', 'props': [('text-align', 'left')]},
        *min_width_styles
    ])
    .set_properties(**{'text-align': 'center'}, subset=['Obs', 'Mean', 'SD', 'Min', 'Max'])
    .format({
        'Obs': '{:.0f}',
        'Mean': '{:.3f}',
        'SD': '{:.3f}',
        'Min': '{:.3f}',
        'Max': '{:.3f}'
    }, na_rep='-')
)

# --- EXPORT HTML ---
styled_decade_table.to_html("Scoring_per_Decade.html")

# --- EXPORT LaTeX ---
latex_table = decade_summary.to_latex(
    index=False,
    na_rep="",
    float_format="%.3f",
    column_format="lrrrrr",
    caption="Emotionality Scoring by Decade",
    label="tab:summary_decade",
    header=["Decade", "Obs", "Mean", "SD", "Min", "Max"],
    bold_rows=False,
    escape=False
)

# Swap the booktabs rules for plain \hline rules
latex_table = (
    latex_table.replace("\\toprule", "\\hline\\hline")
               .replace("\\midrule", "\\hline")
               .replace("\\bottomrule", "\\hline\\hline")
)

with open("Scoring_per_Decade.tex", "w", encoding="utf-8") as f:
    f.write(latex_table)

### Summary Statistics Emotionality Scoring for categorical variables

In [12]:
# Score statistics within each category of the binary variables and within
# each speaker position; exported as styled HTML and as a LaTeX table.
group_vars = ['english_official_language', 'security_council_permanent', 'gender_dummy'] + list(position_dummies.columns)

var_labels = {
    'english_official_language': 'English as Official Language',
    'security_council_permanent': 'Permanent Member of the Security Council',
    'gender_dummy': 'Gender',
}

value_labels = {
    'english_official_language': {1: 'Yes (=1)', 0: 'No (=0)'},
    'security_council_permanent': {1: 'Yes (=1)', 0: 'No (=0)'},
    'gender_dummy': {1: 'Female (=1)', 0: 'Male (=0)'}
}


def _header_row(label):
    """Return a section-header row: a label without any statistics.

    The statistic cells are NaN (not "") so the columns stay numeric and the
    rounding below really applies; "Obs" stays "" so counts remain integers.
    """
    return {'Variable': label, 'Obs': "", 'Mean': np.nan, 'SD': np.nan,
            'Min': np.nan, 'Max': np.nan}


def _score_row(label, subset):
    """Return one indented table row with the score statistics of *subset*."""
    return {
        'Variable': f"&nbsp;&nbsp;&nbsp;&nbsp;{label}",
        'Obs': subset['score'].count(),
        'Mean': subset['score'].mean(),
        'SD': subset['score'].std(),
        'Min': subset['score'].min(),
        'Max': subset['score'].max()
    }


summary_list = []
position_header_inserted = False

for var in group_vars:
    if var in value_labels:
        # Labelled binary variable: header row, then one row per observed value
        summary_list.append(_header_row(var_labels[var]))
        for val in sorted(un_corpus_scored[var].dropna().unique()):
            subset = un_corpus_scored[un_corpus_scored[var] == val]
            summary_list.append(_score_row(value_labels[var][val], subset))
        continue

    # Everything else is a position dummy: a single "Position" header is
    # inserted before the first one, then one row per position (dummy == 1).
    if not position_header_inserted:
        summary_list.append(_header_row('Position'))
        position_header_inserted = True
    subset = un_corpus_scored[un_corpus_scored[var] == 1]
    summary_list.append(_score_row(var_labels.get(var, var), subset))


score_summary_table = pd.DataFrame(summary_list)

# The statistic columns are float (header rows hold NaN), so round() takes
# effect; on object columns containing "" it would silently do nothing.
numeric_cols = ['Mean', 'SD', 'Min', 'Max']
score_summary_table[numeric_cols] = score_summary_table[numeric_cols].round(3)

min_width_styles = [
    {'selector': f'td.col{col_idx}', 'props': [('min-width', '80px')]}
    for col_idx in range(1, 6)
]

styled_score_table = (
    score_summary_table.style
    .hide(axis="index")
    .set_table_styles([
        {'selector': 'th', 'props': [
            ('border-bottom', '3px solid black'),
            ('color', 'black'),
            ('font-weight', 'bold'),
            ('text-align', 'center'),
            ('background-color', 'white')
        ]},
        {'selector': 'th.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td.col0', 'props': [('text-align', 'left')]},
        *min_width_styles
    ])
    .set_properties(**{'text-align': 'center'}, subset=['Obs','Mean','SD','Min','Max'])
    # na_rep="" leaves the statistic cells of header rows empty in the HTML
    .format({col: "{:.3f}" for col in numeric_cols}, na_rep="")
)

# --- EXPORT HTML ---
styled_score_table.to_html("Scoring_categorial_variables.html")

# --- EXPORT LaTeX ---
latex_table = score_summary_table.copy()

# The HTML indent becomes a LaTeX horizontal space
latex_table['Variable'] = latex_table['Variable'].apply(
    lambda x: x.replace("&nbsp;&nbsp;&nbsp;&nbsp;", "\\hspace*{1em}") if isinstance(x, str) else x
)

latex_str = latex_table.to_latex(
    index=False,
    na_rep="",
    float_format="%.3f",
    column_format="lrrrrr",
    caption="Emotionality Scoring for the categorial variables",
    label="tab:conditional_vars",
    escape=False
)

# Swap the booktabs rules for plain \hline rules
latex_str = latex_str.replace("\\toprule", "\\hline\\hline") \
                     .replace("\\midrule", "\\hline") \
                     .replace("\\bottomrule", "\\hline\\hline")

with open("Scoring_categorial_variable.tex", "w", encoding="utf-8") as f:
    f.write(latex_str)

In [37]:

# Score statistics per speaker position, restricted to speeches from 1994 on.
# NOTE: the following cell writes the same two file names
# (Scoring_positions_from1994.html / .tex) and additionally includes an
# "Overall" row, so its output replaces the files written here.

# --- Filter data from 1994 onward ---
df_filtered = un_corpus_scored[un_corpus_scored['year'] >= 1994]

# --- Only position dummies ---
position_vars = list(position_dummies.columns)

# Section-header row. The statistic cells are NaN (not "") so the columns
# stay numeric and the rounding below really applies; "Obs" stays "" so the
# counts remain integers.
summary_list = [{
    'Variable': 'Position',
    'Obs': "",
    'Mean': np.nan,
    'SD': np.nan,
    'Min': np.nan,
    'Max': np.nan
}]

# One indented row per position, computed on the speeches with dummy == 1
position_scores = [
    (var_labels.get(var, var), df_filtered[df_filtered[var] == 1]['score'])
    for var in position_vars
]
summary_list.extend(
    {
        'Variable': f"&nbsp;&nbsp;&nbsp;&nbsp;{label}",
        'Obs': scores.count(),
        'Mean': scores.mean(),
        'SD': scores.std(),
        'Min': scores.min(),
        'Max': scores.max()
    }
    for label, scores in position_scores
)

score_summary_table = pd.DataFrame(summary_list)

# Round the statistics; this takes effect because the columns are float
# (on object columns containing "" round() would silently do nothing).
numeric_cols = ['Mean', 'SD', 'Min', 'Max']
score_summary_table[numeric_cols] = score_summary_table[numeric_cols].round(3)

# --- Styling ---
styled_score_table = (
    score_summary_table.style
    .hide(axis="index")
    .set_table_styles([
        {'selector': 'th', 'props': [
            ('border-bottom', '3px solid black'),
            ('color', 'black'),
            ('font-weight', 'bold'),
            ('text-align', 'center'),
            ('background-color', 'white')
        ]},
        {'selector': 'th.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td', 'props': [('text-align', 'center')]}
    ])
    .set_properties(**{'text-align': 'center'}, subset=['Obs','Mean','SD','Min','Max'])
    # na_rep="" leaves the statistic cells of the header row empty in the HTML
    .format({col: "{:.3f}" for col in numeric_cols}, na_rep="")
)

# --- Export HTML ---
styled_score_table.to_html("Scoring_positions_from1994.html")

# --- Export LaTeX ---
latex_table = score_summary_table.copy()
# The HTML indent becomes a LaTeX horizontal space
latex_table['Variable'] = latex_table['Variable'].apply(
    lambda x: x.replace("&nbsp;&nbsp;&nbsp;&nbsp;", "\\hspace*{1em}") if isinstance(x, str) else x
)

latex_str = latex_table.to_latex(
    index=False,
    na_rep="",
    float_format="%.3f",
    column_format="lrrrrr",
    caption="Emotionality Scoring for the positions (1994 and later)",
    label="tab:positions_1994",
    escape=False
)

# Swap the booktabs rules for plain \hline rules
latex_str = latex_str.replace("\\toprule", "\\hline\\hline") \
                     .replace("\\midrule", "\\hline") \
                     .replace("\\bottomrule", "\\hline\\hline")

with open("Scoring_positions_from1994.tex", "w", encoding="utf-8") as f:
    f.write(latex_str)



In [48]:
# Score statistics per speaker position for speeches from 1994 on, followed
# by an "Overall" row covering every speech in that period.

# --- Filter data from 1994 onward ---
df_filtered = un_corpus_scored[un_corpus_scored['year'] >= 1994]

# --- Only position dummies ---
position_vars = list(position_dummies.columns)

# (row label, scores) pairs in table order: the indented positions first,
# computed on the speeches with dummy == 1, then the unindented overall row.
labelled_scores = [
    (f"&nbsp;&nbsp;&nbsp;&nbsp;{var_labels.get(var, var)}",
     df_filtered[df_filtered[var] == 1]['score'])
    for var in position_vars
]
labelled_scores.append(('Overall', df_filtered['score']))

# Section-header row. The statistic cells are NaN (not "") so the columns
# stay numeric and the rounding below really applies; "Obs" stays "" so the
# counts remain integers.
summary_list = [{
    'Variable': 'Position',
    'Obs': "",
    'Mean': np.nan,
    'SD': np.nan,
    'Min': np.nan,
    'Max': np.nan
}]
summary_list.extend(
    {
        'Variable': label,
        'Obs': scores.count(),
        'Mean': scores.mean(),
        'SD': scores.std(),
        'Min': scores.min(),
        'Max': scores.max()
    }
    for label, scores in labelled_scores
)

score_summary_table = pd.DataFrame(summary_list)

# Round the statistics; this takes effect because the columns are float
# (on object columns containing "" round() would silently do nothing).
numeric_cols = ['Mean', 'SD', 'Min', 'Max']
score_summary_table[numeric_cols] = score_summary_table[numeric_cols].round(3)

# --- Styling ---
styled_score_table = (
    score_summary_table.style
    .hide(axis="index")
    .set_table_styles([
        {'selector': 'th', 'props': [
            ('border-bottom', '3px solid black'),
            ('color', 'black'),
            ('font-weight', 'bold'),
            ('text-align', 'center'),
            ('background-color', 'white')
        ]},
        {'selector': 'th.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td.col0', 'props': [('text-align', 'left')]},
        {'selector': 'td', 'props': [('text-align', 'center')]}
    ])
    .set_properties(**{'text-align': 'center'}, subset=['Obs','Mean','SD','Min','Max'])
    # na_rep="" leaves the statistic cells of the header row empty in the HTML
    .format({col: "{:.3f}" for col in numeric_cols}, na_rep="")
)

# --- Export HTML ---
styled_score_table.to_html("Scoring_positions_from1994.html")

# --- Export LaTeX ---
latex_table = score_summary_table.copy()
# The HTML indent becomes a LaTeX horizontal space
latex_table['Variable'] = latex_table['Variable'].apply(
    lambda x: x.replace("&nbsp;&nbsp;&nbsp;&nbsp;", "\\hspace*{1em}") if isinstance(x, str) else x
)

latex_str = latex_table.to_latex(
    index=False,
    na_rep="",
    float_format="%.3f",
    column_format="lrrrrr",
    caption="Emotionality Scoring for the positions (1994 and later)",
    label="tab:positions_1994",
    escape=False
)

# Swap the booktabs rules for plain \hline rules
latex_str = latex_str.replace("\\toprule", "\\hline\\hline") \
                     .replace("\\midrule", "\\hline") \
                     .replace("\\bottomrule", "\\hline\\hline")

with open("Scoring_positions_from1994.tex", "w", encoding="utf-8") as f:
    f.write(latex_str)


In [60]:
## Appendix

# Number of speeches per year, split by the speaker's gender code (one column
# per value of gender_dummy). Year/gender combinations that do not occur are
# shown as 0; speeches without a gender code are not counted because groupby
# drops missing keys by default.
gender_per_year = (
    un_corpus_scored
    .groupby(['year', 'gender_dummy'])
    .size()
    .unstack(fill_value=0)
)

# Show every year. The context manager lifts the row limit only for this
# print and restores the previous setting afterwards, even if printing fails.
with pd.option_context("display.max_rows", None):
    print(gender_per_year)


gender_dummy  0.0  1.0
year                  
1946           35    1
1947           33    1
1948           35    1
1949           33    0
1950           42    0
1951           43    1
1952           40    2
1953           43    0
1954           41    0
1955           43    0
1956           64    1
1957           68    1
1958           67    1
1959           77    1
1960           71    1
1961           76    1
1962           82    1
1963           61    1
1964           94    0
1965           96    1
1966          103    0
1967          107    0
1968          107    1
1969          113    0
1970           70    0
1971          112    1
1972          108    0
1973            1    0
1974          120    0
1975          118    2
1976          123    2
1977            1    1
1981          129    3
1982            5    0
1983            3    3
1985          129    1
1986          134    4
1987            8    1
1988           49    0
1989            6    2
1990            1    0
1991       