## Section 1: Configuration and Setup

In [None]:
# Cell 1: Import Libraries and Configure Environment
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import seaborn as sns
import os
from pathlib import Path
from collections import Counter
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

print("Libraries imported successfully")

In [None]:
# Cell 2: Configure Paths and Analysis Parameters

# ----------------------------------------------------------------
# RESEARCH CONFIGURATION
# Every tunable analysis choice lives in this cell.
# ----------------------------------------------------------------
#
# The values below are the ones used in the original research. They may be
# edited to replicate the analysis under different parameters, but the
# published figures were produced with exactly these settings.

# Country groups: member-state name as it appears in the data -> short code.
CRINK_COUNTRIES = {
    "CHINA": "CHN",
    "RUSSIAN FEDERATION": "RUS",
    "IRAN (ISLAMIC REPUBLIC OF)": "IRN",
    "DEMOCRATIC PEOPLE'S REPUBLIC OF KOREA": "PRK",
}

# NOTE(review): unlike the CRINK codes above, 'GER', 'FR' and 'UK' are not
# ISO 3166-1 alpha-3 codes (those would be DEU, FRA, GBR) - confirm which
# convention any consumer of these codes expects before relying on them.
WESTERN_COUNTRIES = {
    "UNITED STATES": "USA",
    "GERMANY": "GER",
    "FRANCE": "FR",
    "UNITED KINGDOM": "UK",
}

# Year ranges used by the research.
UNGA_START_YEAR = 1991             # first UNGA data point
FIRST_COMMITTEE_START_YEAR = 2003  # First Committee data collection began
END_YEAR = 2024                    # latest data included

# Figure settings.
PLOT_STYLE = "bw"        # 'bw' selects black/white plots; any other value selects colour
FIGURE_SIZE = (14, 5)    # default figure size as (width, height)
DPI = 300                # resolution used when figures are saved

# Echo the configuration so the rendered notebook records what was used.
print(f"CRINK Countries: {list(CRINK_COUNTRIES)}")
print(f"Western Countries: {list(WESTERN_COUNTRIES)}")
print(f"Analysis Period: {UNGA_START_YEAR}-{END_YEAR}")

In [None]:
# Cell 3: Set Up Data and Results Directories

# All paths are derived from the current working directory, which is taken
# to be the repository's 'notebooks' folder: its parent is the repo root.
notebook_dir = Path.cwd()
repo_root = notebook_dir.parent
data_dir = repo_root.joinpath("data", "processed")
results_dir = repo_root.joinpath("results")

# Make sure both folders exist (already-existing folders are left alone).
for required_dir in (data_dir, results_dir):
    required_dir.mkdir(parents=True, exist_ok=True)

print(
    f"Repository root: {repo_root}",
    f"Data directory: {data_dir}",
    f"Results directory: {results_dir}",
    "\nDirectory structure verified.",
    sep="\n",
)

## Section 2: Load and Prepare Data

In [None]:
# Cell 4: Load UN Voting Data

# Show which CSV files are currently present in the processed-data folder.
csv_files = list(data_dir.glob('*.csv'))
print(f"Available CSV files in {data_dir}:")
for position, candidate in enumerate(csv_files, start=1):
    print(f"  {position}. {candidate.name}")

# Dataset selection - edit the filename here to analyse a different file.
# Options: 'UNGA_voting_records_filtered.csv' (full dataset)
dataset_choice = 'UNGA_voting_records_filtered.csv'
csv_path = data_dir / dataset_choice

# Stop early, with download instructions, when the chosen file is absent.
if not csv_path.exists():
    missing_data_message = (
        f"Data file not found: {csv_path}\n"
        f"Please download processed data from Harvard Dataverse and place in {data_dir}"
    )
    raise FileNotFoundError(missing_data_message)

print(f"\nLoading: {dataset_choice}")
df = pd.read_csv(csv_path, encoding='utf-8')

# Report what was loaded.
print("✓ Data loaded successfully")
print(f"  Shape: {df.shape}")
print(f"  Columns: {df.columns.tolist()}")
print("\nFirst few rows:")

# Last expression of the cell: rendered as the cell's rich output.
df.head()

In [None]:
# Cell 5: Data Cleaning and Standardization

# Derive the analysis year. When a 'date' column exists it is parsed
# (unparseable values become NaT) and its year replaces any existing 'year'
# column; otherwise a pre-existing 'year' column is required.
if 'date' in df.columns:
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    df['year'] = df['date'].dt.year
elif 'year' not in df.columns:
    raise ValueError("Neither 'date' nor 'year' column found. Cannot determine analysis period.")

# Validate required columns up front, before any of them is used below.
required_cols = ['ms_name', 'ms_vote', 'year']
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
    raise ValueError(f"Missing required columns: {missing_cols}")

# Standardize country names (some datasets use 'USSR' instead of 'RUSSIAN FEDERATION')
df['ms_name'] = df['ms_name'].replace({'USSR': 'RUSSIAN FEDERATION'})

# Filter to UNGA period. Rows with a missing year fail both comparisons, so
# rows whose date could not be parsed are removed here as well; the count is
# reported below so that this loss is visible rather than silent.
in_period = (df['year'] >= UNGA_START_YEAR) & (df['year'] <= END_YEAR)
rows_dropped = int((~in_period).sum())
df = df[in_period].copy()

# `.dt.year` yields floats whenever any date was NaT. With missing years now
# filtered out the cast is safe, and years print as 1991 rather than 1991.0.
df['year'] = df['year'].astype(int)

print("✓ Data cleaned and standardized")
print(f"  Time period: {df['year'].min()}-{df['year'].max()}")
print(f"  Countries: {df['ms_name'].nunique()}")
print(f"  Votes: {len(df)}")
print(f"  Rows dropped (outside period or missing year): {rows_dropped}")

## Section 3: CRINK Voting Alignment Analysis

In [None]:
# Cell 6: Create Vote Pivot and Calculate Group Alignment

# Reshape to wide form: one row per distinct (undl_id, date) pair and one
# column per member state holding that state's vote. aggfunc='first' keeps a
# single value if a state appears more than once for the same pair.
vote_pivot = (
    df.pivot_table(
        values='ms_vote',
        index=['undl_id', 'date'],
        columns='ms_name',
        aggfunc='first',
    )
    .reset_index()
)

print(f"Created pivot table: {len(vote_pivot)} resolutions")

# Names of the CRINK members, in configuration order.
crink_countries = list(CRINK_COUNTRIES)


def _collect_crink_votes(row):
    """Return the non-missing votes of the CRINK members found in one pivot row."""
    return [
        row[member]
        for member in crink_countries
        if member in row.index and pd.notna(row[member])
    ]


def _most_common_vote(votes):
    """Return the most frequent vote in `votes`, or None for an empty list.

    Counter.most_common orders equal counts by first occurrence, so a tie is
    resolved in favour of the vote cast by the earliest-listed CRINK member.
    """
    if not votes:
        return None
    return Counter(votes).most_common(1)[0][0]


# Per-resolution list of CRINK votes, then the group's most common vote.
vote_pivot['crink_votes'] = vote_pivot.apply(_collect_crink_votes, axis=1)
vote_pivot['crink_group_vote'] = vote_pivot['crink_votes'].apply(_most_common_vote)

# Classify each resolution by the size of its largest agreeing bloc
def classify_crink_agreement(votes):
    """Return the size of the largest bloc of identical votes, if at least 2.

    Args:
        votes: sequence of hashable vote values, one per country that voted.

    Returns:
        The number of entries sharing the most frequent vote when that number
        is 2 or more; None when fewer than two votes were given or when no
        two votes are the same.
    """
    if len(votes) < 2:
        return None
    largest_bloc = max(Counter(votes).values())
    if largest_bloc < 2:
        return None
    return largest_bloc

# Size of the largest agreeing CRINK bloc per resolution (missing when fewer
# than two members agree), plus the calendar year taken from the vote date.
vote_pivot['agreement_count'] = vote_pivot['crink_votes'].apply(classify_crink_agreement)
vote_pivot['year'] = pd.to_datetime(vote_pivot['date']).dt.year

# Headline counts; the 3+ figure includes the unanimous 4-way cases.
agreement = vote_pivot['agreement_count']
n_two_plus = agreement.notna().sum()
n_three_plus = agreement.ge(3).sum()
n_unanimous = agreement.eq(4).sum()

print("✓ CRINK voting patterns calculated")
print(f"  Votes with 2+ agreement: {n_two_plus}")
print(f"  Votes with 3+ agreement: {n_three_plus}")
print(f"  Votes with 4-way (unanimous): {n_unanimous}")

In [None]:
# Cell 7: Calculate UN Majority Vote and Alignment

# Most common vote among all member states for one resolution
def get_majority_vote(group):
    """Return the most frequent non-missing value of `group['ms_vote']`.

    Args:
        group: DataFrame (or mapping of Series) holding an 'ms_vote' column
            with the votes cast on a single resolution.

    Returns:
        The modal vote, or None when no non-missing vote exists. This is a
        plurality, not a strict majority. When several votes are tied, the
        first entry of `Series.mode()` is returned; pandas returns modes in
        sorted order.
    """
    cast_votes = group['ms_vote'].dropna()
    if cast_votes.empty:
        return None
    modes = cast_votes.mode()
    if modes.empty:
        return None
    return modes.iloc[0]

if 'undl_id' in df.columns:
    # Select only the vote column before `apply`. Letting the callback see the
    # grouping column is deprecated pandas behavior, and get_majority_vote
    # reads nothing but 'ms_vote', so the result is the same without it.
    majority_votes = (
        df.groupby('undl_id')[['ms_vote']]
        .apply(get_majority_vote)
        .to_dict()
    )
    vote_pivot['un_majority_vote'] = vote_pivot['undl_id'].map(majority_votes)

    # A resolution counts as "CRINK with majority" (1) only when at least two
    # CRINK members agree (agreement_count is set) AND the group's most common
    # vote equals the UN-wide most common vote; every other case is 0.
    vote_pivot['crink_with_majority'] = (
        (vote_pivot['crink_group_vote'] == vote_pivot['un_majority_vote']) &
        vote_pivot['agreement_count'].notna()
    ).astype(int)

    n_resolutions = len(vote_pivot)
    n_with_majority = vote_pivot['crink_with_majority'].sum()

    print("✓ UN majority votes determined")
    print(f"  Resolutions analyzed: {n_resolutions}")
    print(f"  CRINK with majority: {n_with_majority} ({100*n_with_majority/n_resolutions:.1f}%)")
else:
    # NOTE(review): the yearly aggregation and the CSV export later in this
    # notebook read 'crink_with_majority' / 'un_majority_vote', so they will
    # fail if this branch is taken.
    print("Warning: 'undl_id' column not found. Skipping majority alignment calculation.")

## Section 4: Generate Visualizations

In [None]:
# Cell 8: Prepare Time Series Data

# One row per year with counts by coalition size, followed by their shares.
#
# NOTE(review): 'count' ignores missing values, so `total_votes` is the number
# of resolutions in the year that have an agreement_count (at least two CRINK
# members agreeing), not the number of all resolutions. Every percentage below
# therefore uses that denominator - confirm this is the intended base.
yearly_stats = (
    vote_pivot
    .groupby('year')
    .agg(
        total_votes=('agreement_count', 'count'),
        votes_2way=('agreement_count', lambda sizes: sizes.eq(2).sum()),
        votes_3way=('agreement_count', lambda sizes: sizes.eq(3).sum()),
        votes_4way=('agreement_count', lambda sizes: sizes.eq(4).sum()),
        votes_with_majority=('crink_with_majority', 'sum'),
    )
    .reset_index()
    .assign(
        pct_4way=lambda tbl: 100 * tbl['votes_4way'] / tbl['total_votes'],
        pct_3way=lambda tbl: 100 * tbl['votes_3way'] / tbl['total_votes'],
        pct_2way=lambda tbl: 100 * tbl['votes_2way'] / tbl['total_votes'],
        pct_majority=lambda tbl: 100 * tbl['votes_with_majority'] / tbl['total_votes'],
    )
)

print("✓ Time series data prepared")
print("\nYearly summary (first 5 years):")
preview_columns = ['year', 'total_votes', 'votes_4way', 'pct_4way', 'pct_majority']
print(yearly_stats[preview_columns].head())

In [None]:
# Cell 9: Figure 1 - CRINK Voting Cohesion and Majority Alignment Over Time

# Line appearance. Any PLOT_STYLE other than 'bw' selects the colour palette.
if PLOT_STYLE == 'bw':
    line_color = 'black'
    majority_color = 'gray'
    linestyle = '-'
    majority_linestyle = '--'
else:
    line_color = '#E74C3C'  # Red
    majority_color = '#3498DB'  # Blue
    linestyle = '-'
    majority_linestyle = '-'

# Create figure (dpi here is for on-screen rendering; the file is saved at DPI)
fig, ax = plt.subplots(figsize=FIGURE_SIZE, dpi=100)

# Plot 4-way alignment: share of counted resolutions where all four agree
ax.plot(
    yearly_stats['year'],
    yearly_stats['pct_4way'],
    label='CRINK Group of 4 Votes',
    color=line_color,
    linestyle=linestyle,
    linewidth=2.5,
    marker='o',
    markersize=5
)

# Plot majority alignment: share where the CRINK group vote matches the UN one
ax.plot(
    yearly_stats['year'],
    yearly_stats['pct_majority'],
    label='CRINK Aligned with UN Majority',
    color=majority_color,
    linestyle=majority_linestyle,
    linewidth=2,
    marker='^',
    markersize=5
)

# Formatting. The period in the title comes from the configuration cell so it
# stays correct when the analysis is re-run with a different year range.
ax.set_xlabel('Year', fontsize=11, fontweight='bold')
ax.set_ylabel('Share of Votes (%)', fontsize=11, fontweight='bold')
ax.set_title(
    f'CRINK Voting Cohesion and Alignment with UN Majority\nUNGA Plenary, {UNGA_START_YEAR}-{END_YEAR}',
    fontsize=12,
    fontweight='bold',
    pad=20
)
ax.set_ylim(0, 100)
ax.grid(axis='y', linestyle=':', alpha=0.5)
ax.legend(loc='best', frameon=False, fontsize=10)

# Set x-axis ticks on multiples of `tick_interval`, starting at or below the
# first year and running to the first multiple at or after the last year.
years = yearly_stats['year'].values
tick_interval = 5
first_tick = int(years.min() // tick_interval) * tick_interval
ticks = list(range(first_tick, int(years.max() + tick_interval), tick_interval))
ax.set_xticks(ticks)

plt.tight_layout()
plt.savefig(results_dir / 'figure_01_crink_cohesion.png', dpi=DPI, bbox_inches='tight')
print("✓ Figure 1 saved: figure_01_crink_cohesion.png")
plt.show()

In [None]:
# Cell 10: Figure 2 - Coalition Size Distribution

fig, ax = plt.subplots(figsize=FIGURE_SIZE, dpi=100)

# Color scheme for coalition types (any PLOT_STYLE other than 'bw' uses colour)
if PLOT_STYLE == 'bw':
    colors = {'2way': '#CCCCCC', '3way': '#808080', '4way': '#000000'}
else:
    colors = {'2way': '#3498DB', '3way': '#F39C12', '4way': '#E74C3C'}

x = yearly_stats['year'].values
width = 0.6

# Stack bars bottom-up: 2-way, then 3-way on top of it, then 4-way on top of both
bottom_3way = yearly_stats['pct_2way']
bottom_4way = yearly_stats['pct_2way'] + yearly_stats['pct_3way']
ax.bar(x, yearly_stats['pct_2way'], width, label='2-way', color=colors['2way'])
ax.bar(x, yearly_stats['pct_3way'], width, bottom=bottom_3way, label='3-way', color=colors['3way'])
ax.bar(x, yearly_stats['pct_4way'], width, bottom=bottom_4way, label='4-way', color=colors['4way'])

# Formatting. The period in the title comes from the configuration cell so it
# stays correct when the analysis is re-run with a different year range.
ax.set_xlabel('Year', fontsize=11, fontweight='bold')
ax.set_ylabel('Share of CRINK Votes (%)', fontsize=11, fontweight='bold')
ax.set_title(
    f'CRINK Coalition Size Distribution\nUNGA Plenary, {UNGA_START_YEAR}-{END_YEAR}',
    fontsize=12,
    fontweight='bold',
    pad=20
)
ax.set_ylim(0, 100)
ax.legend(loc='upper right', frameon=False, fontsize=10)
ax.grid(axis='y', linestyle=':', alpha=0.5)

plt.tight_layout()
plt.savefig(results_dir / 'figure_02_coalition_sizes.png', dpi=DPI, bbox_inches='tight')
print("✓ Figure 2 saved: figure_02_coalition_sizes.png")
plt.show()

## Section 5: Generate Summary Tables

In [None]:
# Cell 11: Calculate Dyadic Alignment Statistics

crink_list = list(CRINK_COUNTRIES.keys())

# Column order of the resulting table.
dyadic_columns = ['country1', 'country2', 'alignment_pct', 'identical_votes', 'total_votes']

# One record per CRINK pair. The list is what the table is built from, so no
# pair can be lost. `dyadic_alignment` keeps the original first-word keys for
# lookups, but those keys can collide for country names sharing a first word.
dyadic_records = []
dyadic_alignment = {}

for i, country1 in enumerate(crink_list):
    for country2 in crink_list[i+1:]:
        # Skip pairs where either country has no column in the pivot
        if country1 not in vote_pivot.columns or country2 not in vote_pivot.columns:
            continue

        # Restrict to resolutions on which both countries have a recorded vote
        both_voted = vote_pivot[
            vote_pivot[country1].notna() & vote_pivot[country2].notna()
        ]
        total = len(both_voted)
        if total == 0:
            continue

        # Count identical votes
        identical = (both_voted[country1] == both_voted[country2]).sum()

        record = {
            'country1': country1,
            'country2': country2,
            'identical_votes': identical,
            'total_votes': total,
            'alignment_pct': 100 * identical / total
        }
        dyadic_records.append(record)
        dyadic_alignment[f"{country1.split()[0]}-{country2.split()[0]}"] = record

# Building from records (rather than transposing a dict of dicts) keeps the
# numeric columns numeric, and passing `columns` explicitly yields a valid
# empty table instead of a KeyError when no pair has any shared vote.
dyadic_df = (
    pd.DataFrame(dyadic_records, columns=dyadic_columns)
    .sort_values('alignment_pct', ascending=False)
)

print("✓ Dyadic alignment calculated")
print("\nDyadic Alignment Statistics (CRINK Countries):")
print(dyadic_df.to_string(index=False))

In [None]:
# Cell 12: Save Results to CSV

# Date stamp (YYYYMMDD) appended to every output filename.
timestamp = pd.Timestamp.now().strftime('%Y%m%d')


def _save_table(table, stem):
    """Write `table` to results_dir as '<stem>_<timestamp>.csv' and report it.

    The index is not written. Returns the path of the file that was saved.
    """
    target = results_dir / f'{stem}_{timestamp}.csv'
    table.to_csv(target, index=False)
    print(f"✓ Saved: {target.name}")
    return target


# Yearly statistics and dyadic alignment tables are written as they are.
yearly_csv = _save_table(yearly_stats, 'crink_yearly_statistics')
dyadic_csv = _save_table(dyadic_df, 'crink_dyadic_alignment')

# The vote pivot is exported with its CRINK analysis columns only.
vote_export_columns = [
    'undl_id',
    'date',
    'year',
    'crink_group_vote',
    'agreement_count',
    'un_majority_vote',
    'crink_with_majority',
]
votes_csv = _save_table(vote_pivot[vote_export_columns], 'crink_voting_records')

print(f"\nAll results saved to: {results_dir}")

## Section 6: Summary Statistics

In [None]:
# Cell 13: Generate Summary Report

banner = "=" * 80
n_resolutions = len(vote_pivot)
agreement = vote_pivot['agreement_count']


def _count_with_share(mask):
    """Format the number of True entries in `mask` with its share of all resolutions."""
    count = mask.sum()
    return f"{count} ({100*count/n_resolutions:.1f}%)"


def _dyad_label(name):
    """Short label for a country: its configured CRINK code, else its first word."""
    return CRINK_COUNTRIES.get(name, name.split()[0])


print("\n" + banner)
print("CRINK VOTING ALIGNMENT ANALYSIS - SUMMARY REPORT")
print(banner)

print(f"\nANALYSIS PERIOD: {UNGA_START_YEAR}-{END_YEAR}")
print("\nDATA SUMMARY:")
print(f"  Total resolutions: {n_resolutions}")
print(f"  Total country votes: {len(df)}")
print(f"  Countries analyzed: {df['ms_name'].nunique()}")

# The second line counts agreement_count >= 3, i.e. it includes the unanimous
# cases, so it is labelled "3+" rather than "3-way".
print("\nCRINK VOTING PATTERNS:")
print(f"  Resolutions with 2+ CRINK agreement: {_count_with_share(agreement.notna())}")
print(f"  Resolutions with 3+ CRINK agreement: {_count_with_share(agreement >= 3)}")
print(f"  Resolutions with 4-way (unanimous): {_count_with_share(agreement == 4)}")

# "Average alignment" is the unweighted mean of the yearly percentages.
pct_majority = yearly_stats['pct_majority']
best_year = yearly_stats.loc[pct_majority.idxmax(), 'year']
worst_year = yearly_stats.loc[pct_majority.idxmin(), 'year']

print("\nALIGNMENT WITH UN MAJORITY:")
print(f"  Average alignment: {pct_majority.mean():.1f}%")
print(f"  Highest year: {best_year:.0f} ({pct_majority.max():.1f}%)")
print(f"  Lowest year: {worst_year:.0f} ({pct_majority.min():.1f}%)")

# Pairs are labelled with the configured country codes (e.g. CHN-PRK); the
# first word of the full name is ambiguous ("DEMOCRATIC", "RUSSIAN").
print("\nDYADIC ALIGNMENT (All Pairs):")
for _, row in dyadic_df.iterrows():
    print(f"  {_dyad_label(row['country1'])}-{_dyad_label(row['country2'])}: {row['alignment_pct']:.1f}%")

print("\n" + banner)
print("Analysis complete. Check results folder for figures and data files.")
print(banner)