Team members : Thibault GAUTHÉ, Nathan GEORGES, Thomas JIN, Yijia ZENG, Zilong XU

Dataset : Humanitarian Aid : Country level data / Personnel 



In [3]:
# Import libraries

import pandas as pd
import numpy as np
import plotly.express as px  
import matplotlib.pyplot as plt 

In [4]:
# Function definitions

def load_data(file_path):
    """
    Read a CSV source into a pandas DataFrame.

    Input: file_path, forwarded unchanged to ``pd.read_csv`` with default
           parsing options.
    Output: the DataFrame produced by ``pd.read_csv``.
    """
    frame = pd.read_csv(file_path)
    return frame

def preprocess_data(df):
    """
    Return a cleaned copy of the dataframe: parsed dates and zero-filled counts.

    The input frame is left untouched; all changes are applied to a copy so
    that re-running the cell (or inspecting the raw frame afterwards) gives
    consistent results.

    Input: df, a dataframe containing a 'Date' column and the columns
           'Troop Contributions', 'Police Contributions',
           'EOM Contributions' and 'Total Contributions'.
    Output: a new dataframe where 'Date' has been passed through
            ``pd.to_datetime`` and NaN values in the four contribution
            columns have been replaced by 0.
    Raises: KeyError if any of the columns listed above is missing.
    """
    fill_cols = ['Troop Contributions', 'Police Contributions',
                 'EOM Contributions', 'Total Contributions']

    # Work on a copy so the caller's raw frame is not modified as a side effect.
    cleaned = df.copy()

    # Convert Date column to datetime objects
    cleaned['Date'] = pd.to_datetime(cleaned['Date'])

    # Fill NaN values with 0 for numerical contribution columns
    cleaned[fill_cols] = cleaned[fill_cols].fillna(0)

    return cleaned


def get_global_trend(df):
    """
    Indicator 1: sum of 'Total Contributions' for each distinct 'Date'.

    Output: a dataframe with columns 'Date' and 'Total Contributions',
            one row per date, ordered by the groupby's default key sort.
    """
    totals_by_date = df.groupby('Date')['Total Contributions'].sum()
    return totals_by_date.reset_index()

def get_top_contributors(df, n=10):
    """
    Indicator 2: the n contributors with the largest summed 'Total Contributions'.

    Input: df with 'Contributor' and 'Total Contributions' columns;
           n, how many contributors to keep (default 10).
    Output: a dataframe with columns 'Contributor' and 'Total Contributions',
            ordered from the largest sum to the smallest.
    """
    per_contributor = df.groupby('Contributor')['Total Contributions'].sum()
    leaders = per_contributor.nlargest(n)
    return leaders.reset_index()

def get_contribution_composition(df):
    """
    Indicator 3: column totals for Troop, Police and EOM contributions.

    Output: a three-row dataframe with columns 'Type' (the source column
            name) and 'Count' (the sum of that column over all rows).
    """
    personnel_cols = ['Troop Contributions', 'Police Contributions', 'EOM Contributions']
    # Column-wise totals, indexed by the original column names.
    totals = df[personnel_cols].sum()
    # Name the index and the values directly instead of renaming afterwards.
    return totals.rename_axis('Type').reset_index(name='Count')

def get_regional_distribution(df):
    """
    Indicator 4: sum of 'Total Contributions' for each 'Contributor Region'.

    Output: a dataframe with columns 'Contributor Region' and
            'Total Contributions', one row per region.
    """
    totals_by_region = df.groupby('Contributor Region')['Total Contributions'].sum()
    return totals_by_region.reset_index()

In [5]:
# main block
if __name__ == "__main__":
    # Driver: load the CSV, clean it, compute the four indicators and render
    # one plotly figure per indicator.

    # --- Step 1: Data Collection & Cleaning ---
    file_path = "country_level_data.csv"
    # Number of countries shown in the ranking; also used in the chart title
    # so the two can never disagree.
    top_n = 10

    # Load and clean
    print("Loading data...")
    df_raw = load_data(file_path)
    df = preprocess_data(df_raw)
    first_date = df['Date'].min()
    last_date = df['Date'].max()
    print(f"Data loaded. Shape: {df.shape}")
    print(f"Date range: {first_date.date()} to {last_date.date()}")

    # --- Step 2: Compute Indicators ---
    trend_df = get_global_trend(df)
    top_df = get_top_contributors(df, n=top_n)
    comp_df = get_contribution_composition(df)
    reg_df = get_regional_distribution(df)

    # --- Step 3: Visualization (Inline) ---

    # 1. Global Trend Line Chart
    # The year span in the title is taken from the data rather than
    # hard-coded, so it stays correct if the CSV is refreshed.
    fig1 = px.line(trend_df, x='Date', y='Total Contributions',
                   title=('Indicator 1: Global Peacekeeping Personnel Trend '
                          f'({first_date.year}-{last_date.year})'))
    fig1.show()

    # 2. Top Contributors Bar Chart
    fig2 = px.bar(top_df, x='Total Contributions', y='Contributor', orientation='h',
                  title=f'Indicator 2: Top {top_n} Contributing Countries (Historical Total)',
                  text='Total Contributions')
    # Categorical y-axes are laid out bottom-up; order by total so the
    # largest contributor is drawn at the top of the ranking.
    fig2.update_layout(yaxis={'categoryorder': 'total ascending'})
    fig2.show()

    # 3. Composition Pie Chart
    fig3 = px.pie(comp_df, values='Count', names='Type',
                  title='Indicator 3: Composition of Peacekeeping Forces',
                  color_discrete_sequence=px.colors.sequential.RdBu)
    fig3.show()

    # 4. Regional Distribution Bar Chart
    fig4 = px.bar(reg_df, x='Contributor Region', y='Total Contributions',
                  title='Indicator 4: Contributions by Region',
                  color='Contributor Region')
    fig4.show()

Loading data...
Data loaded. Shape: (30095, 34)
Date range: 1990-11-30 to 2017-04-30
