In [2]:
import pandas as pd

def classify_oecd_countries(file_path):
    # List of main OECD countries
    oecd_countries = [
        "Australia", "Austria", "Belgium", "Canada", "Chile", "Colombia", 
        "Czech Republic", "Denmark", "Estonia", "Finland", "France", "Germany", 
        "Greece", "Hungary", "Iceland", "Ireland", "Israel", "Italy", "Japan", 
        "Korea", "Latvia", "Lithuania", "Luxembourg", "Mexico", "Netherlands", 
        "New Zealand", "Norway", "Poland", "Portugal", "Slovak Republic", 
        "Slovenia", "Spain", "Sweden", "Switzerland", "Turkey", "United Kingdom", "United States"
    ]

    # Load the data
    data = pd.read_csv(file_path)

    # Filter the dataset for OECD countries
    oecd_data = data[data['Entity'].isin(oecd_countries)]

    # Calculate the average P-score for each OECD country
    average_p_scores = oecd_data.groupby('Entity')['p_proj_all_ages'].mean().sort_values()

    # Determining the percentile thresholds
    high_threshold = average_p_scores.quantile(2/3)  # Top third percentile
    low_threshold = average_p_scores.quantile(1/3)   # Bottom third percentile

    # Classifying countries
    classification = pd.cut(average_p_scores, 
                            bins=[-float('inf'), low_threshold, high_threshold, float('inf')], 
                            labels=['Low', 'Moderate', 'High'])

    # Creating a DataFrame for better visualization
    classification_df = pd.DataFrame({
        'Country': classification.index,
        'Average P-Score': average_p_scores.values,
        'Classification': classification.values
    })

    return classification_df

# Replace 'your_file_path.csv' with the actual path to the CSV file
file_path = '../data/excess-mortality-p-scores-projected-baseline.csv'
classification_result = classify_oecd_countries(file_path)
print(classification_result)


           Country  Average P-Score Classification
0       Luxembourg        -2.203112            Low
1      New Zealand         0.134100            Low
2          Denmark         2.341650            Low
3            Japan         3.284889            Low
4           France         4.940808            Low
5           Sweden         4.975692            Low
6        Australia         5.340802            Low
7           Norway         5.462750            Low
8           Canada         5.859341            Low
9          Germany         5.936400            Low
10         Finland         6.107727            Low
11         Iceland         6.888061       Moderate
12        Slovenia         7.103636       Moderate
13         Belgium         7.162677       Moderate
14         Estonia         7.255871       Moderate
15     Switzerland         7.322700       Moderate
16          Israel         7.344256       Moderate
17        Portugal         7.399337       Moderate
18         Hungary         7.85

In [4]:
import plotly.express as px

def plot_choropleth(file_path):
    # Load the data
    data = pd.read_csv(file_path)

    # Calculate the average P-score for each country
    average_p_scores = data.groupby('Entity')['p_proj_all_ages'].mean().reset_index()

    # Create a choropleth map
    fig = px.choropleth(
        average_p_scores,
        locations="Entity",
        locationmode="country names",
        color="p_proj_all_ages",
        hover_name="Entity",
        color_continuous_scale=["green", "yellow", "red"],
        title="Average Excess Mortality P-Scores by Country"
    )

    # Show the plot
    fig.show()
    
plot_choropleth(file_path)
