In [25]:
# 25 May 2025

<h1 style="line-height:3rem;"> Everest Expeditions </h1>

In [5]:
import pandas as pd
import os

def load_everest_data():
    """
    Read the Everest expedition CSV, choosing the path by runtime environment.

    The ``KAGGLE_KERNEL_RUN_TYPE`` environment variable selects the location:
    when it is unset (or literally ``"Localhost"``) the local relative path is
    read; for any other value the Kaggle input path is read.

    Returns:
        DataFrame: Everest expedition data.
    """
    run_type = os.environ.get('KAGGLE_KERNEL_RUN_TYPE', 'Localhost')

    # Resolve the path first so there is a single read_csv call site.
    csv_path = (
        '../datasets/New/Everest_Expedition.csv'
        if run_type == "Localhost"
        else "/kaggle/input/everest-expedition/Everest_Expedition.csv"
    )
    return pd.read_csv(csv_path, low_memory=False)

def top_n_national_success(df, top_n=10, include_countries=None):
    """
    Get top N nationalities by total successful summits, including specified countries.

    Summit totals are the per-nation sum of the ``smtmembers`` column. Sorting
    is stable, so nations with equal totals keep the order in which they enter
    the sort (nation-name order for the initial ranking), which makes the
    ``top_n`` cut-off reproducible.

    Args:
        df (DataFrame): Everest expedition data with ``nation`` and
            ``smtmembers`` columns.
        top_n (int): Number of top nationalities to return.
        include_countries (list | str | None): Countries to always include,
            even when they fall outside the top N. A single country may be
            passed as a plain string. Names are stripped of surrounding
            whitespace and then matched exactly against ``nation``.

    Returns:
        DataFrame: Columns ``Nation`` and ``Total Successful Summits``, sorted
        by summit total in descending order, with a fresh 0..n-1 index.
    """
    # Group by nation and sum successful summits
    nation_success = (
        df.groupby('nation')['smtmembers']
        .sum()
        .reset_index()
        .rename(columns={'nation': 'Nation', 'smtmembers': 'Total Successful Summits'})
    )

    def _by_summits(frame):
        # mergesort is stable: tied totals keep their incoming order instead of
        # depending on the internals of the default sort algorithm.
        return frame.sort_values(
            by='Total Successful Summits', ascending=False, kind='mergesort'
        )

    top_df = _by_summits(nation_success).head(top_n)

    # A bare string would otherwise be iterated character by character and
    # silently match no nation at all.
    if isinstance(include_countries, str):
        include_countries = [include_countries]

    if not include_countries:
        return top_df.reset_index(drop=True)

    # Include specific countries even if not in top N
    wanted = [country.strip() for country in include_countries]
    extra_df = nation_success[nation_success['Nation'].isin(wanted)]

    # Requested countries already in the top N appear twice after the concat;
    # keep the first occurrence only.
    combined_df = pd.concat([top_df, extra_df], ignore_index=True).drop_duplicates(subset='Nation')
    return _by_summits(combined_df).reset_index(drop=True)

def style_gradient_table(df, subset=None, cmap='YlGnBu'):
    """
    Apply a background colour gradient to one or more columns of a table.

    With no extra arguments this shades the ``Total Successful Summits``
    column with the ``YlGnBu`` colormap, exactly as before. The column(s) and
    colormap can be overridden so the same helper serves other summary tables.

    Args:
        df (DataFrame): Table to style.
        subset (str | list | None): Column name, or list of column names, to
            shade. Defaults to ``['Total Successful Summits']``.
        cmap (str): Colormap name passed to ``Styler.background_gradient``.

    Returns:
        Styler: Styled DataFrame.
    """
    if subset is None:
        subset = ['Total Successful Summits']
    elif isinstance(subset, str):
        # Accept a bare column name for convenience.
        subset = [subset]
    return df.style.background_gradient(subset=subset, cmap=cmap)

# Example usage: load the data, rank nations by successful summits, and show
# the result as a gradient-shaded table.
if __name__ == "__main__":
    # An `if` block does not open a new scope, so `df` stays a module-level
    # name; the cells further down read and extend it.
    df = load_everest_data()
    # Countries to show even when they rank outside the top 40.
    include_countries = ["Finland", "Bangladesh", "Pakistan"]
    top_n_df = top_n_national_success(df, top_n=40, include_countries=include_countries)

    # For Jupyter or Kaggle Notebooks, explicitly display the styled table:
    # inside an `if` block a bare trailing expression is not auto-rendered.
    from IPython.display import display
    styled_table = style_gradient_table(top_n_df)
    display(styled_table)



Unnamed: 0,Nation,Total Successful Summits
0,USA,2950
1,UK,2204
2,Japan,1959
3,France,1367
4,China,1305
5,Nepal,1276
6,India,1001
7,Germany,988
8,Spain,885
9,Switzerland,869


In [13]:


# Years must be integers so they can be bucketed with floor division.
df['year'] = df['year'].astype(int)

# Start year of the 5-year bucket each expedition falls into (1953 -> 1950).
df['year_interval'] = df['year'].floordiv(5).mul(5)

# Per expedition, count how many of the four success flags read as TRUE once
# the value is cast to str, stripped and upper-cased.
df['total_success'] = df[['success1', 'success2', 'success3', 'success4']].map(
    lambda flag: str(flag).strip().upper() == 'TRUE'
).sum(axis=1)

# Sum the per-expedition counts within each bucket and apply display headers.
interval_success = (
    df.groupby('year_interval')['total_success']
    .sum()
    .reset_index()
    .set_axis(['5-Year Interval Start', 'Total Successful Summits'], axis=1)
)

# Build the styled view step by step: gradient, centred text, then caption.
styled_table = interval_success.style.background_gradient(
    cmap='YlGnBu',
    subset=['Total Successful Summits'],
)
styled_table = styled_table.set_properties(**{'text-align': 'center'})
styled_table = styled_table.set_caption("Total Successful Summits in 5-Year Intervals")

styled_table


Unnamed: 0,5-Year Interval Start,Total Successful Summits
0,1905,1
1,1910,1
2,1920,0
3,1925,0
4,1930,4
5,1935,7
6,1940,0
7,1945,4
8,1950,29
9,1955,30


In [25]:


# Coerce every route-level success flag to a real boolean: a value is True only
# when its string form, stripped and upper-cased, is exactly 'TRUE'.
for flag_col in ('success1', 'success2', 'success3', 'success4'):
    df[flag_col] = df[flag_col].astype(str).str.strip().str.upper().eq('TRUE')

# An expedition counts as a success when at least one flag is True; every other
# expedition is a failure, so the failure indicator is the complement.
df['exp_success'] = df[['success1', 'success2', 'success3', 'success4']].any(axis=1).astype(int)
df['exp_failure'] = 1 - df['exp_success']

# Per-nation totals of successful and failed expeditions.
nation_stats = (
    df.groupby('nation')
    .agg(exp_success=('exp_success', 'sum'), exp_failure=('exp_failure', 'sum'))
    .reset_index()
)

# Successes per failure. A failure count of 0 is swapped for pd.NA first, so
# nations without any failure get a missing ratio instead of dividing by zero.
nation_stats['success_failure_ratio'] = (
    nation_stats['exp_success'] / nation_stats['exp_failure'].replace(0, pd.NA)
)

# Highest ratio first.
nation_stats = nation_stats.sort_values('success_failure_ratio', ascending=False)

# Shade the ratio column, fix the number formats, then add the caption.
styled_table = nation_stats.style.background_gradient(
    cmap='Greens',
    subset=['success_failure_ratio'],
)
styled_table = styled_table.format({
    'exp_success': '{:.0f}',
    'exp_failure': '{:.0f}',
    'success_failure_ratio': '{:.2f}'
})
styled_table = styled_table.set_caption("Expedition-Level Success/Failure Ratio by Nation")

styled_table


Unnamed: 0,nation,exp_success,exp_failure,success_failure_ratio
45,Kazakhstan,22,4,5.5
91,USSR,15,3,5.0
18,China,182,45,4.04
48,Kyrgyz Republic,4,1,4.0
59,Nepal,303,84,3.61
20,Croatia,7,2,3.5
57,Mongolia,7,2,3.5
49,Latvia,6,2,3.0
26,Estonia,3,1,3.0
40,Ireland,25,9,2.78
