In [None]:
pip install geopy

In [1]:
#Loading required libraries
import pandas as pd
from geopy.geocoders import ArcGIS

In [55]:
#reading the mountain deaths data
death_data = pd.read_csv('deaths_on_eight-thousanders.csv')
print(death_data.head())

         Date             Name     Nationality  \
0  2023-07-27  Muhammad Hassan        Pakistan   
1  2022-07-22    Matthew Eakin       Australia   
2  2022-07-22  Richard Cartier          Canada   
3  2022-07-21  Ali Akbar Sakhi     Afghanistan   
4  2021-07-25       Rick Allen  United Kingdom   

                         Cause of death Mountain  
0                               Unknown       K2  
1                                  Fall       K2  
2                                  Fall       K2  
3  Unknown, suspected altitude sickness       K2  
4                             Avalanche       K2  


In [56]:
#renaming the column names
new_col_names = {
    'Date': 'date',
    'Name': 'climber_name',
    'Nationality': 'climber_nationality',
    'Cause of death': 'cause_of_death',
    'Mountain': 'mountain_name'
}

# Rename the columns
death_data.rename(columns=new_col_names, inplace=True)
print(death_data.head())

         date     climber_name climber_nationality  \
0  2023-07-27  Muhammad Hassan            Pakistan   
1  2022-07-22    Matthew Eakin           Australia   
2  2022-07-22  Richard Cartier              Canada   
3  2022-07-21  Ali Akbar Sakhi         Afghanistan   
4  2021-07-25       Rick Allen      United Kingdom   

                         cause_of_death mountain_name  
0                               Unknown            K2  
1                                  Fall            K2  
2                                  Fall            K2  
3  Unknown, suspected altitude sickness            K2  
4                             Avalanche            K2  


In [57]:
# Load the Mountain.csv file into a DataFrame
mountain_data = pd.read_csv('Mountain.csv')

# Display the first few rows of the DataFrame
print(mountain_data.head())

# Define the new column names mapping
new_col_names = {
    'Mountain': 'mountain_name',
    'Metres': 'height_meters',
    'Feet': 'height_feet',
    'Range': 'range',
    'Location': 'mountain_location'
}

# Rename the columns
mountain_data.rename(columns=new_col_names, inplace=True)

# Display the first few rows of the DataFrame to verify the column names
print(mountain_data.head())

        Mountain  Metres   Feet        Range  \
0  Mount Everest  8848.0  29029  Himalayas     
1             K2  8612.0  28255  Karakoram     
2  Kangchenjunga  8586.0  28169  Himalayas     
3         Lhotse  8516.0  27940  Himalayas     
4         Makalu  8485.0  27838  Himalayas     

                                            Location  
0                                        Nepal/China  
1                                     Pakistan/China  
2                                        Nepal/India  
3  Nepal – Climbers ascend Lhotse Face in climbin...  
4                                              Nepal  
   mountain_name  height_meters  height_feet        range  \
0  Mount Everest         8848.0        29029  Himalayas     
1             K2         8612.0        28255  Karakoram     
2  Kangchenjunga         8586.0        28169  Himalayas     
3         Lhotse         8516.0        27940  Himalayas     
4         Makalu         8485.0        27838  Himalayas     

              

In [58]:
# Merge the two datasets based on the 'mountain_name' column using a left merge
combined_data = pd.merge(death_data, mountain_data, on='mountain_name', how='left')
# Display the combined dataset
print(combined_data)

            date          climber_name climber_nationality  \
0     2023-07-27       Muhammad Hassan            Pakistan   
1     2022-07-22         Matthew Eakin           Australia   
2     2022-07-22       Richard Cartier              Canada   
3     2022-07-21       Ali Akbar Sakhi         Afghanistan   
4     2021-07-25            Rick Allen      United Kingdom   
...          ...                   ...                 ...   
1073  2023-05-21       Ang Kami Sherpa               Nepal   
1074  2023-05-25  Szilárd Suhajda [hu]             Hungary   
1075  2023-05-25     Ranjit Kumar Shah               Nepal   
1076  2023-05-26          Pieter Swart              Canada   
1077  2023-06-03    Lhakpa Nuru Sherpa               Nepal   

                                         cause_of_death  mountain_name  \
0                                               Unknown             K2   
1                                                  Fall             K2   
2                                

In [59]:
# Initialize ArcGIS geocoder
nom = ArcGIS()

# Define a function to get latitude and longitude for a given location
def get_lat_long(location):
    try:
        loc = nom.geocode(location)
        return loc.latitude, loc.longitude
    except:
        return None, None

# Apply the function to each mountain_name in combined_data and store results in new columns
combined_data[['mountain_latitude', 'mountain_longitude']] = combined_data['mountain_name'].apply(lambda x: pd.Series(get_lat_long(x)))

# Display the updated DataFrame
print(combined_data.head())

         date     climber_name climber_nationality  \
0  2023-07-27  Muhammad Hassan            Pakistan   
1  2022-07-22    Matthew Eakin           Australia   
2  2022-07-22  Richard Cartier              Canada   
3  2022-07-21  Ali Akbar Sakhi         Afghanistan   
4  2021-07-25       Rick Allen      United Kingdom   

                         cause_of_death mountain_name  height_meters  \
0                               Unknown            K2         8612.0   
1                                  Fall            K2         8612.0   
2                                  Fall            K2         8612.0   
3  Unknown, suspected altitude sickness            K2         8612.0   
4                             Avalanche            K2         8612.0   

   height_feet        range mountain_location  mountain_latitude  \
0      28255.0  Karakoram      Pakistan/China           35.88091   
1      28255.0  Karakoram      Pakistan/China           35.88091   
2      28255.0  Karakoram      Pakista

In [61]:
# Apply the existing function to each climber_nationality in combined_data and store results in new columns
combined_data[['nationality_latitude', 'nationality_longitude']] = combined_data['climber_nationality'].apply(lambda x: pd.Series(get_lat_long(x)))

# Display the updated DataFrame
print(combined_data.head())



         date     climber_name climber_nationality  \
0  2023-07-27  Muhammad Hassan            Pakistan   
1  2022-07-22    Matthew Eakin           Australia   
2  2022-07-22  Richard Cartier              Canada   
3  2022-07-21  Ali Akbar Sakhi         Afghanistan   
4  2021-07-25       Rick Allen      United Kingdom   

                         cause_of_death mountain_name  height_meters  \
0                               Unknown            K2         8612.0   
1                                  Fall            K2         8612.0   
2                                  Fall            K2         8612.0   
3  Unknown, suspected altitude sickness            K2         8612.0   
4                             Avalanche            K2         8612.0   

   height_feet        range mountain_location  mountain_latitude  \
0      28255.0  Karakoram      Pakistan/China           35.88091   
1      28255.0  Karakoram      Pakistan/China           35.88091   
2      28255.0  Karakoram      Pakista

In [62]:
# Count the number of null values in each column
null_counts = combined_data.isnull().sum()

# Display the number of null values
print(null_counts)

date                       0
climber_name               0
climber_nationality        2
cause_of_death             9
mountain_name              0
height_meters            215
height_feet              215
range                    215
mountain_location        215
mountain_latitude          0
mountain_longitude         0
nationality_latitude       0
nationality_longitude      0
dtype: int64


In [63]:
#extracting the file
combined_data.to_excel('combined_data.xlsx', index=False)

In [None]:
pip install openmeteo-requests requests-cache retry-requests numpy pandas

In [9]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Function to fetch weather data for a given location
def fetch_weather_data(latitude, longitude):
    start_date = "2010-01-01"
    end_date = "2023-12-31"
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "daily": ["temperature_2m_mean", "precipitation_hours"],
        "timezone": "Australia/Sydney"  # Update timezone as needed
    }
    responses = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=params)
    response = responses[0]  # Assuming only one response is returned
    daily = response.Daily()
    daily_temperature_2m_mean = daily.Variables(0).ValuesAsNumpy()
    daily_precipitation_hours = daily.Variables(1).ValuesAsNumpy()
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "temperature_2m_mean": daily_temperature_2m_mean,
        "precipitation_hours": daily_precipitation_hours
    }
    return pd.DataFrame(data=daily_data)

# Initialize an empty DataFrame to store the weather data for all mountains
weather_data_all_mountains = pd.DataFrame(columns=["Mountain", "Latitude", "Longitude", "Date", "Temperature", "Precipitation"])

# Iterate through combined_data DataFrame and fetch weather data for each mountain
for index, row in combined_data.iterrows():
    mountain_name = row['mountain_name']
    latitude = row['latitude']
    longitude = row['longitude']
    weather_data = fetch_weather_data(latitude, longitude)
    # Calculate average temperature and total precipitation for all years
    avg_temperature = weather_data['temperature_2m_mean'].mean()
    total_precipitation = weather_data['precipitation_hours'].sum()
    # Append data to the weather_data_all_mountains DataFrame
    weather_data_all_mountains = pd.concat([weather_data_all_mountains, pd.DataFrame({
        "mountain_name": mountain_name,
        "mountain_latitude": latitude,
        "mountain_longitude": longitude,
        "date": weather_data['date'],
        "temperature": avg_temperature,
        "precipitation": total_precipitation
    })], ignore_index=True)

# Print the weather data for all mountains
print(weather_data_all_mountains)


        Mountain Latitude Longitude Date Temperature Precipitation  \
0            NaN      NaN       NaN  NaN         NaN           NaN   
1            NaN      NaN       NaN  NaN         NaN           NaN   
2            NaN      NaN       NaN  NaN         NaN           NaN   
3            NaN      NaN       NaN  NaN         NaN           NaN   
4            NaN      NaN       NaN  NaN         NaN           NaN   
...          ...      ...       ...  ...         ...           ...   
5511809      NaN      NaN       NaN  NaN         NaN           NaN   
5511810      NaN      NaN       NaN  NaN         NaN           NaN   
5511811      NaN      NaN       NaN  NaN         NaN           NaN   
5511812      NaN      NaN       NaN  NaN         NaN           NaN   
5511813      NaN      NaN       NaN  NaN         NaN           NaN   

         mountain_name  mountain_latitude  mountain_longitude  \
0                   K2           35.88091            76.51334   
1                   K2       

In [10]:
weather_data_all_mountains.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5511814 entries, 0 to 5511813
Data columns (total 12 columns):
 #   Column              Dtype              
---  ------              -----              
 0   Mountain            object             
 1   Latitude            object             
 2   Longitude           object             
 3   Date                object             
 4   Temperature         object             
 5   Precipitation       object             
 6   mountain_name       object             
 7   mountain_latitude   float64            
 8   mountain_longitude  float64            
 9   date                datetime64[ns, UTC]
 10  temperature         float32            
 11  precipitation       float32            
dtypes: datetime64[ns, UTC](1), float32(2), float64(2), object(7)
memory usage: 462.6+ MB


In [88]:
#reading the mountain weather data
weather_data = pd.read_csv('weather_final.csv', low_memory=False)
print(weather_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 429533 entries, 0 to 429532
Data columns (total 6 columns):
 #   Column         Non-Null Count   Dtype  
---  ------         --------------   -----  
 0   date           429533 non-null  object 
 1   mountain_name  429533 non-null  object 
 2   latitude       429533 non-null  float64
 3   longitude      429533 non-null  float64
 4   temperature    429533 non-null  object 
 5   precipitation  429533 non-null  object 
dtypes: float64(2), object(4)
memory usage: 19.7+ MB
None


In [89]:
combined_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1078 entries, 0 to 1077
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   date                   1078 non-null   object 
 1   climber_name           1078 non-null   object 
 2   climber_nationality    1076 non-null   object 
 3   cause_of_death         1069 non-null   object 
 4   mountain_name          1078 non-null   object 
 5   height_meters          863 non-null    float64
 6   height_feet            863 non-null    float64
 7   range                  863 non-null    object 
 8   mountain_location      863 non-null    object 
 9   mountain_latitude      1078 non-null   float64
 10  mountain_longitude     1078 non-null   float64
 11  nationality_latitude   1078 non-null   float64
 12  nationality_longitude  1078 non-null   float64
dtypes: float64(6), object(7)
memory usage: 109.6+ KB


In [90]:
# Left join combined_data with weather_data
final_mountain_data = pd.merge(combined_data, weather_data, on=['mountain_name', 'date'], how='left')

# Display the merged data
print(final_mountain_data.head())

         date     climber_name climber_nationality  \
0  2023-07-27  Muhammad Hassan            Pakistan   
1  2022-07-22    Matthew Eakin           Australia   
2  2022-07-22  Richard Cartier              Canada   
3  2022-07-21  Ali Akbar Sakhi         Afghanistan   
4  2021-07-25       Rick Allen      United Kingdom   

                         cause_of_death mountain_name  height_meters  \
0                               Unknown            K2         8612.0   
1                                  Fall            K2         8612.0   
2                                  Fall            K2         8612.0   
3  Unknown, suspected altitude sickness            K2         8612.0   
4                             Avalanche            K2         8612.0   

   height_feet        range mountain_location  mountain_latitude  \
0      28255.0  Karakoram      Pakistan/China           35.88091   
1      28255.0  Karakoram      Pakistan/China           35.88091   
2      28255.0  Karakoram      Pakista

In [94]:
final_mountain_data['temperature'] = final_mountain_data['temperature'] = final_mountain_data['temperature'].astype(float)
final_mountain_data['precipitation'] = final_mountain_data['precipitation'].astype(float) 
final_mountain_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1078 entries, 0 to 1077
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   date                   1078 non-null   object 
 1   climber_name           1078 non-null   object 
 2   climber_nationality    1076 non-null   object 
 3   cause_of_death         1069 non-null   object 
 4   mountain_name          1078 non-null   object 
 5   height_meters          863 non-null    float64
 6   height_feet            863 non-null    float64
 7   range                  863 non-null    object 
 8   mountain_location      863 non-null    object 
 9   mountain_latitude      1078 non-null   float64
 10  mountain_longitude     1078 non-null   float64
 11  nationality_latitude   1078 non-null   float64
 12  nationality_longitude  1078 non-null   float64
 13  latitude               1022 non-null   float64
 14  longitude              1022 non-null   float64
 15  temp

In [95]:
import numpy as np

# Define temperature category based on temperature values
def temperature_category(temp):
    if temp < 0:
        return 'Extremely Cold'
    elif 0 <= temp < 17:
        return 'Cold'
    elif 17 <= temp < 24:
        return 'Warm'
    else:
        return 'Hot'

# Define precipitation category based on precipitation values
def precipitation_category(precip):
    if precip < 0:
        return 'No Precipitation'
    elif 0 <= precip < 0.8:
        return 'Slight Precipitation'
    elif 0.8 <= precip < 2.9:
        return 'Moderate Precipitation'
    else:
        return 'High Precipitation'

# Create temperature_category column
final_mountain_data['temperature_category'] = final_mountain_data['temperature'].apply(temperature_category)

# Create precipitation_category column
final_mountain_data['precipitation_category'] = final_mountain_data['precipitation'].apply(precipitation_category)

# Create weather_condition column by concatenating temperature_category and precipitation_category
final_mountain_data['weather_condition'] = final_mountain_data['temperature_category'] + ' & ' + final_mountain_data['precipitation_category']

# View the updated dataframe
print(final_mountain_data.head())

         date     climber_name climber_nationality  \
0  2023-07-27  Muhammad Hassan            Pakistan   
1  2022-07-22    Matthew Eakin           Australia   
2  2022-07-22  Richard Cartier              Canada   
3  2022-07-21  Ali Akbar Sakhi         Afghanistan   
4  2021-07-25       Rick Allen      United Kingdom   

                         cause_of_death mountain_name  height_meters  \
0                               Unknown            K2         8612.0   
1                                  Fall            K2         8612.0   
2                                  Fall            K2         8612.0   
3  Unknown, suspected altitude sickness            K2         8612.0   
4                             Avalanche            K2         8612.0   

   height_feet        range mountain_location  mountain_latitude  \
0      28255.0  Karakoram      Pakistan/China           35.88091   
1      28255.0  Karakoram      Pakistan/China           35.88091   
2      28255.0  Karakoram      Pakista

In [96]:
# lower the string so they're easier to manipulate
final_mountain_data['cause_of_death'] = final_mountain_data['cause_of_death'].str.lower()

for i, element in enumerate(final_mountain_data['cause_of_death']):
    if pd.isna(element):
        final_mountain_data.at[i, 'cause_of_death'] = 'Other'  # Handle missing values
    elif 'serac' in element:
        final_mountain_data.replace(element, 'Avalanche', inplace = True)
    elif 'fall' in element:
        final_mountain_data.replace(element, 'Fall', inplace = True)
    elif 'fell' in element:
        final_mountain_data.replace(element, 'Fall', inplace = True)   
    elif 'disappeared' in element:
        final_mountain_data.replace(element, 'Disappeared', inplace = True)
    elif 'disappearance' in element:
        final_mountain_data.replace(element, 'Disappeared', inplace = True)
    elif 'illnes' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'avalanche' in element:
        final_mountain_data.replace(element, 'Avalanche', inplace = True)
    elif 'unspecified' in element:
        final_mountain_data.replace(element, 'Unknown', inplace = True)
    elif 'unknown' in element:
        final_mountain_data.replace(element, 'Unknown', inplace = True)
    elif 'altitude' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'storm' in element:
        final_mountain_data.replace(element, 'Storm', inplace = True)
    elif 'weather' in element:
        final_mountain_data.replace(element, 'Storm', inplace = True)
    elif 'lightning' in element:
        final_mountain_data.replace(element, 'Storm', inplace = True)
    elif 'exposure' in element:
        final_mountain_data.replace(element, 'Exposure', inplace = True)
    elif 'cardiac' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'stroke' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True) 
    elif 'heart' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'exhaustion' in element:
        final_mountain_data.replace(element, 'Exhaustion', inplace = True)
    elif 'pneumonia' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'pulmonary' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'edema' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'thrombosis' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'hemorrhage' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'hape' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'hace' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'respiratory' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'hypothermia' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'frostbite' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'rope' in element:
        final_mountain_data.replace(element, 'Fall', inplace = True)
    elif 'cold' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'organ' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'cerebral' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'sickness' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'coma' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'fever' in element:
        final_mountain_data.replace(element, 'Illness', inplace = True)
    elif 'collapsed' in element:
        final_mountain_data.replace(element, 'Exhaustion', inplace = True)
# the remaining causes can be categorized as 'Other', since they're unique cases that don't fall into the defined groups
    elif element.islower():
        final_mountain_data.replace(element, 'Other', inplace = True)

In [97]:
final_mountain_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1078 entries, 0 to 1077
Data columns (total 20 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   date                    1078 non-null   object 
 1   climber_name            1078 non-null   object 
 2   climber_nationality     1076 non-null   object 
 3   cause_of_death          1078 non-null   object 
 4   mountain_name           1078 non-null   object 
 5   height_meters           863 non-null    float64
 6   height_feet             863 non-null    float64
 7   range                   863 non-null    object 
 8   mountain_location       863 non-null    object 
 9   mountain_latitude       1078 non-null   float64
 10  mountain_longitude      1078 non-null   float64
 11  nationality_latitude    1078 non-null   float64
 12  nationality_longitude   1078 non-null   float64
 13  latitude                1022 non-null   float64
 14  longitude               1022 non-null   

In [98]:
final_mountain_data.drop(columns=['latitude', 'longitude'], inplace=True)

In [99]:
final_mountain_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1078 entries, 0 to 1077
Data columns (total 18 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   date                    1078 non-null   object 
 1   climber_name            1078 non-null   object 
 2   climber_nationality     1076 non-null   object 
 3   cause_of_death          1078 non-null   object 
 4   mountain_name           1078 non-null   object 
 5   height_meters           863 non-null    float64
 6   height_feet             863 non-null    float64
 7   range                   863 non-null    object 
 8   mountain_location       863 non-null    object 
 9   mountain_latitude       1078 non-null   float64
 10  mountain_longitude      1078 non-null   float64
 11  nationality_latitude    1078 non-null   float64
 12  nationality_longitude   1078 non-null   float64
 13  temperature             1022 non-null   float64
 14  precipitation           1022 non-null   

In [100]:
#extracting the file
final_mountain_data.to_excel('final_mountain_data.xlsx', index=False)