Mountains
---

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

### Loading the data

In [None]:
# Read 'Mountains.csv' (a path relative to the notebook's working directory)
# into the DataFrame `df` that the following cells operate on.
df = pd.read_csv('Mountains.csv')

In [None]:
# Bare expression on the last line: the notebook renders `df` as the cell output.
df

### Inspecting the data

In [None]:
# Print pandas' built-in summary of the frame (DataFrame.info).
df.info()

In [None]:
# Show the dtype pandas assigned to each column.
df.dtypes

In [None]:
# Summary statistics from DataFrame.describe() with its default arguments.
df.describe()

### Changing the index

In [None]:
# Use the mountain name as the row index.
# The result is reassigned instead of using inplace=True, and the call is
# skipped when 'Mountain' is already the index, so this cell can be re-run
# safely: once 'Mountain' has become the index it is no longer a column and a
# second set_index('Mountain') would raise KeyError.
if df.index.name != 'Mountain':
    df = df.set_index('Mountain')

In [None]:
# Preview the first rows of the frame (DataFrame.head with its default row count).
df.head()

### Dropping columns and rows

In [None]:
# Remove the columns that the rest of the analysis does not use.
# Reassigning the result (no inplace=True) together with errors='ignore' makes
# the cell re-runnable: on a second execution the columns are already gone and
# a strict drop would raise KeyError.
# NOTE: errors='ignore' also silences a misspelled column name, so check the
# preview below to confirm the columns were actually removed.
unused_columns = ['Rank', 'Height (ft)', 'Coordinates', 'Parent mountain']
df = df.drop(columns=unused_columns, errors='ignore')
df.head()

In [None]:
# Remove two mountains from the data set by their index label.
# Reassigning the result (no inplace=True) together with errors='ignore' makes
# the cell re-runnable: on a second execution the labels are already gone and
# a strict drop would raise KeyError.
# NOTE: errors='ignore' also silences a misspelled label.
excluded_mountains = ['Mount Everest / Sagarmatha / Chomolungma', 'Muztagh Ata']
df = df.drop(index=excluded_mountains, errors='ignore')
df.head()

### Boolean selection

In [None]:
# Show only the rows whose 'First ascent' entry is the text 'unclimbed'.
df.loc[df['First ascent'].eq('unclimbed')]

In [None]:
# Keep every row except the 'unclimbed' ones. The explicit copy makes `df` an
# independent frame rather than a selection of the previous one.
df = df.loc[df['First ascent'].ne('unclimbed')].copy()

In [None]:
# Element-wise comparison: yields a boolean Series indexed by mountain.
df['Height (m)'].gt(8000)
# Returns:
# Mountain
# K2 / Qogir / Godwin Austen          True
# Kangchenjunga                       True
# Lhotse                              True
# ...

In [None]:
# Use the boolean Series as a row filter: only mountains above 8000 m remain.
df.loc[df['Height (m)'].gt(8000)]

### Dropping missing values

In [None]:
# Discard every row that has a missing value in any column, then inspect the
# frame again.
df = df.dropna(axis=0, how='any')
df.info()

In [None]:
# Convert the year and the two attempt-count columns to integers, one column
# at a time, then check the resulting dtypes.
integer_columns = ['First ascent', 'Ascents bef. 2004', 'Failed attempts bef. 2004']
for column in integer_columns:
    df[column] = df[column].astype(int)
df.info()

### Defining new variables, normalizing and sorting

In [None]:
# Derive per-mountain attempt statistics and a relative difficulty score.
#   Total attempts = successful ascents + failed attempts (both before 2004)
#   Success rate   = percentage of attempts that succeeded
#   Difficulty     = total attempts divided by success rate, rescaled below
df['Total attempts'] = df['Ascents bef. 2004'] + df['Failed attempts bef. 2004']
df['Success rate'] = (df['Ascents bef. 2004'] / df['Total attempts'])*100

# A mountain with no recorded ascents has a success rate of 0, and dividing by
# it yields inf. That inf would become the maximum used for the normalisation
# below and collapse every other score to 0 (with inf/inf giving NaN). Rows
# without a positive success rate are therefore masked to NaN: max() skips NaN
# and sort_values() places NaN rows last.
has_success = df['Success rate'] > 0
df['Difficulty'] = ((df['Total attempts'] / df['Success rate'])*100).where(has_success)

# Rescale so that the largest score equals 1.
df['Difficulty'] = df['Difficulty'] / df['Difficulty'].max()

# Most difficult mountains first.
df = df.sort_values(by='Difficulty', ascending=False)
df.head(10)

### Bar plots

In [None]:
# Difficulty of the first ten rows of df, drawn as horizontal bars with one
# tab10 colour per bar.
values = df['Difficulty'].iloc[:10]
colormap = plt.cm.tab10(range(len(values)))
bar_plot = values.plot(kind='barh', color=colormap)
plt.show()

In [None]:
# Same bar chart as before, now with a title, an x-axis label and the y-axis
# flipped so that the first row of df is drawn at the top.
values = df['Difficulty'].iloc[:10]
colormap = plt.cm.tab10(range(len(values)))
bar_plot = values.plot(kind='barh', color=colormap)

# Title and label in a single call, then invert the y-axis
bar_plot.set(title='The 10 most difficult mountains', xlabel='Difficulty')
bar_plot.invert_yaxis()
plt.show()

### Histograms

In [None]:
# Histogram of first-ascent years using pandas' default settings.
df['First ascent'].plot(kind='hist')
plt.show()

In [None]:
# Histogram of first-ascent years with 20 bins, custom colours and labelled axes.
hist = df['First ascent'].plot(
    kind='hist',
    bins=20,
    color='orange',
    edgecolor='black',
)
hist.set(
    xlabel='Year',
    ylabel='Number of first ascents',
    title='Popular years for climbing',
)
plt.show()

In [None]:
# Boolean Series: True for each mountain higher than 8000 m.
df['Height (m)'].gt(8000)

In [None]:
# Store the "higher than 8000 m" mask so the following cells can reuse it.
high = df['Height (m)'].gt(8000)

In [None]:
# First-ascent years of the mountains selected by the `high` mask.
df['First ascent'].loc[high]

In [None]:
# First-ascent years of the remaining mountains (`~` negates the mask).
df['First ascent'].loc[~high]

In [None]:
# Stacked histogram of first-ascent years, split by the `high` mask
# (first group: mask is True, second group: mask is False).
first_ascent = df['First ascent']
plt.hist(
    [first_ascent.loc[high], first_ascent.loc[~high]],
    bins=20,
    stacked=True,
    edgecolor='black',
)
plt.legend(['over 8000m', 'under 8000m'], loc='upper right')
plt.title('Year of first ascent')
plt.xlabel('Year')
plt.ylabel('Number of first ascents')
plt.show()

### Scatterplots

In [None]:
# Plain scatter plot: mountain height on x, total number of attempts on y.
heights = df['Height (m)']
attempts = df['Total attempts']
plt.scatter(heights, attempts)
plt.show()

In [None]:
# The same scatter plot with red, black-edged markers, axis labels and a title.
heights = df['Height (m)']
attempts = df['Total attempts']
plt.scatter(heights, attempts, color='red', edgecolor='black')
plt.xlabel('Mountain height')
plt.ylabel('Total attempts')
plt.title('Number of total attempts vs mountain height', fontsize=14)
plt.show()

### Exercise

In [None]:
# First ten entries of the 'Range' column.
df['Range'].iloc[:10]

In [None]:
# Last ten entries of the 'Range' column.
df['Range'].iloc[-10:]

In [None]:
# One boolean per row: does the 'Range' text contain 'Himalaya'?
# Each value is passed through str() before the substring test, so non-string
# entries are handled as well.
Himalaya = ['Himalaya' in str(range_name) for range_name in df['Range']]

# Peek at the first five flags
Himalaya[:5]

In [None]:
# Keep only the rows flagged in `Himalaya`, then count the rows per 'Range' value.
himalayan_rows = df[Himalaya]
count = himalayan_rows.groupby('Range').size()

# Display the counts
count

In [None]:
# Complete solution in one cell: rebuild the mask and the per-range counts,
# then draw the counts as a horizontal bar chart.
Himalaya = ['Himalaya' in str(range_name) for range_name in df['Range']]
himalayan_rows = df[Himalaya]
count = himalayan_rows.groupby('Range').size()

# Open an 8x5 inch figure first; the bar chart is drawn after it is created.
plt.figure(figsize=(8,5))
colormap = plt.cm.tab20(range(len(count)))
plot = count.plot(kind='barh', title='The Himalaya subranges', color=colormap)
plot.set(xlabel='Number of mountains in the subrange', ylabel='Subrange')
plt.show()