In [112]:
import numpy as np
import pandas as pd
from dbfread import DBF

## Data Import

In [113]:
def read_dbf(filename: str) -> pd.DataFrame:
  """Load a dBase (.DBF) file into a pandas DataFrame.

  Args:
    filename: Path of the DBF file to open.

  Returns:
    A DataFrame constructed from the table opened by `dbfread.DBF`.
  """
  table = DBF(filename)
  return pd.DataFrame(table)

In [114]:
# Load the three source tables, in order: MEMBERS, EXPEDITIONS, PEAKS.
df_members, df_exped, df_peaks = map(
  read_dbf,
  ('./data/members.DBF', './data/exped.DBF', './data/peaks.DBF'),
)

## Data Cleaning
### Members

In [115]:
# (rows, columns) of the members table as loaded, before any cleaning.
df_members.shape

(87156, 78)

In [116]:
# Preview the first rows of the members table as loaded.
df_members.head()

Unnamed: 0,EXPID,MEMBID,PEAKID,MYEAR,MSEASON,FNAME,LNAME,SEX,AGE,BIRTHDATE,...,MEMBERMEMO,NECROLOGY,MSMTBID,MSMTTERM,HCN,MCHKSUM,MSMTNOTE1,MSMTNOTE2,MSMTNOTE3,DEATHRTE
0,AMAD78301,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


In [117]:
# Normalise the members column labels to lowercase.
df_members.columns = df_members.columns.map(str.lower)

In [118]:
# Columns of the members table to keep, grouped by theme.
col_list = (
  # identifier keys
  ['expid', 'membid', 'peakid']
  # expedition time
  + ['myear', 'mseason']
  # member details
  + [
    'sex', 'calcage', 'status', 'occupation', 'leader', 'deputy',
    'bconly', 'nottobc', 'support', 'disabled', 'hired', 'sherpa', 'tibetan',
  ]
  # expedition outcome
  + [
    'msuccess', 'mclaimed', 'mdisputed',
    'msmtbid',   # summit bid
    'msmtterm',  # summit bid termination reason
  ]
  # ascent type
  + [
    'msolo', 'mtraverse', 'mski', 'mparapente', 'mspeed',
    'mroute1', 'mroute2', 'mroute3',
    'mo2used', 'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep',
    'mo2medical', 'mo2note',
  ]
)

In [119]:
# Keep only the columns named in col_list, in that order.
df_members = df_members.loc[:, col_list]

In [120]:
# (rows, columns) after restricting the members table to col_list.
df_members.shape

(87156, 38)

In [121]:
# Keep only the rows in which every one of these flag columns equals False.
df_members = df_members.loc[
  df_members[[
    'bconly', 'nottobc', 'support', 'disabled', 'hired',
    'mtraverse', 'mski', 'mparapente', 'tibetan', 'sherpa',
  ]].eq(False).all(axis=1),
  :]

In [122]:
# Drop the flag columns used by the preceding row filter. The result is
# reassigned rather than dropped with `inplace=True`, so the frame returned by
# the boolean `.loc` selection is not mutated in place (which can emit
# SettingWithCopyWarning).
# NOTE(review): 'sherpa' is also part of that filter but is not dropped here,
# so it remains in the frame — confirm whether that is intentional.
df_members = df_members.drop(
  columns=['bconly', 'nottobc', 'support', 'disabled', 'hired', 'mtraverse', 'mski', 'mparapente', 'tibetan'])

In [123]:
# Lowercase the free-text status values.
df_members['status'] = df_members['status'].str.lower()

In [124]:
# Columns remaining in the members table after filtering and dropping.
df_members.columns

Index(['expid', 'membid', 'peakid', 'myear', 'mseason', 'sex', 'calcage',
       'status', 'occupation', 'leader', 'deputy', 'sherpa', 'msuccess',
       'mclaimed', 'mdisputed', 'msmtbid', 'msmtterm', 'msolo', 'mspeed',
       'mroute1', 'mroute2', 'mroute3', 'mo2used', 'mo2none', 'mo2climb',
       'mo2descent', 'mo2sleep', 'mo2medical', 'mo2note'],
      dtype='object')

In [125]:
# Count how many remaining rows have each value of the msuccess flag.
df_members['msuccess'].value_counts()

msuccess
False    38557
True     21184
Name: count, dtype: int64

In [126]:
# Frequency of each msmtterm (summit bid termination reason) code.
df_members['msmtterm'].value_counts()

msmtterm
1     21181
0     10305
3      5736
4      4757
7      2296
9      1903
12     1845
17     1628
5      1612
6      1489
19     1374
18     1116
8       858
10      824
14      762
15      674
13      557
2       517
16      173
11      134
Name: count, dtype: int64

### Peaks

In [127]:
# (rows, columns) of the peaks table as loaded.
df_peaks.shape

(479, 25)

In [128]:
# Preview the first rows of the peaks table as loaded.
df_peaks.head()

Unnamed: 0,PEAKID,PKNAME,PKNAME2,LOCATION,HEIGHTM,HEIGHTF,HIMAL,REGION,OPEN,UNLISTED,...,PEAKMEMO,PYEAR,PSEASON,PEXPID,PSMTDATE,PCOUNTRY,PSUMMITERS,PSMTNOTE,REFERMEMO,PHOTOMEMO
0,AMAD,Ama Dablam,Amai Dablang,Khumbu Himal,6814,22356,12,2,True,False,...,"Other map altitudes:\r\n 6814m - HMG-MT, HMG...",1961,1,AMAD61101,Mar 13,"New Zealand, USA, UK","Mike Gill, Wally Romanes, Barry Bishop, Michae...",,,W Face (High 126:5 May 1993)\r\nSE Face (High ...
1,AMPG,Amphu Gyabjen,Amphu Gyabien,Khumbu Himal (N of Ama Dablam),5630,18471,12,2,True,False,...,"Other map altitudes:\r\n 5630m - HMG-Finn, N...",1953,1,AMPG53101,Apr 11,UK,"John Hunt, Tom Bourdillon",,,
2,ANN1,Annapurna I,,Annapurna Himal,8091,26545,1,5,True,False,...,"Other map altitudes:\r\n 8091m - HMG-MT, HMG...",1950,1,ANN150101,Jun 03,France,"Maurice Herzog, Louis Lachenal",,Dyhrenfurth history 1950-1977 (MM 58:44-47 Nov...,S Face (High 122:3 Jan 1993) (Beghin accident)...
3,ANN2,Annapurna II,,Annapurna Himal,7937,26040,1,5,True,False,...,"Other map altitudes:\r\n 7937m - HMG-MT, HMG...",1960,1,ANN260101,May 17,"UK, Nepal","Richard Grant, Chris Bonington, Ang Nyima Sherpa",,Dyhrenfurth history 1960-1976 (MM 51:36-37 Sep...,N Face (MM 51:36 Sep 1976)
4,ANN3,Annapurna III,,Annapurna Himal,7555,24787,1,5,True,False,...,"Other map altitudes:\r\n 7555m - HMG-MT, HMG...",1961,1,ANN361101,May 06,India,"Mohan S. Kohli, Sonam Gyatso, Sonam Girmi",,,S Side (MM 125:11 Jan 1989)\r\nSW Face (MM 71:...


In [129]:
# Normalise the peaks column labels to lowercase.
df_peaks.columns = df_peaks.columns.map(str.lower)

In [130]:
# List the lowercased column labels of the peaks table.
df_peaks.columns

Index(['peakid', 'pkname', 'pkname2', 'location', 'heightm', 'heightf',
       'himal', 'region', 'open', 'unlisted', 'trekking', 'trekyear',
       'restrict', 'phost', 'pstatus', 'peakmemo', 'pyear', 'pseason',
       'pexpid', 'psmtdate', 'pcountry', 'psummiters', 'psmtnote', 'refermemo',
       'photomemo'],
      dtype='object')

In [137]:
# Columns of the peaks table to keep.
# 'restrict' is included because a later cell inspects df_peaks.restrict;
# without it that cell fails on a fresh Restart & Run All.
col_list = [
  'peakid',
  'heightm',
  'himal',
  'region',
  'trekking',
  'restrict',
  'phost'
]

In [138]:
# Keep only the columns named in col_list, in that order.
df_peaks = df_peaks.loc[:, col_list]

In [139]:
# (rows, columns) after restricting the peaks table to col_list.
df_peaks.shape

(479, 7)

In [140]:
# Preview the first rows of the reduced peaks table.
df_peaks.head()

Unnamed: 0,peakid,heightm,himal,region,trekking,restrict,phost
0,AMAD,6814,12,2,False,,1
1,AMPG,5630,12,2,False,Opened in 2002,1
2,ANN1,8091,1,5,False,,1
3,ANN2,7937,1,5,False,,1
4,ANN3,7555,1,5,False,,1


In [141]:
# List the distinct values found in the restrict column.
df_peaks['restrict'].unique()

array(['', 'Opened in 2002', 'Requires permit for Annapurna I',
       'Opened in 2014', 'Converted to trekking peak in 2002',
       "One permit for Churen Himal's three summits",
       'Peak entirely within China', 'Opened in 2003', 'Opened in 1997',
       'Requires permit for Gimmigela Chuli', 'Opened in 2013?',
       "One permit for Kanjiroba's north and south summits", 'Closed',
       'Opened in 2014 as Tenzing Peak',
       'Opened in 2002 as trekking peak',
       'Opened in 1997; converted to trekking peak in 2002',
       'Opened in 2001', 'Requires permit for Nuptse',
       'Opened in 2002, delisted in 2013?', 'Requires permit for Amphu I',
       'Delisted in 2014', 'Opened in 2014 (as Khang Karpo)',
       'Requires permit for Jobo Rinjang',
       'Requires permit for Lachama Chuli',
       'Opened in 2014 as Hillary Peak', 'Opened in 2016',
       'Requires Jannu permit', 'Opened in 2023'], dtype=object)