In [164]:
import numpy as np
import pandas as pd
from dbfread import DBF

## Data Import

In [165]:
def read_dbf(filename: str) -> pd.DataFrame:
  """Load a DBF file into a pandas DataFrame.

  Args:
    filename: Path of the DBF file to read.

  Returns:
    A DataFrame constructed from the records of ``DBF(filename)``.
  """
  table = DBF(filename)
  return pd.DataFrame(table)

In [166]:
# Load the three source tables in order: MEMBERS, EXPEDITIONS, PEAKS
df_members, df_exped, df_peaks = map(
  read_dbf,
  ('./data/members.DBF', './data/exped.DBF', './data/peaks.DBF'),
)

## Data Cleaning
### Members

In [167]:
# Dimensions (rows, columns) of the members table as loaded, before any column selection
df_members.shape

(87156, 78)

In [168]:
# Preview the first rows of the members table as loaded
df_members.head()

Unnamed: 0,EXPID,MEMBID,PEAKID,MYEAR,MSEASON,FNAME,LNAME,SEX,AGE,BIRTHDATE,...,MEMBERMEMO,NECROLOGY,MSMTBID,MSMTTERM,HCN,MCHKSUM,MSMTNOTE1,MSMTNOTE2,MSMTNOTE3,DEATHRTE
0,AMAD78301,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


In [169]:
# Normalise the column labels to lower case so later cells can use lower-case names
df_members.columns = df_members.columns.map(str.lower)

In [170]:
# Columns of the members table kept for the analysis.
# The order here becomes the column order of the trimmed frame.
col_list = [
  # identifier keys
  'expid', 'membid', 'peakid',
  # expedition time
  'myear', 'mseason',
  # member details
  'sex', 'calcage', 'status', 'occupation', 'leader', 'deputy',
  'bconly', 'nottobc', 'support', 'disabled', 'hired', 'sherpa', 'tibetan',
  # expedition outcome
  'msuccess', 'mclaimed', 'mdisputed',
  'msmtbid',   # summit bid
  'msmtterm',  # summit bid termination reason
  # ascent type
  'msolo', 'mtraverse', 'mski', 'mparapente', 'mspeed',
  'mroute1', 'mroute2', 'mroute3',
  # mo2* fields (presumably oxygen use; verify against the data dictionary)
  'mo2used', 'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep', 'mo2medical',
  'mo2note',
]

In [171]:
# Keep only the columns named in col_list, in that order (all rows retained)
df_members = df_members.loc[:, col_list]

In [172]:
# Dimensions after column selection: the column count should equal len(col_list)
df_members.shape

(87156, 38)

In [173]:
# Keep only the rows in which every one of these flag columns equals False.
# Comparing with False (rather than negating the flag) also excludes rows
# where a flag is missing, since a missing value does not compare equal to False.
df_members = df_members.loc[
  df_members[[
    'bconly', 'nottobc', 'support', 'disabled', 'hired',
    'mtraverse', 'mski', 'mparapente', 'tibetan', 'sherpa',
  ]].eq(False).all(axis=1), :]

In [174]:
# The preceding row filter required these flags to be False, so each column is
# now constant and carries no information; remove them.
# The result is rebound instead of using inplace=True, so the frame produced
# by the .loc filter is not mutated in place.
# NOTE(review): 'sherpa' was part of the same row filter but is not dropped
# here, so it remains as a constant column — confirm whether that is intended.
df_members = df_members.drop(
  columns=['bconly', 'nottobc', 'support', 'disabled', 'hired',
           'mtraverse', 'mski', 'mparapente', 'tibetan'])

In [175]:
# Lower-case the free-text status values so differently-cased spellings compare equal
df_members['status'] = df_members['status'].str.lower()

In [176]:
# Columns remaining after the row filter and the removal of the flag columns
df_members.columns

Index(['expid', 'membid', 'peakid', 'myear', 'mseason', 'sex', 'calcage',
       'status', 'occupation', 'leader', 'deputy', 'sherpa', 'msuccess',
       'mclaimed', 'mdisputed', 'msmtbid', 'msmtterm', 'msolo', 'mspeed',
       'mroute1', 'mroute2', 'mroute3', 'mo2used', 'mo2none', 'mo2climb',
       'mo2descent', 'mo2sleep', 'mo2medical', 'mo2note'],
      dtype='object')

In [177]:
# Number of remaining rows per value of the msuccess flag
df_members.msuccess.value_counts()

msuccess
False    38557
True     21184
Name: count, dtype: int64

In [178]:
# Number of remaining rows per msmtterm code (summit bid termination reason)
df_members.msmtterm.value_counts()

msmtterm
1     21181
0     10305
3      5736
4      4757
7      2296
9      1903
12     1845
17     1628
5      1612
6      1489
19     1374
18     1116
8       858
10      824
14      762
15      674
13      557
2       517
16      173
11      134
Name: count, dtype: int64