In [94]:
import numpy as np
import pandas as pd
from dbfread import DBF

## Data Import

In [95]:
def read_dbf(filename: str) -> pd.DataFrame:
  """Load the records of a DBF file into a pandas DataFrame.

  Args:
    filename: Path of the DBF file to read.

  Returns:
    A DataFrame built from the records that ``DBF(filename)`` yields.
  """
  # dbfread's documentation asks for an explicit iterator here: handing
  # pandas the DBF table object itself relies on pandas recognising it as a
  # stream of records, which is not guaranteed for every pandas version.
  return pd.DataFrame(iter(DBF(filename)))

In [96]:
# Read the three source tables, in this order: MEMBERS, EXPEDITIONS, PEAKS.
df_members, df_exped, df_peaks = map(
  read_dbf,
  ('./data/members.DBF', './data/exped.DBF', './data/peaks.DBF'),
)

## Data Cleaning
### Members

In [97]:
# Size of the members table as loaded, as (rows, columns), before any cleaning.
df_members.shape

(87156, 78)

In [98]:
# Preview the first rows of the members table as loaded.
df_members.head()

Unnamed: 0,EXPID,MEMBID,PEAKID,MYEAR,MSEASON,FNAME,LNAME,SEX,AGE,BIRTHDATE,...,MEMBERMEMO,NECROLOGY,MSMTBID,MSMTTERM,HCN,MCHKSUM,MSMTNOTE1,MSMTNOTE2,MSMTNOTE3,DEATHRTE
0,AMAD78301,1,AMAD,1978,3,Jean Robert,Clemenson,M,0,,...,,,1,4,0,2426937,,,,
1,AMAD78301,2,AMAD,1978,3,Bernard,Dufour,M,0,,...,,,1,4,0,2426501,,,,
2,AMAD78301,3,AMAD,1978,3,Philippe,Gerard,M,0,,...,,,1,4,0,2431569,,,,
3,AMAD78301,4,AMAD,1978,3,Eric,Lasserre,M,0,,...,,,1,4,0,2426809,,,,
4,AMAD78301,5,AMAD,1978,3,Guy,Peters,M,0,,...,,,1,4,0,2429215,,,,


In [99]:
# Normalise every column label to lower case; the cells below refer to the
# columns by their lower-case names.
df_members.columns = df_members.columns.map(str.lower)

In [100]:
# Columns of the members table to keep, in the order they should appear.
col_list = [
  # identifier keys
  'expid', 'membid', 'peakid',
  # expedition time
  'myear', 'mseason',
  # member details
  'sex', 'calcage', 'status', 'occupation',
  'leader', 'deputy',
  'bconly', 'nottobc', 'support', 'disabled', 'hired',
  'sherpa', 'tibetan',
  # expedition outcome
  'msuccess', 'mclaimed', 'mdisputed',
  # ascent type
  'msolo', 'mtraverse', 'mski', 'mparapente', 'mspeed',
  'mroute1', 'mroute2', 'mroute3',
  'mo2used', 'mo2none', 'mo2climb', 'mo2descent', 'mo2sleep',
  'mo2medical', 'mo2note',
]

In [101]:
# Keep only the columns named in col_list, in that order.
df_members = df_members.loc[:, col_list]

In [102]:
# Size of the members table after the column selection, as (rows, columns).
df_members.shape

(87156, 36)

In [103]:
# Flag columns used to exclude member records. A row is kept only when every
# one of these flags compares equal to False; a missing value does not
# compare equal to False, so such rows are excluded too.
exclusion_flags = [
  'bconly', 'nottobc', 'support', 'disabled', 'hired',
  'mtraverse', 'mski', 'mparapente', 'tibetan',
]
keep_row = (df_members[exclusion_flags] == False).all(axis=1)

# .copy() gives the filtered table its own data. Later cells modify
# df_members (dropping columns, rewriting a column); doing that on a bare
# filtered selection can make pandas emit SettingWithCopyWarning.
df_members = df_members.loc[keep_row, :].copy()

In [104]:
# The preceding filter kept only rows where each of these flags is False, so
# the columns are constant from here on and can be removed.
# Rebind the name instead of using inplace=True: df_members is the result of
# a boolean selection, and modifying such a frame in place can make pandas
# emit SettingWithCopyWarning.
df_members = df_members.drop(
  columns=[
    'bconly', 'nottobc', 'support', 'disabled', 'hired',
    'mtraverse', 'mski', 'mparapente', 'tibetan',
  ],
)

(60373, 36)

In [107]:
# Lower-case the status labels.
# assign() returns a new frame with the column replaced in its original
# position. Attribute-style assignment (df.status = ...) is avoided because
# it silently sets a plain Python attribute when no such column exists, and
# writing into a filtered frame can make pandas emit SettingWithCopyWarning.
df_members = df_members.assign(status=df_members['status'].str.lower())

0           leader
1    deputy leader
2          climber
3       exp doctor
4          climber
Name: status, dtype: object

In [None]:
# Size of the members table after the cleaning steps above, as (rows, columns).
df_members.shape