In [85]:
import pandas as pd
titanic = pd.read_csv('data/titanic.csv')

titanic['age'] = titanic['age'].replace(['?'], [None]).astype('float')
titanic['fare'] = titanic['fare'].replace(['?'], [None]).astype('float')

In [87]:
def years_to_days(years):
    return years * 365

titanic['age'].apply(years_to_days)

0       10585.0000
1         334.5955
2         730.0000
3       10950.0000
4        9125.0000
           ...    
1304     5292.5000
1305           NaN
1306     9672.5000
1307     9855.0000
1308    10585.0000
Name: age, Length: 1309, dtype: float64

In [89]:
def get_age_group(age):
    if age < 2:
        return 'infant'
    elif age < 12:
        return 'child'
    elif age < 18:
        return 'teen'
    elif age < 50:
        return 'adult'
    else:
        return 'senior'

In [91]:
titanic['age'].apply(get_age_group)

0        adult
1       infant
2        child
3        adult
4        adult
         ...  
1304      teen
1305    senior
1306     adult
1307     adult
1308     adult
Name: age, Length: 1309, dtype: object

In [93]:
titanic['age_group'] = titanic['age'].apply(get_age_group)

In [95]:
titanic['age_group'].value_counts()

age_group
adult     782
senior    373
child      69
teen       63
infant     22
Name: count, dtype: int64

In [97]:
titanic.groupby('age_group').survived.mean()

age_group
adult     0.386189
child     0.492754
infant    0.772727
senior    0.313673
teen      0.476190
Name: survived, dtype: float64

In [99]:
titanic.groupby(['age_group','sex']).survived.mean()

age_group  sex   
adult      female    0.748201
           male      0.186508
child      female    0.515152
           male      0.472222
infant     female    0.888889
           male      0.692308
senior     female    0.698276
           male      0.140078
teen       female    0.833333
           male      0.151515
Name: survived, dtype: float64

In [101]:
#Apply with Lambda func
# f{ } is what you called an f-string in python that allows you to embed expressions inside string literals using curly braces {}
titanic['fare'].apply(lambda x: f'${x*24}')

0                   $5072.1
1       $3637.2000000000003
2       $3637.2000000000003
3       $3637.2000000000003
4       $3637.2000000000003
               ...         
1304              $346.9008
1305              $346.9008
1306    $173.39999999999998
1307    $173.39999999999998
1308                 $189.0
Name: fare, Length: 1309, dtype: object

In [103]:
#practice function with 2 argument in apply
def convert_currency(num, multi):
    return f'${num*multi}';
#args always has to be a tuple
titanic['fare'].apply(convert_currency, args=(24,))

0                   $5072.1
1       $3637.2000000000003
2       $3637.2000000000003
3       $3637.2000000000003
4       $3637.2000000000003
               ...         
1304              $346.9008
1305              $346.9008
1306    $173.39999999999998
1307    $173.39999999999998
1308                 $189.0
Name: fare, Length: 1309, dtype: object

In [105]:
#APPLY with DATAFRAME

df = titanic[['pclass','survived','age']]

In [107]:
df

Unnamed: 0,pclass,survived,age
0,1,1,29.0000
1,1,1,0.9167
2,1,0,2.0000
3,1,0,30.0000
4,1,0,25.0000
...,...,...,...
1304,3,0,14.5000
1305,3,0,
1306,3,0,26.5000
1307,3,0,27.0000


In [109]:
def get_range(s):
    return s.max() - s.min()

#Apply will apply on every column in the DF ( default axis = 0)
df.apply(get_range)

pclass       2.0000
survived     1.0000
age         79.8333
dtype: float64

In [111]:
df.apply(get_range, axis=1)

0       28.0000
1        0.0833
2        2.0000
3       30.0000
4       25.0000
         ...   
1304    14.5000
1305     3.0000
1306    26.5000
1307    27.0000
1308    29.0000
Length: 1309, dtype: float64

In [113]:
def get_fam_size(s):
    fam_size = s.sibsp + s.parch
    if fam_size <= 0:
        return 'single'
    elif fam_size < 5:
        return 'average'
    else:
        return 'large'

titanic['fam_size'] = titanic.apply(get_fam_size, axis=1)

In [115]:
titanic['fam_size'].value_counts()

fam_size
single     790
average    459
large       60
Name: count, dtype: int64

In [117]:
titanic.groupby('fam_size').survived.mean()

fam_size
average    0.549020
large      0.150000
single     0.302532
Name: survived, dtype: float64

In [119]:
#SERIES MAP FUNCTION ( SERIES ONLY)

titanic['pclass'].map({1:'1st',2:'2nd',3:'3rd'})

0       1st
1       1st
2       1st
3       1st
4       1st
       ... 
1304    3rd
1305    3rd
1306    3rd
1307    3rd
1308    3rd
Name: pclass, Length: 1309, dtype: object

In [121]:
titanic['age'].map(lambda x: x < 18)

0       False
1        True
2        True
3       False
4       False
        ...  
1304     True
1305    False
1306    False
1307    False
1308    False
Name: age, Length: 1309, dtype: bool