## How to apply a function to a pandas series or dataframe?

In [5]:
import numpy as np
import pandas as pd

In [6]:
#create dataframes: read both example CSV files from their public URLs
drinks_path = 'http://bit.ly/drinksbycountry'
train_path = 'http://bit.ly/kaggletrain'
drinks, train = (pd.read_csv(csv_url) for csv_url in (drinks_path, train_path))

In [7]:
#preview the first three rows of the train dataframe
train.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


### How to use built-in Series methods like `map`

In [8]:
#IMPORTANT: PANDAS
#map #prepare #pandas
#map: series-method
#.map translates each value of a series through a lookup (here a dict) into a new value
sex_to_num = {'female': 0, 'male': 1}
train['Sex_num'] = train['Sex'].map(sex_to_num)

In [9]:
#.loc slices by label and includes the end label, so rows 0 through 3 are shown
train.loc[0:3,['Sex','Sex_num']]

Unnamed: 0,Sex,Sex_num
0,male,1
1,female,0
2,female,0
3,female,0


### How to use the apply-method

In [10]:
#IMPORTANT: PANDAS
#apply #prepare #pandas

#apply: series- and dataframe-method
#used on a series, apply calls the given function once for every element
name_lengths = train['Name'].apply(len)
train['Name_length'] = name_lengths
train.loc[0:3, ['Name', 'Name_length']]

Unnamed: 0,Name,Name_length
0,"Braund, Mr. Owen Harris",23
1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",51
2,"Heikkinen, Miss. Laina",22
3,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",44


In [11]:
#IMPORTANT: PANDAS
#apply #prepare #pandas

#apply: series- and dataframe-method
#apply as a series-function: applies a function to each element in a series
#np.ceil rounds every fare up to the next whole number; the result stays a float
train['Fare_ceil'] = train.Fare.apply(np.ceil)
train.loc[0:3,['Fare','Fare_ceil']]

Unnamed: 0,Fare,Fare_ceil
0,7.25,8.0
1,71.2833,72.0
2,7.925,8.0
3,53.1,54.0


### How to apply self-defined functions

In [12]:
#split every name at ',' into a list of its parts
train['Name'].str.split(',').head()

0                           [Braund,  Mr. Owen Harris]
1    [Cumings,  Mrs. John Bradley (Florence Briggs ...
2                            [Heikkinen,  Miss. Laina]
3      [Futrelle,  Mrs. Jacques Heath (Lily May Peel)]
4                          [Allen,  Mr. William Henry]
Name: Name, dtype: object

In [13]:
#look up the single name stored under index label 0 (a plain string)
train.Name[0]

'Braund, Mr. Owen Harris'

In [14]:
#IMPORTANT: PANDAS
#apply #def #function #prepare #pandas

#defining function
def get_element(my_list, position):
    """Return the item of ``my_list`` found at index ``position``."""
    picked = my_list[position]
    return picked

#applying function on train.Name: extra keyword arguments given to apply are passed on to the function
split_names = train['Name'].str.split(',')
split_names.apply(get_element, position=0).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

### How to use lambda-expressions

In [15]:
#IMPORTANT: PANDAS
#apply #lambda #prepare #pandas

#applying a lambda-function: an unnamed inline function picks the first part of each split name
train['Name'].str.split(',').apply(lambda parts: parts[0]).head()

0       Braund
1      Cumings
2    Heikkinen
3     Futrelle
4        Allen
Name: Name, dtype: object

### How to apply a function to a dataframe

In [16]:
#preview the first three rows of drinks
drinks.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa


In [17]:
#IMPORTANT: PANDAS
#apply #prepare #pandas

#apply: series- and dataframe-method
#used on a dataframe, apply hands the function one whole column (axis=0) or one whole row (axis=1) at a time
servings = drinks.loc[:, 'beer_servings':'wine_servings']
servings.apply(max, axis=0)

beer_servings      376
spirit_servings    438
wine_servings      370
dtype: int64

In [18]:
#axis=1 runs np.argmax once per row; in the recorded output the result is the position of the
#largest value within the sliced columns (0 = beer_servings, 1 = spirit_servings, 2 = wine_servings)
drinks.loc[:,'beer_servings':'wine_servings'].apply(np.argmax, axis=1)

0      0
1      1
2      0
3      2
4      0
      ..
188    0
189    0
190    0
191    0
192    0
Length: 193, dtype: int64

In [19]:
#show drinks again as a reference point before the serving columns are converted
drinks.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0,0,0,0.0,Asia
1,Albania,89,132,54,4.9,Europe
2,Algeria,25,0,14,0.7,Africa


In [20]:
#IMPORTANT: PANDAS
#applymap #prepare #pandas

#applymap: dataframe-method
#applies a function to every element of a dataframe
#NOTE: pandas 2.1 renamed DataFrame.applymap to DataFrame.map; applymap still works there but emits a warning
serving_cols = list(drinks.loc[:,'beer_servings':'wine_servings'].columns)
#assign through drinks[...] so the converted columns replace the old ones; writing the result back
#with drinks.loc[:, ...] = ... sets values in place on pandas >= 2.0 and would keep the integer dtype
drinks[serving_cols] = drinks[serving_cols].applymap(float)
drinks.head(3)

Unnamed: 0,country,beer_servings,spirit_servings,wine_servings,total_litres_of_pure_alcohol,continent
0,Afghanistan,0.0,0.0,0.0,0.0,Asia
1,Albania,89.0,132.0,54.0,4.9,Europe
2,Algeria,25.0,0.0,14.0,0.7,Africa
