# 함수 매핑
## 개별 원소에 함수 매핑

In [23]:
import pandas as pd
import seaborn as sns

# Load seaborn's 'titanic' dataset and keep only the 'age' and 'fare' columns.
titanic = sns.load_dataset('titanic')
df = titanic.loc[:,['age','fare']]
# Add a constant column 'ten' in which every row holds 10.
df['ten'] = 10
# Preview the first rows.
df.head()

Unnamed: 0,age,fare,ten
0,22.0,7.25,10
1,38.0,71.2833,10
2,26.0,7.925,10
3,35.0,53.1,10
4,35.0,8.05,10


In [4]:
def add_10(n):
    """Return ``n`` increased by 10."""
    increased = n + 10
    return increased

def add_two_obj(a, b):
    """Return the result of ``a + b``."""
    total = a + b
    return total

In [5]:
# Quick check of add_10 on a plain integer.
add_10(10)

20

In [6]:
# Quick check of add_two_obj on two plain integers.
add_two_obj(10 ,20)

30

In [8]:
# Series.apply calls add_10 on each element of the 'age' column.
sr1 = df['age'].apply(add_10)
sr1

0      32.0
1      48.0
2      36.0
3      45.0
4      45.0
       ... 
886    37.0
887    29.0
888     NaN
889    36.0
890    42.0
Name: age, Length: 891, dtype: float64

In [9]:
# Extra keyword arguments given to apply are forwarded to the function:
# each 'age' element is passed as `a`, while `b` is fixed at 10.
sr2 = df['age'].apply(add_two_obj,b=10)
sr2

0      32.0
1      48.0
2      36.0
3      45.0
4      45.0
       ... 
886    37.0
887    29.0
888     NaN
889    36.0
890    42.0
Name: age, Length: 891, dtype: float64

In [10]:
# Same mapping expressed through a lambda that delegates to add_10.
sr3 = df['age'].apply(lambda x : add_10(x))
sr3

0      32.0
1      48.0
2      36.0
3      45.0
4      45.0
       ... 
886    37.0
887    29.0
888     NaN
889    36.0
890    42.0
Name: age, Length: 891, dtype: float64

In [8]:
# Reload the dataset and rebuild df with only 'age' and 'fare'
# (the rebuilt frame has no 'ten' column).
titanic = sns.load_dataset('titanic')
df = titanic.loc[:,['age','fare']]

# NOTE: this is not the last expression of the cell, so with Jupyter's
# default settings its result is not displayed.
df.head()

def add_10(n):
    """Add 10 to ``n`` and return the result."""
    plus_ten = n + 10
    return plus_ten

def add_two_obj(a, b):
    """Combine ``a`` and ``b`` with the ``+`` operator and return the result."""
    combined = a + b
    return combined

In [9]:
# Apply add_10 to every individual element of the DataFrame.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of
# DataFrame.map, so use `map` when this pandas version provides it and
# fall back to `applymap` on older versions.
df_map = (df.map if hasattr(df, "map") else df.applymap)(add_10)
df_map

Unnamed: 0,age,fare
0,32.0,17.2500
1,48.0,81.2833
2,36.0,17.9250
3,45.0,63.1000
4,45.0,18.0500
...,...,...
886,37.0,23.0000
887,29.0,40.0000
888,,33.4500
889,36.0,40.0000


In [10]:
def missing_value(series):
    """Return a boolean mask that is True where ``series`` has a missing value.

    Any object exposing ``isnull()`` is accepted; this notebook passes both
    single DataFrame columns and a whole DataFrame.
    """
    null_mask = series.isnull()
    return null_mask

In [11]:
# axis=0 hands each column to missing_value as a Series; the per-column
# boolean masks are assembled into a DataFrame shaped like df.
result=df.apply(missing_value, axis=0)
result

Unnamed: 0,age,fare
0,False,False
1,False,False
2,False,False
3,False,False
4,False,False
...,...,...
886,False,False
887,False,False
888,True,False
889,False,False


In [12]:
def min_max(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest

In [13]:
# With the default axis each column is passed to min_max; since min_max
# returns one value per column, the result is a Series indexed by column name.
result1 = df.apply(min_max)
result1

age      79.5800
fare    512.3292
dtype: float64

In [32]:
# With axis=1 the lambda receives one row at a time (a Series indexed by
# column name), so x['age'] and x['ten'] are that row's individual values.

# (Re)create the constant column: df was rebuilt from the dataset with only
# 'age' and 'fare', so without this line a top-to-bottom run fails with
# KeyError: 'ten'. Re-assigning is harmless when the column already exists.
df['ten'] = 10
df['add'] = df.apply(lambda x: add_two_obj(x['age'], x['ten']), axis=1)
df

Unnamed: 0,age,fare,ten,add
0,22.0,7.2500,10,32.0
1,38.0,71.2833,10,48.0
2,26.0,7.9250,10,36.0
3,35.0,53.1000,10,45.0
4,35.0,8.0500,10,45.0
...,...,...,...,...
886,27.0,13.0000,10,37.0
887,19.0,30.0000,10,29.0
888,,23.4500,10,
889,26.0,30.0000,10,36.0


In [31]:
def missing_count(x):
    """Count missing entries by summing the boolean mask from ``missing_value``.

    For a DataFrame this yields one count per column.
    """
    mask = missing_value(x)
    return mask.sum()

def total_number_missing(x):
    """Return the sum of the counts produced by ``missing_count``.

    For a DataFrame this is the total number of missing cells.
    """
    counts = missing_count(x)
    return counts.sum()

In [20]:
# pipe passes the whole DataFrame to the function: missing_value(df).
# The result is a DataFrame of booleans.
result_df = df.pipe(missing_value)
print(result_df)
print(type(result_df))

       age   fare    ten    add
0    False  False  False  False
1    False  False  False  False
2    False  False  False  False
3    False  False  False  False
4    False  False  False  False
..     ...    ...    ...    ...
886  False  False  False  False
887  False  False  False  False
888   True  False  False   True
889  False  False  False  False
890  False  False  False  False

[891 rows x 4 columns]
<class 'pandas.core.frame.DataFrame'>


In [21]:
# missing_count(df) sums the boolean mask per column, giving a Series of counts.
result_series = df.pipe(missing_count)
result_series

age     177
fare      0
ten       0
add     177
dtype: int64

In [22]:
# total_number_missing(df) sums the per-column counts into a single scalar.
result_value = df.pipe(total_number_missing)
print(result_value)
print(type(result_value))

354
<class 'numpy.int64'>


In [24]:
# Small two-column integer frame used to demonstrate chained pipe calls.
df_sample = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]})
df_sample

Unnamed: 0,A,B
0,1,6
1,2,7
2,3,8
3,4,9
4,5,10


In [25]:
def add_two(x):
    """Return ``x`` increased by 2."""
    shifted = x + 2
    return shifted

def mul_3(x):
    """Return ``x`` multiplied by 3."""
    tripled = x * 3
    return tripled

In [26]:
# Chained pipes run left to right: add_two first, then mul_3, i.e. (x + 2) * 3.
df_sample.pipe(add_two).pipe(mul_3)

Unnamed: 0,A,B
0,9,24
1,12,27
2,15,30
3,18,33
4,21,36


In [35]:
# Label-based slice: rows 0 through 4 and columns 'survived' through 'age'
# (both ends inclusive with .loc).
df = titanic.loc[0:4, 'survived':'age']
df
# Column labels as a plain Python list, reused by the reordering cells below.
columns = list(df.columns)

In [40]:
# Reorder the columns alphabetically by label; selecting with a list of
# labels returns the columns in the order given by that list.
columns_sorted = sorted(columns)
df_sorted = df[columns_sorted]
df_sorted

Unnamed: 0,age,pclass,sex,survived
0,22.0,3,male,0
1,38.0,1,female,1
2,26.0,3,female,1
3,35.0,1,female,1
4,35.0,3,male,0


In [41]:
# Reverse the original column order with a reversed copy of the label list.
columns_reversed = columns[::-1]
df_reversed = df[columns_reversed]
df_reversed

Unnamed: 0,age,sex,pclass,survived
0,22.0,male,3,0
1,38.0,female,1,1
2,26.0,female,3,1
3,35.0,female,1,1
4,35.0,male,3,0


In [42]:
# Arrange the columns in an explicit, hand-written order.
columns_customed = ['pclass','sex','age','survived']
df_customed = df[columns_customed]
df_customed

Unnamed: 0,pclass,sex,age,survived
0,3,male,22.0,0
1,1,female,38.0,1
2,3,female,26.0,1
3,1,female,35.0,1
4,3,male,35.0,0
