## pandas.DataFrame.pipe()

pipe() 메서드는 데이터프레임 객체 전체를 함수의 인자로 전달하여 함수를 적용한다.
적용한 함수의 리턴값에 따라서 반환되는 객체의 종류가 정해진다.

    
### 리턴 타입 
1. 데이터프레임
2. 시리즈
3. 개별값

In [24]:
import pandas as pd
import seaborn as sns

# Load seaborn's 'titanic' example dataset as a DataFrame.
titanic = sns.load_dataset('titanic')
# Select all rows of two column subsets; df1 is the input of the pipe()
# examples, df2 is not used in the visible part of this notebook.
df1 = titanic.loc[:, ['age', 'fare']]
df2 = titanic.loc[:, ['embark_town', 'embarked']]

def missing_value(df):
    """Return a boolean mask of ``df`` that is True where a value is missing.

    The result has the same shape and labels as ``df`` (the output of
    ``df.isna()``).
    """
    na_mask = df.isna()
    return na_mask

### 데이터프레임 -> 데이터프레임

In [None]:
# pipe() passes df1 to missing_value; that function returns a DataFrame,
# so the piped result is a DataFrame as well.
df_result = df1.pipe(missing_value)

# Print the result type followed by a blank line, then the first rows.
print(type(df_result), end='\n\n')
print(df_result.head())

### 데이터프레임 -> 시리즈

In [27]:
def missing_count(df):
    """Return the number of missing values in each column of ``df``.

    Builds the missing-value mask with ``missing_value`` and sums it
    column-wise.
    """
    na_mask = missing_value(df)
    return na_mask.sum()

# pipe() passes df1 to missing_count; the column-wise sum it returns is a
# Series, so the piped result is a Series.
series_result = df1.pipe(missing_count)

# Print the result type followed by a blank line, then the per-column counts.
print(type(series_result), end='\n\n')
print(series_result)

<class 'pandas.core.series.Series'>

age     177
fare      0
dtype: int64


## 열의 재구성
### 열 순서 변경

In [30]:
# Preview the first rows to inspect the current column order.
titanic.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


### 열 슬라이싱

In [34]:
# Label-based column slice: .loc includes both endpoints, so this keeps
# every column from 'survived' through 'age'.
df = titanic.loc[:, 'survived':'age']
df.head()

Unnamed: 0,survived,pclass,sex,age
0,0,3,male,22.0
1,1,1,female,38.0
2,1,3,female,26.0
3,1,1,female,35.0
4,0,3,male,35.0


In [None]:
## 열 데이터 분리 

In [77]:
# Read the stock-price workbook (path is relative to the working directory).
stock = pd.read_excel('data/주가데이터.xlsx')
# Keep an independent copy of the freshly loaded frame.
stock_copy = stock.copy()
print(stock.head())
# info() prints its report itself and returns None, which is why the output
# ends with a literal "None".
print(stock.info())

         연월일   당일종가  전일종가     시가     고가     저가     거래량
0 2018-07-02  10100   600  10850  10900  10000  137977
1 2018-06-29  10700   300  10550  10900   9990  170253
2 2018-06-28  10400   500  10900  10950  10150  155769
3 2018-06-27  10900   100  10800  11050  10500  133548
4 2018-06-26  10800   350  10900  11000  10700   63039
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   연월일     20 non-null     datetime64[ns]
 1   당일종가    20 non-null     int64         
 2   전일종가    20 non-null     int64         
 3   시가      20 non-null     int64         
 4   고가      20 non-null     int64         
 5   저가      20 non-null     int64         
 6   거래량     20 non-null     int64         
dtypes: datetime64[ns](1), int64(6)
memory usage: 1.2 KB
None


In [79]:
# Convert the datetime column to strings so the .str accessor can be used on it.
stock['연월일'] = stock['연월일'].astype(str)
# info() prints directly and returns None, so print() appends "None".
print(stock.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   연월일     20 non-null     object
 1   당일종가    20 non-null     int64 
 2   전일종가    20 non-null     int64 
 3   시가      20 non-null     int64 
 4   고가      20 non-null     int64 
 5   저가      20 non-null     int64 
 6   거래량     20 non-null     int64 
dtypes: int64(6), object(1)
memory usage: 1.2+ KB
None


In [81]:
# Split each date string on '-' into a list of its parts.
dates = stock['연월일'].str.split('-')
# .str.get(i) and .str[i] are two equivalent ways to pick the i-th element
# of each list; the new year/month/day columns hold strings.
stock['연'] = dates.str.get(0)
stock['월'] = dates.str[1]
stock['일'] = dates.str[2]
stock.head()

Unnamed: 0,연월일,당일종가,전일종가,시가,고가,저가,거래량,연,월,일
0,2018-07-02,10100,600,10850,10900,10000,137977,2018,7,2
1,2018-06-29,10700,300,10550,10900,9990,170253,2018,6,29
2,2018-06-28,10400,500,10900,10950,10150,155769,2018,6,28
3,2018-06-27,10900,100,10800,11050,10500,133548,2018,6,27
4,2018-06-26,10800,350,10900,11000,10700,63039,2018,6,26


In [83]:
# expand=True returns the split parts as separate DataFrame columns
# instead of a Series of lists.
date_expand = stock['연월일'].str.split('-', expand=True)
# Label the three resulting columns as year / month / day.
date_expand.columns = ['연', '월', '일']
print(date_expand)


       연   월   일
0   2018  07  02
1   2018  06  29
2   2018  06  28
3   2018  06  27
4   2018  06  26
5   2018  06  25
6   2018  06  22
7   2018  06  21
8   2018  06  20
9   2018  06  19
10  2018  06  18
11  2018  06  15
12  2018  06  14
13  2018  06  12
14  2018  06  11
15  2018  06  08
16  2018  06  07
17  2018  06  05
18  2018  06  04
19  2018  06  01


In [107]:
# NOTE(review): `stock` already gained the 연/월/일 columns in an earlier cell,
# so after this re-copy the concat below yields those column names twice.
# Confirm this is intended; otherwise concat against the unmodified frame.
stock_copy = stock.copy()
# axis=1 concatenates column-wise, aligning rows on the index.
pd.concat([stock_copy, date_expand], axis=1).head()

Unnamed: 0,연월일,당일종가,전일종가,시가,고가,저가,거래량,연,월,일,연.1,월.1,일.1
0,2018-07-02,10100,600,10850,10900,10000,137977,2018,7,2,2018,7,2
1,2018-06-29,10700,300,10550,10900,9990,170253,2018,6,29,2018,6,29
2,2018-06-28,10400,500,10900,10950,10150,155769,2018,6,28,2018,6,28
3,2018-06-27,10900,100,10800,11050,10500,133548,2018,6,27,2018,6,27
4,2018-06-26,10800,350,10900,11000,10700,63039,2018,6,26,2018,6,26


## 데이터프레임 합치기

### pandas.concat()

## 데이터프레임 병합
### pandas.merge()
키가 되는 인덱스나 열이 필요하며, 이 키는 양쪽 데이터프레임에 모두 존재해야 한다.
   




### pandas.DataFrame1.join(DataFrame2)
merge와 유사하지만 기본적으로 두 데이터프레임의 행 인덱스를 기준으로 결합한다.   
on=컬럼명을 지정하면 호출하는 데이터프레임의 해당 열과 상대 데이터프레임의 행 인덱스를 기준으로 결합하며, 이는 merge(left_on=컬럼명, right_index=True, how='left')와 같은 결과를 출력한다.