## Pandas Pivot
- 데이터 프레임의 컬럼 데이터에서 index, column, value를 선택해서 데이터프레임을 만드는 방법
- `df.pivot(index, columns, values)` 
    * index/columns 조합이 중복되면 pivot은 오류가 발생하므로, 먼저 groupby로 집계한 뒤 pivot 실행
- `df.pivot_table(values, index, columns, aggfunc)`

In [3]:
import numpy as np
import pandas as pd

#### pandas io
- 데이터 프레임을 저장, 로드

In [4]:
# Load the Titanic training set from CSV into a DataFrame.
titanic = pd.read_csv(filepath_or_buffer="datas/train.csv")
# Show the last two rows as a quick sanity check of the load.
titanic.tail(n=2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [None]:
# Write the DataFrame back to CSV; index=False leaves the row index
# out of the file.
titanic.to_csv(path_or_buf='datas/titanic.csv', index=False)

### Titanic Data 분석

1. 성별, 좌석 등급에 따른 데이터의 수 

In [5]:
# Count the rows in each (Sex, Pclass) group.
grouped = titanic.groupby(['Sex','Pclass'])
group_sizes = grouped.size()
# Turn the group keys back into ordinary columns and name the
# count column 'Counts'.
df1 = group_sizes.reset_index(name='Counts')
df1

Unnamed: 0,Sex,Pclass,Counts
0,female,1,94
1,female,2,76
2,female,3,144
3,male,1,122
4,male,2,108
5,male,3,347


In [6]:
# Reshape the long-format counts in df1 into a wide table:
# one row per Sex, one column per Pclass, cells filled from 'Counts'.
# The arguments are passed by keyword because DataFrame.pivot is
# keyword-only from pandas 2.0 onward; the positional form raises
# TypeError there.
result = df1.pivot(index='Sex', columns='Pclass', values='Counts')
result

Pclass,1,2,3
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
female,94,76,144
male,122,108,347


In [7]:
# Prepare for pivot_table: add a constant column of 1s so that summing
# 'Counts' within a group yields the number of rows in that group.
titanic.loc[:, 'Counts'] = 1
# Show the last two rows to confirm the new column is present.
titanic.tail(n=2)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Counts
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C,1
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q,1


In [8]:
# Build a Pclass x Survived table by summing the 'Counts' column
# within each (Pclass, Survived) combination.
# aggfunc is given as the string 'sum' rather than np.sum: recent pandas
# versions emit a FutureWarning for the NumPy callable and recommend the
# string form to keep the current behaviour.
result = titanic.pivot_table(
    values='Counts',
    index=['Pclass'],
    columns=['Survived'],
    aggfunc='sum',
)
result


Survived,0,1
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1
1,80,136
2,97,87
3,372,119


In [9]:
# Add a per-row total: the sum of the Survived == 0 and Survived == 1
# columns of the pivot table.
survived_0, survived_1 = result[0], result[1]
result['total'] = survived_0 + survived_1
result

Survived,0,1,total
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,80,136,216
2,97,87,184
3,372,119,491


In [17]:
# Append a 'total' row holding the column-wise sum over all data rows.
# Summing every row except 'total' avoids hard-coding the row labels
# (1, 2, 3), so the cell works for any set of index values.
# Dropping an existing 'total' row first (errors='ignore' makes this a
# no-op on the first run) keeps the cell safe to re-run.
data_rows = result.drop(index='total', errors='ignore')
result.loc['total'] = data_rows.sum()
result

Survived,0,1,total
Pclass,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,80,136,216
2,97,87,184
3,372,119,491
total,549,342,891
