<a href="https://colab.research.google.com/github/Sjleerodls/Data_Analysis/blob/main/lab_da/da12_shape.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DataFrame 모양(shape) 변경

* wide (columns) --> long (rows)
* long (rows) --> wide (columns)

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# stack vs unstack

In [148]:
df = pd.DataFrame(data=np.arange(1, 7).reshape((2, 3)),     # reshape으로 행과 열 설정
                  columns=['a', 'b', 'c'],
                  index=['X', 'Y'])
df

Unnamed: 0,a,b,c
X,1,2,3
Y,4,5,6


In [149]:
df_stacked = df.stack()
df_stacked  #> 컬럼 이름들이 index(row label)로 바뀜. wide --> long

Unnamed: 0,Unnamed: 1,0
X,a,1
X,b,2
X,c,3
Y,a,4
Y,b,5
Y,c,6


In [10]:
df_unstacked = df_stacked.unstack()     # index level : -1 기본 설정되어 있음.
df_unstacked    #> 가장 마지막 레벨의 인덱스를 컬럼으로 변환. long --> wide

Unnamed: 0,a,b,c
X,1,2,3
Y,4,5,6


In [12]:
df_stacked.unstack(level=0)     # 기본 설정이 -1 로 설정되어 있지만 0으로 설정하면 기존 X,Y가 컬럼으로 바뀜.

Unnamed: 0,X,Y
a,1,4
b,2,5
c,3,6


컬럼 이름(인덱스)가 MultiIndex인 경우

In [17]:
df = pd.DataFrame(data=np.arange(1, 13).reshape(2, 6),
                  columns=[['Lunch'] * 3 + ['Dinner'] * 3,
                           ['Fri', 'Sat', 'Sun'] * 2])
df

Unnamed: 0_level_0,Lunch,Lunch,Lunch,Dinner,Dinner,Dinner
Unnamed: 0_level_1,Fri,Sat,Sun,Fri,Sat,Sun
0,1,2,3,4,5,6
1,7,8,9,10,11,12


In [19]:
df.columns      #> MultiIndex       튜플로 이루어진 배열.

MultiIndex([( 'Lunch', 'Fri'),
            ( 'Lunch', 'Sat'),
            ( 'Lunch', 'Sun'),
            ('Dinner', 'Fri'),
            ('Dinner', 'Sat'),
            ('Dinner', 'Sun')],
           )

In [22]:
df.stack()      # 마지막 레벨의 컬럼만 index로 내려옴.
                #> 가장 마지막 레벨의 컬럼 이름들을 index(row label)로 변환

Unnamed: 0,Unnamed: 1,Lunch,Dinner
0,Fri,1,4
0,Sat,2,5
0,Sun,3,6
1,Fri,7,10
1,Sat,8,11
1,Sun,9,12


In [25]:
df.stack(level=[0, 1])      # 리스트로도 사용 가능함.

Unnamed: 0,Unnamed: 1,Unnamed: 2,0
0,Dinner,Fri,4
0,Dinner,Sat,5
0,Dinner,Sun,6
0,Lunch,Fri,1
0,Lunch,Sat,2
0,Lunch,Sun,3
1,Dinner,Fri,10
1,Dinner,Sat,11
1,Dinner,Sun,12
1,Lunch,Fri,7


# pivot vs melt

In [28]:
df = pd.DataFrame(data={
    'time': ['Lunch'] * 3 + ['Dinner'] * 3,
    'day': ['Fri', 'Sat', 'Sun'] * 2,
    'tip': np.arange(1,7),
    'total_bill': np.arange(10, 70, 10)
})

df

Unnamed: 0,time,day,tip,total_bill
0,Lunch,Fri,1,10
1,Lunch,Sat,2,20
2,Lunch,Sun,3,30
3,Dinner,Fri,4,40
4,Dinner,Sat,5,50
5,Dinner,Sun,6,60


## pivot

`pd.DataFrame.pivot()` 메서드 파라미터:
* columns : pivoting 데이터프레임에서 컬럼 이름으로 사용하기 위한 컬럼(들)의 이름.
* index : pivoting 데이터프레임에서 인덱스(row label)로 사용하기 위한 컬럼(들)의 이름.
* values : pivoting 데이터프레임에서 각 셀에 채울 값들을 가지고 있는 컬럼(들)의 이름.

In [30]:
df.pivot(columns='day', index='time', values='tip')

day,Fri,Sat,Sun
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Dinner,4,5,6
Lunch,1,2,3


In [32]:
df.pivot(columns='time', index='day', values='total_bill')

time,Dinner,Lunch
day,Unnamed: 1_level_1,Unnamed: 2_level_1
Fri,40,10
Sat,50,20
Sun,60,30


## melt

In [34]:
df = pd.DataFrame(data={
    'gender': ['Female', 'Male'],
    'lunch': [10, 7],
    'dinner': [20, 30]
})

df

Unnamed: 0,gender,lunch,dinner
0,Female,10,20
1,Male,7,30


`pd.DataFrame.melt()` 메서드 파라미터 :
* id_vars : melting 될 때 컬럼으로 유지될 컬럼(들) 이름.
    * id_vars에 설정하지 않은 컬럼 이름들은 variable 컬럼으로 melting됨.
    * variable 컬럼의 값으로 들어감.
    * iv_vars에 설정하지 않은 컬럼들의 모든 셀에 있는 값들은 value 컬럼으로 melting됨.
* var_name : variable 컬럼의 이름을 사용할 문자열.
* value_name : value 컬럼의 이름으로 사용할 문자열.

In [36]:
df.melt(id_vars='gender')

Unnamed: 0,gender,variable,value
0,Female,lunch,10
1,Male,lunch,7
2,Female,dinner,20
3,Male,dinner,30


In [38]:
df.melt(id_vars='gender', var_name='time')

Unnamed: 0,gender,time,value
0,Female,lunch,10
1,Male,lunch,7
2,Female,dinner,20
3,Male,dinner,30


In [41]:
df.melt(id_vars='gender', var_name='time', value_name='count')

Unnamed: 0,gender,time,count
0,Female,lunch,10
1,Male,lunch,7
2,Female,dinner,20
3,Male,dinner,30


# pivot_table

groupby 연산과 통계 함수 적용 결과를 pivoting 하는 함수

In [44]:
tips = sns.load_dataset('tips')

In [45]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


## 성별 팁의 평균

In [46]:
# 성별 팁의 평균을 계산
tips.groupby(by=['sex']).tip.mean()

Unnamed: 0_level_0,tip
sex,Unnamed: 1_level_1
Male,3.089618
Female,2.833448


In [48]:
tips.pivot_table(values='tip', index='sex')     # aggfunc = 'mean' 기본값

Unnamed: 0_level_0,tip
sex,Unnamed: 1_level_1
Male,3.089618
Female,2.833448


## 성별, 흡연여부별 tip의 평균

In [53]:
by_sex_smoker = tips.groupby(by=['sex', 'smoker']).tip.mean()
by_sex_smoker

Unnamed: 0_level_0,Unnamed: 1_level_0,tip
sex,smoker,Unnamed: 2_level_1
Male,Yes,3.051167
Male,No,3.113402
Female,Yes,2.931515
Female,No,2.773519


In [54]:
by_sex_smoker.unstack()

smoker,Yes,No
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,3.051167,3.113402
Female,2.931515,2.773519


In [55]:
tips.pivot_table(values='tip', index=['sex', 'smoker'])

Unnamed: 0_level_0,Unnamed: 1_level_0,tip
sex,smoker,Unnamed: 2_level_1
Male,Yes,3.051167
Male,No,3.113402
Female,Yes,2.931515
Female,No,2.773519


In [57]:
tips.pivot_table(values='tip', index='sex', columns='smoker')

smoker,Yes,No
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,3.051167,3.113402
Female,2.931515,2.773519


## 성별 팁, 영수증 평균

In [60]:
by_sex = tips.groupby(by=['sex'])[['tip', 'total_bill']].mean()
by_sex

Unnamed: 0_level_0,tip,total_bill
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,3.089618,20.744076
Female,2.833448,18.056897


In [62]:
tips.pivot_table(values=['tip', 'total_bill'], index='sex')

Unnamed: 0_level_0,tip,total_bill
sex,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,3.089618,20.744076
Female,2.833448,18.056897


## 성별, 흡연여부별 팁, 영수증 평균

In [150]:
by_sex_smoker = tips.groupby(by=['sex', 'smoker'])[['tip', 'total_bill']].mean()
by_sex_smoker

Unnamed: 0_level_0,Unnamed: 1_level_0,tip,total_bill
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,3.051167,22.2845
Male,No,3.113402,19.791237
Female,Yes,2.931515,17.977879
Female,No,2.773519,18.105185


In [152]:
by_sex_smoker.unstack()

Unnamed: 0_level_0,tip,tip,total_bill,total_bill
smoker,Yes,No,Yes,No
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Male,3.051167,3.113402,22.2845,19.791237
Female,2.931515,2.773519,17.977879,18.105185


In [68]:
tips.pivot_table(values=['tip', 'total_bill'], index=['sex', 'smoker'])

Unnamed: 0_level_0,Unnamed: 1_level_0,tip,total_bill
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Yes,3.051167,22.2845
Male,No,3.113402,19.791237
Female,Yes,2.931515,17.977879
Female,No,2.773519,18.105185


In [72]:
tips.pivot_table(values=['tip', 'total_bill'], index=['sex'], columns='smoker')

Unnamed: 0_level_0,tip,tip,total_bill,total_bill
smoker,Yes,No,Yes,No
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Male,3.051167,3.113402,22.2845,19.791237
Female,2.931515,2.773519,17.977879,18.105185


## Ex 1. 성별, 요일별, 시간별 팁의 평균

In [104]:
# groupby
result = tips.groupby(by=['sex', 'day', 'time']).tip.mean()
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tip
sex,day,time,Unnamed: 3_level_1
Male,Thur,Lunch,2.980333
Male,Thur,Dinner,
Male,Fri,Lunch,1.9
Male,Fri,Dinner,3.032857
Male,Sat,Lunch,
Male,Sat,Dinner,3.083898
Male,Sun,Lunch,
Male,Sun,Dinner,3.220345
Female,Thur,Lunch,2.561935
Female,Thur,Dinner,3.0


In [106]:
result.unstack()

Unnamed: 0_level_0,time,Lunch,Dinner
sex,day,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Thur,2.980333,
Male,Fri,1.9,3.032857
Male,Sat,,3.083898
Male,Sun,,3.220345
Female,Thur,2.561935,3.0
Female,Fri,2.745,2.81
Female,Sat,,2.801786
Female,Sun,,3.367222


In [108]:
result.unstack(level=1)

Unnamed: 0_level_0,day,Thur,Fri,Sat,Sun
sex,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Male,Lunch,2.980333,1.9,,
Male,Dinner,,3.032857,3.083898,3.220345
Female,Lunch,2.561935,2.745,,
Female,Dinner,3.0,2.81,2.801786,3.367222


In [109]:
# pivot_table
tips.pivot_table(values='tip', index=['sex', 'day', 'time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tip
sex,day,time,Unnamed: 3_level_1
Male,Thur,Lunch,2.980333
Male,Fri,Lunch,1.9
Male,Fri,Dinner,3.032857
Male,Sat,Dinner,3.083898
Male,Sun,Dinner,3.220345
Female,Thur,Lunch,2.561935
Female,Thur,Dinner,3.0
Female,Fri,Lunch,2.745
Female,Fri,Dinner,2.81
Female,Sat,Dinner,2.801786


In [111]:
tips.pivot_table(values='tip', index=['sex', 'day'], columns='time')

Unnamed: 0_level_0,time,Lunch,Dinner
sex,day,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Thur,2.980333,
Male,Fri,1.9,3.032857
Male,Sat,,3.083898
Male,Sun,,3.220345
Female,Thur,2.561935,3.0
Female,Fri,2.745,2.81
Female,Sat,,2.801786
Female,Sun,,3.367222


In [112]:
tips.pivot_table(values='tip', index=['sex', 'time'], columns='day')

Unnamed: 0_level_0,day,Thur,Fri,Sat,Sun
sex,time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Male,Lunch,2.980333,1.9,,
Male,Dinner,,3.032857,3.083898,3.220345
Female,Lunch,2.561935,2.745,,
Female,Dinner,3.0,2.81,2.801786,3.367222


## Ex 2. 성별 팁의 최솟값, 중위값, 최댓값

In [116]:
# groupby
min_by_tip = tips.groupby(by=['sex']).tip.min()
max_by_tip = tips.groupby(by=['sex']).tip.max()
median_by_tip = tips.groupby(by=['sex']).tip.median()

print(min_by_tip)
print(max_by_tip)
print(median_by_tip)

sex
Male      1.0
Female    1.0
Name: tip, dtype: float64
sex
Male      10.0
Female     6.5
Name: tip, dtype: float64
sex
Male      3.00
Female    2.75
Name: tip, dtype: float64


In [119]:
tips.groupby(by=['sex']).tip.agg(['min', 'median', 'max'])

Unnamed: 0_level_0,min,median,max
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,1.0,3.0,10.0
Female,1.0,2.75,6.5


In [115]:
# pivot_table
tips.pivot_table(values='tip', index='sex', aggfunc=['min', 'median', 'max'])

Unnamed: 0_level_0,min,median,max
Unnamed: 0_level_1,tip,tip,tip
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Male,1.0,3.0,10.0
Female,1.0,2.75,6.5


## Ex 3. 성별, 요일별 영수증 최솟값, 중위값, 최댓값

In [120]:
# groupby
tips.groupby(by=['sex', 'day']).total_bill.min()
tips.groupby(by=['sex', 'day']).total_bill.median()
tips.groupby(by=['sex', 'day']).total_bill.max()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill
sex,day,Unnamed: 2_level_1
Male,Thur,41.19
Male,Fri,40.17
Male,Sat,50.81
Male,Sun,48.17
Female,Thur,43.11
Female,Fri,22.75
Female,Sat,44.3
Female,Sun,35.26


In [122]:
result = tips.groupby(by=['sex', 'day']).tip.agg(['min', 'median', 'max'])
result

Unnamed: 0_level_0,Unnamed: 1_level_0,min,median,max
sex,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,Thur,1.44,2.53,6.7
Male,Fri,1.5,2.6,4.73
Male,Sat,1.0,3.0,10.0
Male,Sun,1.32,3.085,6.5
Female,Thur,1.25,2.005,5.17
Female,Fri,1.0,3.0,4.3
Female,Sat,1.0,2.625,6.5
Female,Sun,1.01,3.5,5.2


In [123]:
result.unstack()

Unnamed: 0_level_0,min,min,min,min,median,median,median,median,max,max,max,max
day,Thur,Fri,Sat,Sun,Thur,Fri,Sat,Sun,Thur,Fri,Sat,Sun
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Male,1.44,1.5,1.0,1.32,2.53,2.6,3.0,3.085,6.7,4.73,10.0,6.5
Female,1.25,1.0,1.0,1.01,2.005,3.0,2.625,3.5,5.17,4.3,6.5,5.2


In [125]:
result.unstack(level=0)

Unnamed: 0_level_0,min,min,median,median,max,max
sex,Male,Female,Male,Female,Male,Female
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Thur,1.44,1.25,2.53,2.005,6.7,5.17
Fri,1.5,1.0,2.6,3.0,4.73,4.3
Sat,1.0,1.0,3.0,2.625,10.0,6.5
Sun,1.32,1.01,3.085,3.5,6.5,5.2


In [127]:
# pivot_table
tips.pivot_table(values='total_bill', index=['sex', 'day'], aggfunc=['min', 'median', 'max'])

Unnamed: 0_level_0,Unnamed: 1_level_0,min,median,max
Unnamed: 0_level_1,Unnamed: 1_level_1,total_bill,total_bill,total_bill
sex,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Male,Thur,7.51,16.975,41.19
Male,Fri,8.58,17.215,40.17
Male,Sat,7.74,18.24,50.81
Male,Sun,7.25,20.725,48.17
Female,Thur,8.35,13.785,43.11
Female,Fri,5.75,15.38,22.75
Female,Sat,3.07,18.36,44.3
Female,Sun,9.6,17.41,35.26


In [129]:
tips.pivot_table(values='total_bill', index='sex', columns= 'day', aggfunc=['min', 'median', 'max'])

Unnamed: 0_level_0,min,min,min,min,median,median,median,median,max,max,max,max
day,Thur,Fri,Sat,Sun,Thur,Fri,Sat,Sun,Thur,Fri,Sat,Sun
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Male,7.51,8.58,7.74,7.25,16.975,17.215,18.24,20.725,41.19,40.17,50.81,48.17
Female,8.35,5.75,3.07,9.6,13.785,15.38,18.36,17.41,43.11,22.75,44.3,35.26


In [130]:
tips.pivot_table(values='total_bill', index='day', columns= 'sex', aggfunc=['min', 'median', 'max'])

Unnamed: 0_level_0,min,min,median,median,max,max
sex,Male,Female,Male,Female,Male,Female
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Thur,7.51,8.35,16.975,13.785,41.19,43.11
Fri,8.58,5.75,17.215,15.38,40.17,22.75
Sat,7.74,3.07,18.24,18.36,50.81,44.3
Sun,7.25,9.6,20.725,17.41,48.17,35.26


## Ex 4. 성별, 흡연여부별, 요일별 팁의 중위값

In [132]:
# groupby
result = tips.groupby(by=['sex', 'smoker', 'day']).tip.median()
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tip
sex,smoker,day,Unnamed: 3_level_1
Male,Yes,Thur,2.78
Male,Yes,Fri,2.6
Male,Yes,Sat,3.0
Male,Yes,Sun,3.5
Male,No,Thur,2.405
Male,No,Fri,2.5
Male,No,Sat,2.86
Male,No,Sun,3.0
Female,Yes,Thur,2.5
Female,Yes,Fri,2.5


In [134]:
result.unstack()

Unnamed: 0_level_0,day,Thur,Fri,Sat,Sun
sex,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Male,Yes,2.78,2.6,3.0,3.5
Male,No,2.405,2.5,2.86,3.0
Female,Yes,2.5,2.5,2.5,3.5
Female,No,2.0,3.125,2.75,3.5


In [135]:
result.unstack(level=1)

Unnamed: 0_level_0,smoker,Yes,No
sex,day,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,Thur,2.78,2.405
Male,Fri,2.6,2.5
Male,Sat,3.0,2.86
Male,Sun,3.5,3.0
Female,Thur,2.5,2.0
Female,Fri,2.5,3.125
Female,Sat,2.5,2.75
Female,Sun,3.5,3.5


In [136]:
result.unstack(level=[0, 1])

sex,Male,Male,Female,Female
smoker,Yes,No,Yes,No
day,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Thur,2.78,2.405,2.5,2.0
Fri,2.6,2.5,2.5,3.125
Sat,3.0,2.86,2.5,2.75
Sun,3.5,3.0,3.5,3.5


In [98]:
# pivot_table
tips.pivot_table(values='tip', index=['sex', 'smoker', 'day'], aggfunc='median')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,tip
sex,smoker,day,Unnamed: 3_level_1
Male,Yes,Thur,2.78
Male,Yes,Fri,2.6
Male,Yes,Sat,3.0
Male,Yes,Sun,3.5
Male,No,Thur,2.405
Male,No,Fri,2.5
Male,No,Sat,2.86
Male,No,Sun,3.0
Female,Yes,Thur,2.5
Female,Yes,Fri,2.5


## Ex 5. 성별, 흡연여부별, 요일별, 시간별 팁의 중앙값

In [138]:
# groupby
result = tips.groupby(by=['sex', 'smoker', 'day', 'time']).tip.median()
result

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,tip
sex,smoker,day,time,Unnamed: 4_level_1
Male,Yes,Thur,Lunch,2.78
Male,Yes,Thur,Dinner,
Male,Yes,Fri,Lunch,1.92
Male,Yes,Fri,Dinner,3.0
Male,Yes,Sat,Lunch,
Male,Yes,Sat,Dinner,3.0
Male,Yes,Sun,Lunch,
Male,Yes,Sun,Dinner,3.5
Male,No,Thur,Lunch,2.405
Male,No,Thur,Dinner,


In [139]:
result.unstack()

Unnamed: 0_level_0,Unnamed: 1_level_0,time,Lunch,Dinner
sex,smoker,day,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,Yes,Thur,2.78,
Male,Yes,Fri,1.92,3.0
Male,Yes,Sat,,3.0
Male,Yes,Sun,,3.5
Male,No,Thur,2.405,
Male,No,Fri,,2.5
Male,No,Sat,,2.86
Male,No,Sun,,3.0
Female,Yes,Thur,2.5,
Female,Yes,Fri,2.5,2.75


In [142]:
result.unstack(level=2)

Unnamed: 0_level_0,Unnamed: 1_level_0,day,Thur,Fri,Sat,Sun
sex,smoker,time,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Male,Yes,Lunch,2.78,1.92,,
Male,Yes,Dinner,,3.0,3.0,3.5
Male,No,Lunch,2.405,,,
Male,No,Dinner,,2.5,2.86,3.0
Female,Yes,Lunch,2.5,2.5,,
Female,Yes,Dinner,,2.75,2.5,3.5
Female,No,Lunch,2.0,3.0,,
Female,No,Dinner,3.0,3.25,2.75,3.5


In [143]:
result.unstack(level=['day', 'time'])

Unnamed: 0_level_0,day,Thur,Thur,Fri,Fri,Sat,Sat,Sun,Sun
Unnamed: 0_level_1,time,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Male,Yes,2.78,,1.92,3.0,,3.0,,3.5
Male,No,2.405,,,2.5,,2.86,,3.0
Female,Yes,2.5,,2.5,2.75,,2.5,,3.5
Female,No,2.0,3.0,3.0,3.25,,2.75,,3.5


In [144]:
# pivot_table
tips.pivot_table(values='tip', index=['sex', 'smoker', 'day', 'time'], aggfunc='median')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,tip
sex,smoker,day,time,Unnamed: 4_level_1
Male,Yes,Thur,Lunch,2.78
Male,Yes,Fri,Lunch,1.92
Male,Yes,Fri,Dinner,3.0
Male,Yes,Sat,Dinner,3.0
Male,Yes,Sun,Dinner,3.5
Male,No,Thur,Lunch,2.405
Male,No,Fri,Dinner,2.5
Male,No,Sat,Dinner,2.86
Male,No,Sun,Dinner,3.0
Female,Yes,Thur,Lunch,2.5


In [146]:
tips.pivot_table(values='tip', index=['sex', 'smoker'], columns=['day', 'time'], aggfunc='median')

Unnamed: 0_level_0,day,Thur,Thur,Fri,Fri,Sat,Sun
Unnamed: 0_level_1,time,Lunch,Dinner,Lunch,Dinner,Dinner,Dinner
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Male,Yes,2.78,,1.92,3.0,3.0,3.5
Male,No,2.405,,,2.5,2.86,3.0
Female,Yes,2.5,,2.5,2.75,2.5,3.5
Female,No,2.0,3.0,3.0,3.25,2.75,3.5


In [147]:
tips.pivot_table(values='tip', index=['day', 'time'], columns=['sex', 'smoker'], aggfunc='median')

Unnamed: 0_level_0,sex,Male,Male,Female,Female
Unnamed: 0_level_1,smoker,Yes,No,Yes,No
day,time,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Thur,Lunch,2.78,2.405,2.5,2.0
Thur,Dinner,,,,3.0
Fri,Lunch,1.92,,2.5,3.0
Fri,Dinner,3.0,2.5,2.75,3.25
Sat,Dinner,3.0,2.86,2.5,2.75
Sun,Dinner,3.5,3.0,3.5,3.5
