# Data Aggregation and Group Operations

In [2]:
import pandas as pd
import numpy as np

## GroupBy mechanics

In [3]:
nrows=10
df=pd.DataFrame({'company':np.random.choice(list('ab'),nrows),
                 'data1':np.random.randn(nrows)*50+100,
                 'city':np.random.choice(list('MP'),nrows), 
                'income':np.random.randn(nrows)*30000+50000})
df

Unnamed: 0,company,data1,city,income
0,a,104.197842,P,69436.436284
1,a,166.212231,P,61358.943084
2,b,71.036947,M,-3308.182734
3,b,136.189741,P,98193.875266
4,b,131.181647,P,17896.58259
5,a,62.827111,M,42911.476305
6,b,150.557462,P,53921.045188
7,a,172.074381,M,67950.094003
8,b,151.208959,M,22734.648462
9,a,40.486705,P,60729.85798


Lo que conseguimos con groupby es agregarle después una función que nos permita obtener un resultado (count,sum,mean....)

In [4]:
df.groupby('company').sum()

Unnamed: 0_level_0,data1,income
company,Unnamed: 1_level_1,Unnamed: 2_level_1
a,545.798271,302386.807656
b,640.174756,189437.968772


In [5]:
df.groupby('company').mean()

Unnamed: 0_level_0,data1,income
company,Unnamed: 1_level_1,Unnamed: 2_level_1
a,109.159654,60477.361531
b,128.034951,37887.593754


In [6]:
df.groupby(['company','city']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,data1,income
company,city,Unnamed: 2_level_1,Unnamed: 3_level_1
a,M,117.450746,55430.785154
a,P,103.63226,63841.745783
b,M,111.122953,9713.232864
b,P,139.309617,56670.501015


In [7]:
df.groupby(['company','city']).mean()['income']

company  city
a        M       55430.785154
         P       63841.745783
b        M        9713.232864
         P       56670.501015
Name: income, dtype: float64

In [8]:
means = df.groupby(['company','city'])['income'].mean()

In [9]:
means

company  city
a        M       55430.785154
         P       63841.745783
b        M        9713.232864
         P       56670.501015
Name: income, dtype: float64

In [10]:
means['a','M']

55430.785154016456

In [11]:
means.reset_index()

Unnamed: 0,company,city,income
0,a,M,55430.785154
1,a,P,63841.745783
2,b,M,9713.232864
3,b,P,56670.501015


### Iterating over groups

In [19]:
for name, group in df.groupby('company'):
    print(group)

  company       data1 city        income
0       a  104.197842    P  69436.436284
1       a  166.212231    P  61358.943084
5       a   62.827111    M  42911.476305
7       a  172.074381    M  67950.094003
9       a   40.486705    P  60729.857980
  company       data1 city        income
2       b   71.036947    M  -3308.182734
3       b  136.189741    P  98193.875266
4       b  131.181647    P  17896.582590
6       b  150.557462    P  53921.045188
8       b  151.208959    M  22734.648462


In [20]:
all_data=dict(list(df.groupby('company')))

In [21]:
all_data['a']

Unnamed: 0,company,data1,city,income
0,a,104.197842,P,69436.436284
1,a,166.212231,P,61358.943084
5,a,62.827111,M,42911.476305
7,a,172.074381,M,67950.094003
9,a,40.486705,P,60729.85798


### Selecting a column or subset of columns

In [22]:
df.groupby('company')[['data1','city']].max()

Unnamed: 0_level_0,data1,city
company,Unnamed: 1_level_1,Unnamed: 2_level_1
a,172.074381,P
b,151.208959,P


## Data aggregation

In [26]:
df.groupby('city').max()

Unnamed: 0_level_0,company,data1,income
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
M,b,172.074381,67950.094003
P,b,166.212231,98193.875266


In [27]:
df.groupby('city')['income'].quantile(.9)

city
M    60438.508693
P    83815.155775
Name: income, dtype: float64

In [28]:
stats=df.groupby('city').describe() #En este caso se tiene multiindex en columnas
stats

Unnamed: 0_level_0,data1,data1,data1,data1,data1,data1,data1,data1,income,income,income,income,income,income,income,income
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
city,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
M,4.0,114.28685,55.44156,62.827111,68.984488,111.122953,156.425315,172.074381,4.0,32572.009009,30236.155531,-3308.182734,16223.940663,32823.062384,49171.13073,67950.094003
P,6.0,121.470938,44.75613,40.486705,110.943793,133.685694,146.965532,166.212231,6.0,60256.123399,25920.863067,17896.58259,55623.248386,61044.400532,67417.062984,98193.875266


In [29]:
stats['data1','mean'] #Con esto sacaríamos solo la media de los stats

city
M    114.286850
P    121.470938
Name: (data1, mean), dtype: float64

### Column-wise and multiple function application

In [30]:
!wget https://raw.githubusercontent.com/wesm/pydata-book/1st-edition/ch08/tips.csv

Will not apply HSTS. The HSTS database must be a regular and non-world-writable file.
ERROR: could not open HSTS store at '/home/joeldela/.wget-hsts'. HSTS will be disabled.
--2019-11-23 09:23:08--  https://raw.githubusercontent.com/wesm/pydata-book/1st-edition/ch08/tips.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.132.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.132.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7943 (7.8K) [text/plain]
Saving to: ‘tips.csv.1’


2019-11-23 09:23:08 (8.53 MB/s) - ‘tips.csv.1’ saved [7943/7943]



In [31]:
tips = pd.read_csv('tips.csv')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [32]:
tips['tip_pct']=tips['tip']/tips['total_bill']
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.50,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.139780
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0.203927
240,27.18,2.00,Female,Yes,Sat,Dinner,2,0.073584
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0.088222
242,17.82,1.75,Male,No,Sat,Dinner,2,0.098204


In [33]:
tips.groupby('sex').mean()

Unnamed: 0_level_0,total_bill,tip,size,tip_pct
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,18.056897,2.833448,2.45977,0.166491
Male,20.744076,3.089618,2.630573,0.157651


In [34]:
tips.groupby('sex').std()

Unnamed: 0_level_0,total_bill,tip,size,tip_pct
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,8.009209,1.159495,0.937644,0.053632
Male,9.246469,1.489102,0.955997,0.064778


Se pueden obtener estas medidas de un golpe

In [35]:
tips.groupby('sex').agg(['count','mean','std'])

Unnamed: 0_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,count,mean,std,count,mean,std,count,mean,std,count,mean,std
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Female,87,18.056897,8.009209,87,2.833448,1.159495,87,2.45977,0.937644,87,0.166491,0.053632
Male,157,20.744076,9.246469,157,3.089618,1.489102,157,2.630573,0.955997,157,0.157651,0.064778


In [36]:
tips.groupby(['sex','smoker']).agg(['count','mean','std'])

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,count,mean,std,count,mean,std,count,mean,std
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Female,No,54,18.105185,7.286455,54,2.773519,1.128425,54,2.592593,1.073146,54,0.156921,0.036421
Female,Yes,33,17.977879,9.189751,33,2.931515,1.219916,33,2.242424,0.613917,33,0.18215,0.071595
Male,No,97,19.791237,8.726566,97,3.113402,1.489559,97,2.71134,0.989094,97,0.160669,0.041849
Male,Yes,60,22.2845,9.911845,60,3.051167,1.50012,60,2.5,0.89253,60,0.152771,0.090588


In [37]:
tips.groupby(['sex','smoker']).agg([np.mean,np.std,np.count_nonzero])

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,count_nonzero,mean,std,count_nonzero,mean,std,count_nonzero,mean,std,count_nonzero
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Female,No,18.105185,7.286455,54.0,2.773519,1.128425,54.0,2.592593,1.073146,54,0.156921,0.036421,54.0
Female,Yes,17.977879,9.189751,33.0,2.931515,1.219916,33.0,2.242424,0.613917,33,0.18215,0.071595,33.0
Male,No,19.791237,8.726566,97.0,3.113402,1.489559,97.0,2.71134,0.989094,97,0.160669,0.041849,97.0
Male,Yes,22.2845,9.911845,60.0,3.051167,1.50012,60.0,2.5,0.89253,60,0.152771,0.090588,60.0


In [38]:
def minimax(series):
    return series.max() - series.min()

In [39]:
tips.groupby(['sex','smoker']).agg([np.mean,np.std,minimax])

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,total_bill,tip,tip,tip,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,minimax,mean,std,minimax,mean,std,minimax,mean,std,minimax
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Female,No,18.105185,7.286455,28.58,2.773519,1.128425,4.2,2.592593,1.073146,5,0.156921,0.036421,0.195876
Female,Yes,17.977879,9.189751,41.23,2.931515,1.219916,5.5,2.242424,0.613917,3,0.18215,0.071595,0.360233
Male,No,19.791237,8.726566,40.82,3.113402,1.489559,7.75,2.71134,0.989094,4,0.160669,0.041849,0.220186
Male,Yes,22.2845,9.911845,43.56,3.051167,1.50012,9.0,2.5,0.89253,4,0.152771,0.090588,0.674707


In [40]:
tips.groupby(['sex','smoker']).agg({'total_bill':[np.sum,np.mean],'tip':[minimax,np.std]}) #Aplicamos funciones distintas

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,total_bill,tip,tip
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,mean,minimax,std
sex,smoker,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Female,No,977.68,18.105185,4.2,1.128425
Female,Yes,593.27,17.977879,5.5,1.219916
Male,No,1919.75,19.791237,7.75,1.489559
Male,Yes,1337.07,22.2845,9.0,1.50012


## Group-wise operations and transformations

In [41]:
tips

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.50,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.139780
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808
...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,0.203927
240,27.18,2.00,Female,Yes,Sat,Dinner,2,0.073584
241,22.67,2.00,Male,Yes,Sat,Dinner,2,0.088222
242,17.82,1.75,Male,No,Sat,Dinner,2,0.098204


In [42]:
tips_by_smoker=tips.groupby('smoker')['tip_pct'].agg([np.mean,np.std])

In [43]:
merged=tips.merge(tips_by_smoker,left_on='smoker',right_index=True) #En este caso en la nueva tabla smoker es el index
merged.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447,0.159328,0.03991
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542,0.159328,0.03991
2,21.01,3.5,Male,No,Sun,Dinner,3,0.166587,0.159328,0.03991
3,23.68,3.31,Male,No,Sun,Dinner,2,0.13978,0.159328,0.03991
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808,0.159328,0.03991


### Apply: General split-apply-combine

In [44]:
def top(df,n=5,col='tip_pct'):
    return df.sort_values(by=col,ascending=False).head(n)

top(merged)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345,0.163196,0.085119
178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667,0.163196,0.085119
67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733,0.163196,0.085119
232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199,0.159328,0.03991
183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535,0.163196,0.085119


In [45]:
merged.groupby('sex').apply(top)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Female,178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667,0.163196,0.085119
Female,67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733,0.163196,0.085119
Female,109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525,0.163196,0.085119
Female,93,16.32,4.3,Female,Yes,Fri,Dinner,2,0.26348,0.163196,0.085119
Female,221,13.42,3.48,Female,Yes,Fri,Lunch,2,0.259314,0.163196,0.085119
Male,172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345,0.163196,0.085119
Male,232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199,0.159328,0.03991
Male,183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535,0.163196,0.085119
Male,149,7.51,2.0,Male,No,Thur,Lunch,2,0.266312,0.159328,0.03991
Male,181,23.33,5.65,Male,Yes,Sun,Dinner,2,0.242177,0.163196,0.085119


#### Suppressing the group keys

In [46]:
merged.groupby('sex',group_keys=False).apply(top) #Quedarían eliminadas las group keys

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct,mean,std
178,9.6,4.0,Female,Yes,Sun,Dinner,2,0.416667,0.163196,0.085119
67,3.07,1.0,Female,Yes,Sat,Dinner,1,0.325733,0.163196,0.085119
109,14.31,4.0,Female,Yes,Sat,Dinner,2,0.279525,0.163196,0.085119
93,16.32,4.3,Female,Yes,Fri,Dinner,2,0.26348,0.163196,0.085119
221,13.42,3.48,Female,Yes,Fri,Lunch,2,0.259314,0.163196,0.085119
172,7.25,5.15,Male,Yes,Sun,Dinner,2,0.710345,0.163196,0.085119
232,11.61,3.39,Male,No,Sat,Dinner,2,0.29199,0.159328,0.03991
183,23.17,6.5,Male,Yes,Sun,Dinner,4,0.280535,0.163196,0.085119
149,7.51,2.0,Male,No,Thur,Lunch,2,0.266312,0.159328,0.03991
181,23.33,5.65,Male,Yes,Sun,Dinner,2,0.242177,0.163196,0.085119


### Quantile and bucket analysis

In [47]:
pd.cut(merged['total_bill'],bins=5) #Puedes darle el número de cachos en el que lo quieres dividir

0      (12.618, 22.166]
1       (3.022, 12.618]
2      (12.618, 22.166]
3      (22.166, 31.714]
4      (22.166, 31.714]
             ...       
234    (12.618, 22.166]
236     (3.022, 12.618]
237    (31.714, 41.262]
240    (22.166, 31.714]
241    (22.166, 31.714]
Name: total_bill, Length: 244, dtype: category
Categories (5, interval[float64]): [(3.022, 12.618] < (12.618, 22.166] < (22.166, 31.714] < (31.714, 41.262] < (41.262, 50.81]]

In [48]:
merged.groupby(pd.cut(merged['total_bill'],bins=5))['tip_pct'].agg([np.mean,np.std])

Unnamed: 0_level_0,mean,std
total_bill,Unnamed: 1_level_1,Unnamed: 2_level_1
"(3.022, 12.618]",0.190854,0.094609
"(12.618, 22.166]",0.163942,0.041264
"(22.166, 31.714]",0.143799,0.051131
"(31.714, 41.262]",0.12153,0.042792
"(41.262, 50.81]",0.125121,0.05265


In [49]:
merged.groupby(merged['size']>2)['tip_pct'].agg([np.mean,np.std])

Unnamed: 0_level_0,mean,std
size,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.167009,0.067404
True,0.148982,0.04473


In [50]:
merged.groupby(merged['size']==2)['tip_pct'].agg([np.mean,np.std])

Unnamed: 0_level_0,mean,std
size,Unnamed: 1_level_1,Unnamed: 2_level_1
False,0.152087,0.048333
True,0.165719,0.066848


### Example: Filling missing values with group-specific values

## Pivot tables and Cross-tabulation

In [51]:
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.5,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.13978
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808


In [52]:
tips.pivot_table(index='size',columns='smoker',values='tip_pct')

smoker,No,Yes
size,Unnamed: 1_level_1,Unnamed: 2_level_1
1,0.159829,0.274755
2,0.164996,0.166706
3,0.149671,0.157543
4,0.147604,0.142036
5,0.178415,0.086116
6,0.156229,


In [54]:
tips.pivot_table(index=['size','sex'],columns=['smoker','time'],values='tip_pct',aggfunc='sum')

Unnamed: 0_level_0,smoker,No,No,Yes,Yes
Unnamed: 0_level_1,time,Dinner,Lunch,Dinner,Lunch
size,sex,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,Female,0.137931,0.181728,0.325733,
1,Male,,,,0.223776
2,Female,2.395629,2.889373,3.182535,1.440599
2,Male,6.809795,2.754814,4.845252,1.534197
3,Female,1.134293,0.260696,0.647477,0.196114
3,Male,2.400929,0.09553,0.833131,0.213789
4,Female,0.706408,0.269698,0.102522,0.115982
4,Male,2.714552,0.147059,1.149056,0.194837
5,Female,0.172194,,,
5,Male,0.241663,0.121389,0.172232,


In [58]:
monster = tips.pivot_table(index=['size','sex'],columns=['smoker','time'],values=['tip_pct','total_bill'],aggfunc=['sum','mean'])
monster

Unnamed: 0_level_0,Unnamed: 1_level_0,sum,sum,sum,sum,sum,sum,sum,sum,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,tip_pct,tip_pct,tip_pct,tip_pct,total_bill,total_bill,total_bill,total_bill,tip_pct,tip_pct,tip_pct,tip_pct,total_bill,total_bill,total_bill,total_bill
Unnamed: 0_level_2,smoker,No,No,Yes,Yes,No,No,Yes,Yes,No,No,Yes,Yes,No,No,Yes,Yes
Unnamed: 0_level_3,time,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch,Dinner,Lunch
size,sex,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4,Unnamed: 8_level_4,Unnamed: 9_level_4,Unnamed: 10_level_4,Unnamed: 11_level_4,Unnamed: 12_level_4,Unnamed: 13_level_4,Unnamed: 14_level_4,Unnamed: 15_level_4,Unnamed: 16_level_4,Unnamed: 17_level_4
1,Female,0.137931,0.181728,0.325733,,7.25,10.07,3.07,,0.137931,0.181728,0.325733,,7.25,10.07,3.07,
1,Male,,,,0.223776,,,,8.58,,,,0.223776,,,,8.58
2,Female,2.395629,2.889373,3.182535,1.440599,263.62,237.11,274.23,114.73,0.159709,0.160521,0.187208,0.180075,17.574667,13.172778,16.131176,14.34125
2,Male,6.809795,2.754814,4.845252,1.534197,635.86,244.22,618.07,178.05,0.166093,0.172176,0.156298,0.15342,15.50878,15.26375,19.937742,17.805
3,Female,1.134293,0.260696,0.647477,0.196114,139.05,34.62,111.52,16.47,0.162042,0.130348,0.161869,0.196114,19.864286,17.31,27.88,16.47
3,Male,2.400929,0.09553,0.833131,0.213789,349.76,22.82,191.6,18.71,0.150058,0.09553,0.138855,0.213789,21.86,22.82,31.933333,18.71
4,Female,0.706408,0.269698,0.102522,0.115982,140.35,58.91,30.14,43.11,0.141282,0.134849,0.102522,0.115982,28.07,29.455,30.14,43.11
4,Male,2.714552,0.147059,1.149056,0.194837,495.54,27.2,242.92,20.53,0.150808,0.147059,0.143632,0.194837,27.53,27.2,30.365,20.53
5,Female,0.172194,,,,29.85,,,,0.172194,,,,29.85,,,
5,Male,0.241663,0.121389,0.172232,,20.69,41.19,58.61,,0.241663,0.121389,0.086116,,20.69,41.19,29.305,


In [60]:
monster['mean','tip_pct']

Unnamed: 0_level_0,smoker,No,No,Yes,Yes
Unnamed: 0_level_1,time,Dinner,Lunch,Dinner,Lunch
size,sex,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,Female,0.137931,0.181728,0.325733,
1,Male,,,,0.223776
2,Female,0.159709,0.160521,0.187208,0.180075
2,Male,0.166093,0.172176,0.156298,0.15342
3,Female,0.162042,0.130348,0.161869,0.196114
3,Male,0.150058,0.09553,0.138855,0.213789
4,Female,0.141282,0.134849,0.102522,0.115982
4,Male,0.150808,0.147059,0.143632,0.194837
5,Female,0.172194,,,
5,Male,0.241663,0.121389,0.086116,
