In [1]:
import numpy as np
import pandas as pd

In [2]:
tips = pd.read_csv("../examples/tips.csv")
tips['tip_pct'] = tips['tip'] / tips['total_bill']
tips[:6]

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,tip_pct
0,16.99,1.01,Female,No,Sun,Dinner,2,0.059447
1,10.34,1.66,Male,No,Sun,Dinner,3,0.160542
2,21.01,3.5,Male,No,Sun,Dinner,3,0.166587
3,23.68,3.31,Male,No,Sun,Dinner,2,0.13978
4,24.59,3.61,Female,No,Sun,Dinner,4,0.146808
5,25.29,4.71,Male,No,Sun,Dinner,4,0.18624


### Pivot Tables
agrregates table on one or more keys along row/col + heirarchial indexing

In [3]:
# with groupby or df's/pd's pivot_table()
# default groupmeans, aggfunc='mean'
# not working
# tips.pivot_table( index=["day", "smoker"]) # day and smoker on rows

# aggregate tippct and totalbill
tips.pivot_table(["tip_pct", "total_bill"], index=["day", "smoker"]) # day and smoker on rows


Unnamed: 0_level_0,Unnamed: 1_level_0,tip_pct,total_bill
day,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1
Fri,No,0.15165,18.42
Fri,Yes,0.174783,16.813333
Sat,No,0.158048,19.661778
Sat,Yes,0.147906,21.276667
Sun,No,0.160113,20.506667
Sun,Yes,0.18725,24.12
Thur,No,0.160298,17.113111
Thur,Yes,0.163863,19.190588


In [6]:
# smoker in cols, day time in rows
tips.pivot_table(["tip_pct", "size"],index=["time","day"], columns="smoker")

Unnamed: 0_level_0,Unnamed: 1_level_0,size,size,tip_pct,tip_pct
Unnamed: 0_level_1,smoker,No,Yes,No,Yes
time,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
Dinner,Fri,2.0,2.222222,0.139622,0.165347
Dinner,Sat,2.555556,2.47619,0.158048,0.147906
Dinner,Sun,2.929825,2.578947,0.160113,0.18725
Dinner,Thur,2.0,,0.159744,
Lunch,Fri,3.0,1.833333,0.187735,0.188937
Lunch,Thur,2.5,2.352941,0.160311,0.163863


In [8]:
# margins true : All added combined stat
tips.pivot_table(["tip_pct", "size"],index=["time","day"], columns="smoker", margins=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,size,size,size,tip_pct,tip_pct,tip_pct
Unnamed: 0_level_1,smoker,No,Yes,All,No,Yes,All
time,day,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Dinner,Fri,2.0,2.222222,2.166667,0.139622,0.165347,0.158916
Dinner,Sat,2.555556,2.47619,2.517241,0.158048,0.147906,0.153152
Dinner,Sun,2.929825,2.578947,2.842105,0.160113,0.18725,0.166897
Dinner,Thur,2.0,,2.0,0.159744,,0.159744
Lunch,Fri,3.0,1.833333,2.0,0.187735,0.188937,0.188765
Lunch,Thur,2.5,2.352941,2.459016,0.160311,0.163863,0.161301
All,,2.668874,2.408602,2.569672,0.159328,0.163196,0.160803


In [11]:
# other agg funcs 
# count/len crosstab (frequency/grpsizes)
# fillval to fill NA
tips.pivot_table("tip_pct",index=["time","smoker"], columns="day",aggfunc=len, margins=True,fill_value=0)

Unnamed: 0_level_0,day,Fri,Sat,Sun,Thur,All
time,smoker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dinner,No,3,45,57,1,106
Dinner,Yes,9,42,19,0,70
Lunch,No,1,0,0,44,45
Lunch,Yes,6,0,0,17,23
All,,19,87,76,62,244


### CrossTab 
special pivot case to compute grp frequencies

In [29]:
data  = pd.DataFrame({
    "Sample" : np.arange(1,11),
    "Nationality" : ["USA","Japan" , "USA" , "Japan" , "Japan" , "Japan","USA" , "USA" , "Japan" , "USA"]
})
data["Handedness"]= "Right-handed"
data["Handedness"][[1,4,7]] = "Left-handed"
data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data["Handedness"][[1,4,7]] = "Left-handed"


Unnamed: 0,Sample,Nationality,Handedness
0,1,USA,Right-handed
1,2,Japan,Left-handed
2,3,USA,Right-handed
3,4,Japan,Right-handed
4,5,Japan,Left-handed
5,6,Japan,Right-handed
6,7,USA,Right-handed
7,8,USA,Left-handed
8,9,Japan,Right-handed
9,10,USA,Right-handed


In [31]:
# cols , rows
pd.crosstab(data.Nationality, data.Handedness, margins=True)

Handedness,Left-handed,Right-handed,All
Nationality,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Japan,2,3,5
USA,1,4,5
All,3,7,10


In [32]:
pd.crosstab([tips.time, tips.day], tips.smoker, margins=True)


Unnamed: 0_level_0,smoker,No,Yes,All
time,day,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Dinner,Fri,3,9,12
Dinner,Sat,45,42,87
Dinner,Sun,57,19,76
Dinner,Thur,1,0,1
Lunch,Fri,1,6,7
Lunch,Thur,44,17,61
All,,151,93,244
