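[Reference 1: "Quick Data Summary in Pandas — sidetable Introduction" (Python in Plain English)](https://python.plainenglish.io/quick-data-summary-in-pandas-sidetable-introduction-8c35ac18c66c) <br>
[Reference 2: sidetable source repository on GitHub](https://github.com/chris1610/sidetable)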

In [1]:
!python -m pip install -U sidetable

Collecting sidetable
  Downloading sidetable-0.9.0-py3-none-any.whl (17 kB)
Installing collected packages: sidetable
Successfully installed sidetable-0.9.0


In [2]:
import pandas as pd
import sidetable  # never referenced by name: imported so the `df.stb` accessor used in later cells is available
import seaborn as sns

# Load seaborn's 'titanic' example dataset; `df` is the frame every later cell works on.
df = sns.load_dataset('titanic')

In [3]:
# Baseline with plain pandas: number of rows per ticket class.
df['class'].value_counts()

Third     491
First     216
Second    184
Name: class, dtype: int64

In [4]:
# Same tally expressed as a fraction of all rows (normalize=True).
df['class'].value_counts(normalize=True)

Third     0.551066
First     0.242424
Second    0.206510
Name: class, dtype: float64

In [5]:
# Plain-pandas way to get counts and percentages side by side:
# start from the count column, then attach the normalized counts scaled to 0-100.
(
    df['class']
    .value_counts()
    .to_frame('count')
    .assign(percentage=df['class'].value_counts(normalize=True).mul(100))
)

Unnamed: 0,count,percentage
Third,491,55.106622
First,216,24.242424
Second,184,20.650954


In [6]:
# sidetable equivalent: one call returns count, percent and both cumulative columns.
df.stb.freq(['class'])

Unnamed: 0,class,count,percent,cumulative_count,cumulative_percent
0,Third,491,55.106622,491,55.106622
1,First,216,24.242424,707,79.349046
2,Second,184,20.650954,891,100.0


In [7]:
# style=True displays the percent columns formatted to two decimals with a % sign.
df.stb.freq(['class'], style=True)

Unnamed: 0,class,count,percent,cumulative_count,cumulative_percent
0,Third,491,55.11%,491,55.11%
1,First,216,24.24%,707,79.35%
2,Second,184,20.65%,891,100.00%


In [8]:
# Passing several columns tabulates each sex/class combination, ordered by count.
df.stb.freq(['sex', 'class'])

Unnamed: 0,sex,class,count,percent,cumulative_count,cumulative_percent
0,male,Third,347,38.945006,347,38.945006
1,female,Third,144,16.161616,491,55.106622
2,male,First,122,13.69248,613,68.799102
3,male,Second,108,12.121212,721,80.920314
4,female,First,94,10.549944,815,91.470258
5,female,Second,76,8.529742,891,100.0


In [9]:
# value='fare' totals the fare per class instead of counting rows;
# percent and cumulative columns are then shares of the overall fare total.
df.stb.freq(['class'], value='fare')

Unnamed: 0,class,fare,percent,cumulative_fare,cumulative_percent
0,First,18177.4125,63.349288,18177.4125,63.349288
1,Third,6714.6951,23.401084,24892.1076,86.750371
2,Second,3801.8417,13.249629,28693.9493,100.0


In [10]:
# thresh=80 keeps the groups that fall within the first 80% cumulative share
# and lumps everything after that into a single 'others' row.
df.stb.freq(['class', 'who'], value='fare', thresh=80)

Unnamed: 0,class,who,fare,percent,cumulative_fare,cumulative_percent
0,First,woman,9492.9375,33.083412,9492.9375,33.083412
1,First,man,7848.1792,27.351338,17341.1167,60.434751
2,Third,man,3617.5281,12.607285,20958.6448,73.042036
3,Second,man,1886.3583,6.574063,22845.0031,79.616099
4,others,others,5848.9462,20.383901,28693.9493,100.0


In [11]:
# other_label replaces the default 'others' text used for the lumped remainder row.
df.stb.freq(['class', 'who'], value='fare', thresh=80, other_label='All others')

Unnamed: 0,class,who,fare,percent,cumulative_fare,cumulative_percent
0,First,woman,9492.9375,33.083412,9492.9375,33.083412
1,First,man,7848.1792,27.351338,17341.1167,60.434751
2,Third,man,3617.5281,12.607285,20958.6448,73.042036
3,Second,man,1886.3583,6.574063,22845.0031,79.616099
4,All others,All others,5848.9462,20.383901,28693.9493,100.0


In [12]:
# Per-column overview: non-null count, number of unique values,
# and the most / least frequent value together with how often each occurs.
df.stb.counts()

Unnamed: 0,count,unique,most_freq,most_freq_count,least_freq,least_freq_count
survived,891,2,0,549,1,342
sex,891,2,male,577,female,314
adult_male,891,2,True,537,False,354
alive,891,2,no,549,yes,342
alone,891,2,True,537,False,354
pclass,891,3,3,491,2,184
embarked,889,3,S,644,Q,77
class,891,3,Third,491,Second,184
who,891,3,man,537,child,83
embark_town,889,3,Southampton,644,Queenstown,77


In [13]:
# exclude='number' leaves numeric columns (e.g. survived, pclass) out of the overview.
df.stb.counts(exclude='number')

Unnamed: 0,count,unique,most_freq,most_freq_count,least_freq,least_freq_count
sex,891,2,male,577,female,314
adult_male,891,2,True,537,False,354
alive,891,2,no,549,yes,342
alone,891,2,True,537,False,354
embarked,889,3,S,644,Q,77
class,891,3,Third,491,Second,184
who,891,3,man,537,child,83
embark_town,889,3,Southampton,644,Queenstown,77
deck,203,7,C,59,G,4


In [14]:
# Missing-value report per column (missing, total, percent), most-missing first.
df.stb.missing()

Unnamed: 0,missing,total,percent
deck,688,891,77.216611
age,177,891,19.86532
embarked,2,891,0.224467
embark_town,2,891,0.224467
survived,0,891,0.0
pclass,0,891,0.0
sex,0,891,0.0
sibsp,0,891,0.0
parch,0,891,0.0
fare,0,891,0.0


In [15]:
# Same report with the percent column displayed as a formatted percentage.
df.stb.missing(style=True)

Unnamed: 0,missing,total,percent
deck,688,891,77.22%
age,177,891,19.87%
embarked,2,891,0.22%
embark_town,2,891,0.22%
survived,0,891,0.00%
pclass,0,891,0.00%
sex,0,891,0.00%
sibsp,0,891,0.00%
parch,0,891,0.00%
fare,0,891,0.00%


In [16]:
# clip_0=True leaves out the columns that have no missing values at all.
df.stb.missing(clip_0=True, style=True)

Unnamed: 0,missing,total,percent
deck,688,891,77.22%
age,177,891,19.87%
embarked,2,891,0.22%
embark_town,2,891,0.22%


In [17]:
# subtotal() applied directly to the raw, ungrouped frame.
# NOTE(review): the rendered output is truncated, so any summary row appended
# at the end is not visible here — confirm by running the cell.
df.stb.subtotal()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0.0,3.0,male,22.00,1.0,0.0,7.2500,S,Third,man,1.0,,Southampton,no,0.0
1,1.0,1.0,female,38.00,1.0,0.0,71.2833,C,First,woman,0.0,C,Cherbourg,yes,0.0
2,1.0,3.0,female,26.00,0.0,0.0,7.9250,S,Third,woman,0.0,,Southampton,yes,1.0
3,1.0,1.0,female,35.00,1.0,0.0,53.1000,S,First,woman,0.0,C,Southampton,yes,0.0
4,0.0,3.0,male,35.00,0.0,0.0,8.0500,S,Third,man,1.0,,Southampton,no,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
887,1.0,1.0,female,19.00,0.0,0.0,30.0000,S,First,woman,0.0,B,Southampton,yes,1.0
888,0.0,3.0,female,,1.0,2.0,23.4500,S,Third,woman,0.0,,Southampton,no,0.0
889,1.0,1.0,male,26.00,0.0,0.0,30.0000,C,First,man,1.0,C,Cherbourg,yes,1.0
890,0.0,3.0,male,32.00,0.0,0.0,7.7500,Q,Third,man,1.0,,Queenstown,no,1.0


In [18]:
# Sum fares per sex / class / embark_town, then let sidetable insert
# '<group> - subtotal' rows into the grouped result.
(
    df.groupby(['sex', 'class', 'embark_town'])
    .agg({'fare': ['sum']})
    .stb.subtotal()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fare
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum
sex,class,embark_town,Unnamed: 3_level_2
female,First,Cherbourg,4972.5333
female,First,Queenstown,90.0
female,First,Southampton,4753.2917
female,First,female | First - subtotal,9815.825
female,Second,Cherbourg,176.8792
female,Second,Queenstown,24.7
female,Second,Southampton,1468.15
female,Second,female | Second - subtotal,1669.7292
female,Third,Cherbourg,337.9833
female,Third,Queenstown,340.1585


In [19]:
# Keep the grouped fare totals in a named frame so the subtotal call can be varied.
summary_table = (
    df.groupby(['sex', 'class', 'embark_town'])
    .agg({'fare': ['sum']})
)
# sub_level selects the index levels at which subtotal rows are requested.
summary_table.stb.subtotal(sub_level=[1, 2])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,fare
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum
sex,class,embark_town,Unnamed: 3_level_2
female,First,Cherbourg,4972.5333
female,First,Queenstown,90.0
female,First,Southampton,4753.2917
female,First,female | First - subtotal,9815.825
female,Second,Cherbourg,176.8792
female,Second,Queenstown,24.7
female,Second,Southampton,1468.15
female,Second,female | Second - subtotal,1669.7292
female,Third,Cherbourg,337.9833
female,Third,Queenstown,340.1585


In [20]:
# Fare totals and mean age per embark_town / class, with sex pivoted into the
# columns, which yields a three-level column header.
(
    df.groupby(['embark_town', 'class', 'sex'])
    .agg({'fare': ['sum'], 'age': ['mean']})
    .unstack()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,fare,fare,age,age
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,sum,mean,mean
Unnamed: 0_level_2,sex,female,male,female,male
embark_town,class,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
Cherbourg,First,4972.5333,3928.5417,36.052632,40.111111
Cherbourg,Second,176.8792,254.2125,19.142857,25.9375
Cherbourg,Third,337.9833,402.1462,14.0625,25.0168
Queenstown,First,90.0,90.0,33.0,44.0
Queenstown,Second,24.7,12.35,30.0,57.0
Queenstown,Third,340.1585,465.0458,22.85,28.142857
Southampton,First,4753.2917,4183.0458,32.704545,41.897188
Southampton,Second,1468.15,1865.55,29.719697,30.875889
Southampton,Third,1642.9668,3526.3945,23.223684,26.574766


In [21]:
# Same pivoted summary, but flatten() joins the column levels with '_'
# (e.g. fare_sum_female) and turns the index levels into ordinary columns.
(
    df.groupby(['embark_town', 'class', 'sex'])
    .agg({'fare': ['sum'], 'age': ['mean']})
    .unstack()
    .stb.flatten()
)

Unnamed: 0,embark_town,class,fare_sum_female,fare_sum_male,age_mean_female,age_mean_male
0,Cherbourg,First,4972.5333,3928.5417,36.052632,40.111111
1,Cherbourg,Second,176.8792,254.2125,19.142857,25.9375
2,Cherbourg,Third,337.9833,402.1462,14.0625,25.0168
3,Queenstown,First,90.0,90.0,33.0,44.0
4,Queenstown,Second,24.7,12.35,30.0,57.0
5,Queenstown,Third,340.1585,465.0458,22.85,28.142857
6,Southampton,First,4753.2917,4183.0458,32.704545,41.897188
7,Southampton,Second,1468.15,1865.55,29.719697,30.875889
8,Southampton,Third,1642.9668,3526.3945,23.223684,26.574766


In [22]:
# Build the pivoted fare / age summary once and keep it under a name.
fares = (
    df.groupby(['embark_town', 'class', 'sex'])
    .agg({'fare': ['sum'], 'age': ['mean']})
    .unstack()
)
# Join only column levels 0 and 2 with '|' (e.g. fare|female) and keep the
# embark_town / class index in place instead of resetting it.
fares.stb.flatten(sep='|', reset=False, levels=[0,2])

Unnamed: 0_level_0,Unnamed: 1_level_0,fare|female,fare|male,age|female,age|male
embark_town,class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Cherbourg,First,4972.5333,3928.5417,36.052632,40.111111
Cherbourg,Second,176.8792,254.2125,19.142857,25.9375
Cherbourg,Third,337.9833,402.1462,14.0625,25.0168
Queenstown,First,90.0,90.0,33.0,44.0
Queenstown,Second,24.7,12.35,30.0,57.0
Queenstown,Third,340.1585,465.0458,22.85,28.142857
Southampton,First,4753.2917,4183.0458,32.704545,41.897188
Southampton,Second,1468.15,1865.55,29.719697,30.875889
Southampton,Third,1642.9668,3526.3945,23.223684,26.574766


In [23]:
# freq on a continuous column produces one row per distinct fare value —
# hundreds of rows, which is too granular to read as a summary.
df.stb.freq(['fare'])

Unnamed: 0,fare,count,percent,cumulative_count,cumulative_percent
0,8.0500,43,4.826038,43,4.826038
1,13.0000,42,4.713805,85,9.539843
2,7.8958,38,4.264871,123,13.804714
3,7.7500,34,3.815937,157,17.620651
4,26.0000,31,3.479237,188,21.099888
...,...,...,...,...,...
243,6.4500,1,0.112233,887,99.551066
244,6.4375,1,0.112233,888,99.663300
245,6.2375,1,0.112233,889,99.775533
246,5.0000,1,0.112233,890,99.887767


In [24]:
# Bucket the fares into equal-sized quantile bins (one per label) so that
# the frequency table becomes readable.
quartile_labels = ['low', 'medium', 'high', 'x-high']
df['fare_bin'] = pd.qcut(df['fare'], q=len(quartile_labels), labels=quartile_labels)
df.stb.freq(['fare_bin'])

Unnamed: 0,fare_bin,count,percent,cumulative_count,cumulative_percent
0,medium,224,25.140292,224,25.140292
1,low,223,25.028058,447,50.16835
2,x-high,222,24.915825,669,75.084175
3,high,222,24.915825,891,100.0


In [25]:
# Rows with a missing deck are not tabulated: the counts add up to 203, not 891.
df.stb.freq(['deck'])

Unnamed: 0,deck,count,percent,cumulative_count,cumulative_percent
0,C,59,29.064039,59,29.064039
1,B,47,23.152709,106,52.216749
2,D,33,16.256158,139,68.472906
3,E,32,15.763547,171,84.236453
4,A,15,7.389163,186,91.625616
5,F,13,6.403941,199,98.029557
6,G,4,1.970443,203,100.0


In [26]:
# 'deck' is categorical, so the placeholder has to be registered as a category
# before it can be used to fill the missing entries.
deck_with_unknown = df['deck'].cat.add_categories('UNK')
df['deck_fillna'] = deck_with_unknown.fillna('UNK')
# With the gaps labelled, the table now accounts for all 891 rows.
df.stb.freq(['deck_fillna'])

Unnamed: 0,deck_fillna,count,percent,cumulative_count,cumulative_percent
0,UNK,688,77.216611,688,77.216611
1,C,59,6.621773,747,83.838384
2,B,47,5.274972,794,89.113356
3,D,33,3.703704,827,92.817059
4,E,32,3.59147,859,96.40853
5,A,15,1.683502,874,98.092031
6,F,13,1.459035,887,99.551066
7,G,4,0.448934,891,100.0


In [27]:
# Deck by class; rows with a missing deck are again left out
# (the percentages are relative to 203 rows).
df.stb.freq(['deck', 'class'])

Unnamed: 0,deck,class,count,percent,cumulative_count,cumulative_percent
0,C,First,59,29.064039,59,29.064039
1,B,First,47,23.152709,106,52.216749
2,D,First,29,14.285714,135,66.502463
3,E,First,25,12.315271,160,78.817734
4,A,First,15,7.389163,175,86.206897
5,F,Second,8,3.940887,183,90.147783
6,F,Third,5,2.463054,188,92.610837
7,G,Third,4,1.970443,192,94.581281
8,E,Second,4,1.970443,196,96.551724
9,D,Second,4,1.970443,200,98.522167


In [28]:
# clip_0=False presumably keeps deck/class combinations whose count is zero.
# NOTE(review): only the first rows of the output are rendered, so those
# zero-count rows are not visible here — confirm by running the cell.
df.stb.freq(['deck', 'class'], clip_0=False)

Unnamed: 0,deck,class,count,percent,cumulative_count,cumulative_percent
0,C,First,59,29.064039,59,29.064039
1,B,First,47,23.152709,106,52.216749
2,D,First,29,14.285714,135,66.502463
3,E,First,25,12.315271,160,78.817734
4,A,First,15,7.389163,175,86.206897
5,F,Second,8,3.940887,183,90.147783
6,F,Third,5,2.463054,188,92.610837
7,G,Third,4,1.970443,192,94.581281
8,E,Second,4,1.970443,196,96.551724
9,D,Second,4,1.970443,200,98.522167
