# PD Cross tab
The pandas `crosstab(~)` function computes a cross tabulation (a contingency table) of two or more factors, such as columns of a DataFrame. By default, each cell holds the count of rows that have that combination of values. See the examples below for clarification.
https://www.skytowner.com/explore/pandas_crosstab_method

In [7]:
import pandas as pd
import numpy as np

In [8]:
# Sample data: four people, each with a gender and an age.
df = pd.DataFrame(
    {
        "name": ["alex", "bob", "cathy", "doge"],
        "gender": ["male", "male", "female", "male"],
        "age": [50, 20, 40, 20],
    }
)
df

Unnamed: 0,name,gender,age
0,alex,male,50
1,bob,male,20
2,cathy,female,40
3,doge,male,20


In [9]:
# Count how many rows fall into each (age, gender) combination:
# ages become the row labels, genders the column labels.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
)

gender,female,male
age,Unnamed: 1_level_1,Unnamed: 2_level_1
20,0,2
40,1,0
50,0,1


In [20]:
# Same counts as before, but rownames/colnames relabel the two axes
# ("AGE" and "GENDER" instead of the source column names).
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    rownames=["AGE"],
    colnames=["GENDER"],
)

GENDER,female,male
AGE,Unnamed: 1_level_1,Unnamed: 2_level_1
20,0,2
40,1,0
50,0,1


In [21]:
# Aggregate a value instead of counting: `values` supplies the numbers and
# `aggfunc` says how to combine them, giving the mean age per gender here.
# columns=["AGE"] is a single constant label, so the result has one column.
# The string "mean" is used rather than np.mean because recent pandas
# versions emit a FutureWarning when a NumPy callable is passed as aggfunc.
pd.crosstab(
    index=df["gender"],
    columns=["AGE"],
    values=df["age"],
    aggfunc="mean",
)

col_0,AGE
gender,Unnamed: 1_level_1
female,40
male,30


In [22]:
# margins=True appends an "All" row and an "All" column holding the
# column and row totals respectively.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    margins=True,
)

gender,female,male,All
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20,0,2,2
40,1,0,1
50,0,1,1
All,1,3,4


In [23]:
# margins_name replaces the default "All" label of the totals row/column.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    margins=True,
    margins_name="TOTAL",
)

gender,female,male,TOTAL
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
20,0,2,2
40,1,0,1
50,0,1,1
TOTAL,1,3,4


In [24]:
# Missing values: rebuild the sample data so that cathy's age is NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df = pd.DataFrame(
    {
        "name": ["alex", "bob", "cathy", "doge"],
        "gender": ["male", "male", "female", "male"],
        "age": [30, 20, np.nan, 30],
    }
)
df

Unnamed: 0,name,gender,age
0,alex,male,30.0
1,bob,male,20.0
2,cathy,female,
3,doge,male,30.0


In [25]:
# With the default dropna=True, the row whose age is NaN is not counted;
# since that was the only "female" row, the "female" column is dropped too.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
)

gender,male
age,Unnamed: 1_level_1
20.0,1
30.0,2


In [26]:
# dropna=False keeps columns whose entries are all missing, so the
# "female" column stays in the table.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    dropna=False,
)

gender,female,male
age,Unnamed: 1_level_1,Unnamed: 2_level_1
20.0,0,1
30.0,0,2


In [27]:
# Back to the complete sample data (no missing ages) for the normalize examples.
df = pd.DataFrame(
    {
        "name": ["alex", "bob", "cathy", "doge"],
        "gender": ["male", "male", "female", "male"],
        "age": [50, 20, 40, 20],
    }
)
df

Unnamed: 0,name,gender,age
0,alex,male,50
1,bob,male,20
2,cathy,female,40
3,doge,male,20


In [29]:
# Baseline: raw counts per (age, gender), for comparison with the
# normalized tables.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
)

gender,female,male
age,Unnamed: 1_level_1,Unnamed: 2_level_1
20,0,2
40,1,0
50,0,1


In [30]:
# In this context, to normalize is to divide each value by the sum of the values.
# normalize=True (or, equivalently, normalize="all") uses the sum of all
# values in the table, so the cells of the result add up to 1.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    normalize=True,
)

gender,female,male
age,Unnamed: 1_level_1,Unnamed: 2_level_1
20,0.0,0.5
40,0.25,0.0
50,0.0,0.25


In [31]:
# normalize="index" normalizes row-wise: each value is divided by the sum
# of its row, so every row adds up to 1.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    normalize="index",
)

gender,female,male
age,Unnamed: 1_level_1,Unnamed: 2_level_1
20,0.0,1.0
40,1.0,0.0
50,0.0,1.0


In [32]:
# normalize="columns" normalizes column-wise: each value is divided by the
# sum of its column, so every column adds up to 1.
pd.crosstab(
    index=df["age"],
    columns=df["gender"],
    normalize="columns",
)

gender,female,male
age,Unnamed: 1_level_1,Unnamed: 2_level_1
20,0.0,0.666667
40,1.0,0.0
50,0.0,0.333333
