# Tablas de contingencia de dos variables

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
# Load the Titanic example dataset through seaborn's dataset loader and
# preview the first five rows to see which columns are available.
titanic = sns.load_dataset(name='titanic')
titanic.head(n=5)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [5]:
# Contingency table: survival outcome (rows) by passenger sex (columns).
# The 0/1 survival codes are relabelled through an explicit code -> label
# mapping instead of by position, so a label can never land on the wrong row;
# errors='raise' makes a missing code fail loudly rather than pass unnoticed.
survived_sex = (
    pd.crosstab(index=titanic['survived'], columns=titanic['sex'])
    .rename(index={0: 'died', 1: 'survived'}, errors='raise')
    # Clear the row-axis title ('survived') so it is not confused with the
    # 'survived' row label; the column-axis title ('sex') is kept.
    .rename_axis(index=None)
)
survived_sex

sex,female,male
died,81,468
survived,233,109


In [6]:
# Contingency table: survival outcome (rows) by ticket class (columns).
# Both axes are relabelled through explicit mappings instead of by position,
# so each label is tied to the category it describes; errors='raise' makes a
# missing category fail loudly rather than pass unnoticed.
survived_class = (
    pd.crosstab(index=titanic['survived'], columns=titanic['class'])
    .rename(
        index={0: 'died', 1: 'survived'},
        columns={'First': 'first', 'Second': 'second', 'Third': 'third'},
        errors='raise',
    )
    # Clear both axis titles so the table shows only the new labels.
    .rename_axis(index=None, columns=None)
)
survived_class

Unnamed: 0,first,second,third
died,80,97,372
survived,136,87,119


In [7]:
# Same survival-by-class table, now with margins: margins=True appends a
# totals row and a totals column, both labelled 'All' by default.
# NOTE(review): this rebinds `survived_class`; the cells further down index it
# with 'total_class' / 'total_surv', so they need this version of the table.
#
# Labels are assigned through explicit mappings instead of by position, so the
# margin labels cannot end up on a data row/column; errors='raise' makes a
# missing category fail loudly rather than pass unnoticed.
survived_class = (
    pd.crosstab(
        index=titanic['survived'],
        columns=titanic['class'],
        margins=True,
    )
    .rename(
        index={0: 'died', 1: 'survived', 'All': 'total_class'},
        columns={
            'First': 'first',
            'Second': 'second',
            'Third': 'third',
            'All': 'total_surv',
        },
        errors='raise',
    )
    # Clear both axis titles so the table shows only the new labels.
    .rename_axis(index=None, columns=None)
)
survived_class

Unnamed: 0,first,second,third,total_surv
died,80,97,372,549
survived,136,87,119,342
total_class,216,184,491,891


## Frecuencias relativas globales

In [9]:
# Divide every entry (margins included) by the grand total, which sits at the
# intersection of the two margin labels, so each count becomes a share of the
# whole table.
survived_class.div(survived_class.at['total_class', 'total_surv'])

Unnamed: 0,first,second,third,total_surv
died,0.089787,0.108866,0.417508,0.616162
survived,0.152637,0.097643,0.133558,0.383838
total_class,0.242424,0.20651,0.551066,1.0


## Frecuencias relativas marginales

In [10]:
# Divide the table by its 'total_class' row. The row is a Series indexed by
# the column labels, so `/` aligns it with the columns and each column is
# expressed as a fraction of its own total.
survived_class / survived_class.loc['total_class', :]

Unnamed: 0,first,second,third,total_surv
died,0.37037,0.527174,0.757637,0.616162
survived,0.62963,0.472826,0.242363,0.383838
total_class,1.0,1.0,1.0,1.0


In [15]:
# Column-wise normalisation written with the explicit method: the totals row
# is matched against the column labels (axis='columns'), so the alignment
# axis is stated in the call instead of being implied by the operator.
survived_class.div(survived_class.loc['total_class', :], axis='columns')

Unnamed: 0,first,second,third,total_surv
died,0.37037,0.527174,0.757637,0.616162
survived,0.62963,0.472826,0.242363,0.383838
total_class,1.0,1.0,1.0,1.0


---

In [12]:
# Row-wise normalisation by way of a transpose: once transposed, the outcome
# labels are the columns, so the 'total_surv' Series lines up with them and
# each outcome is divided by its own total. The result is left transposed.
survived_class.transpose() / survived_class.loc[:, 'total_surv']

Unnamed: 0,died,survived,total_class
first,0.145719,0.397661,0.242424
second,0.176685,0.254386,0.20651
third,0.677596,0.347953,0.551066
total_surv,1.0,1.0,1.0


In [13]:
# Transpose, divide by the per-outcome totals, then transpose back so the
# result has the original orientation with every row summing to 1 in the
# 'total_surv' column.
(
    survived_class.transpose() / survived_class.loc[:, 'total_surv']
).transpose()

Unnamed: 0,first,second,third,total_surv
died,0.145719,0.176685,0.677596,1.0
survived,0.397661,0.254386,0.347953,1.0
total_class,0.242424,0.20651,0.551066,1.0


In [14]:
# Row-wise normalisation without any transpose: the 'total_surv' column is
# matched against the row labels (axis='index'), so each row is divided by
# its own total.
survived_class.div(survived_class.loc[:, 'total_surv'], axis='index')

Unnamed: 0,first,second,third,total_surv
died,0.145719,0.176685,0.677596,1.0
survived,0.397661,0.254386,0.347953,1.0
total_class,0.242424,0.20651,0.551066,1.0


## Tablas multidimensionales

In [17]:
# Three-way table: survival outcome on the rows, and a two-level column
# header built from sex (outer level) and pclass (inner level).
# margins=True appends the 'All' totals row and column.
surv_sex_class = pd.crosstab(
    titanic['survived'],
    [titanic['sex'], titanic['pclass']],
    margins=True,
)
surv_sex_class

sex,female,female,female,male,male,male,All
pclass,1,2,3,1,2,3,Unnamed: 7_level_1
survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,3,6,72,77,91,300,549
1,91,70,72,45,17,47,342
All,94,76,144,122,108,347,891


In [18]:
# Select everything under the outer column label 'female'; what remains is a
# table whose columns are the inner pclass labels.
surv_sex_class.loc[:, 'female']

pclass,1,2,3
survived,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,3,6,72
1,91,70,72
All,94,76,144


In [19]:
# Two-step drill-down: first the 'female' sub-table, then its pclass 1 column,
# which comes back as a single Series indexed by survival outcome.
surv_sex_class['female'].loc[:, 1]

survived
0       3
1      91
All    94
Name: 1, dtype: int64

---

In [20]:
# Divide the table by its 'All' totals row. The row is aligned on the
# (sex, pclass) column labels, so each column becomes a fraction of its own
# total.
surv_sex_class / surv_sex_class.loc['All', :]

sex,female,female,female,male,male,male,All
pclass,1,2,3,1,2,3,Unnamed: 7_level_1
survived,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
0,0.031915,0.078947,0.5,0.631148,0.842593,0.864553,0.616162
1,0.968085,0.921053,0.5,0.368852,0.157407,0.135447,0.383838
All,1.0,1.0,1.0,1.0,1.0,1.0,1.0
