# Bondad de ajuste con el test $\chi^2$ de Pearson

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [None]:
# Reference population: one row per individual, labelled with its group.
population = pd.DataFrame(
    ["blanco"] * 1_000_000
    + ["hispano"] * 600_000
    + ["negro"] * 500_000
    + ["asiatico"] * 150_000
    + ["otro"] * 350_000
)

# Sample to be compared against the reference population (same labels).
mallorca = pd.DataFrame(
    ["blanco"] * 600
    + ["hispano"] * 300
    + ["negro"] * 250
    + ["asiatico"] * 75
    + ["otro"] * 150
)

In [None]:
# Frequency table of each group: a one-column ("count") crosstab per data set.
population_table = pd.crosstab(
    index=population[0],
    columns="count",
)
mallorca_table = pd.crosstab(
    index=mallorca[0],
    columns="count",
)

In [None]:
# Show both frequency tables, each preceded by its caption, one item per line.
print(
    "población global",
    population_table,
    "Muestra de Mallorca",
    mallorca_table,
    sep="\n",
)

población global
col_0       count
0                
asiatico   150000
blanco    1000000
hispano    600000
negro      500000
otro       350000
Muestra de Mallorca
col_0     count
0              
asiatico     75
blanco      600
hispano     300
negro       250
otro        150


$$\chi_0 = \sum_{i=1}^n \frac{(o_i-e_i)^2}{e_i} \sim\chi^2_{n-1-k} $$
Donde $n$ es el número de clases o categorías y $k$ es el número de parámetros estimados para la distribución teórica.

In [None]:
# Observed frequencies o_i: this binds o_i to the same object as
# mallorca_table (no copy is made), then displays it.
o_i = mallorca_table
o_i

col_0,count
0,Unnamed: 1_level_1
asiatico,75
blanco,600
hispano,300
negro,250
otro,150


In [None]:
# Sample size: number of rows (individuals) in the Mallorca sample.
n = mallorca.shape[0]
n

1375

In [None]:
# Theoretical proportion of each group: population counts over population size.
p_i = population_table / population.shape[0]
p_i

col_0,count
0,Unnamed: 1_level_1
asiatico,0.057692
blanco,0.384615
hispano,0.230769
negro,0.192308
otro,0.134615


In [None]:
# Expected frequencies under H0: theoretical proportions scaled to the sample size.
e_i = p_i * n
e_i

col_0,count
0,Unnamed: 1_level_1
asiatico,79.326923
blanco,528.846154
hispano,317.307692
negro,264.423077
otro,185.096154


In [None]:
# Pearson statistic: sum over categories of (observed - expected)^2 / expected.
# The column-wise sum leaves a one-element Series indexed by "count".
chi_0 = (o_i - e_i).pow(2).div(e_i).sum()
chi_0

col_0
count    18.194805
dtype: float64

Comprobación con valor crítico de la distribución

In [None]:
# Significance level of the test.
alpha = 0.05
# Critical value: the (1 - alpha) quantile of a chi-square distribution with
# (number of categories - 1) degrees of freedom.
crit = stats.chi2.ppf(1 - alpha, o_i.shape[0] - 1)
crit

9.487729036781154

In [None]:
# Decision rule based on the critical value: reject H0 when the statistic
# exceeds it. chi_0 is a one-element Series, so the comparison yields a
# one-element boolean Series; .item() extracts that single value.
# (Series.bool() is deprecated since pandas 2.1, hence .item().)
if (chi_0 > crit).item():
    print("Rechazamos H0")
else:
    print("No hay evidencia para rechazar H0")

Rechazamos H0


$$ p = P(\chi^2_{n-1-k}>\chi_0) = 1-F_{\chi^2_{n-1-k}}(\chi_0) $$

In [None]:
# p-value: upper-tail probability of the statistic, computed as 1 - CDF of a
# chi-square with (number of categories - 1) degrees of freedom.
p_val = 1 - stats.chi2.cdf(chi_0, o_i.shape[0] - 1)
p_val

array([0.00113047])

In [None]:
# Decision rule based on the p-value: reject H0 when it falls below alpha.
print("Rechazamos H0" if p_val < alpha else "No hay evidencia para rechazar H0")

Rechazamos H0


## Automatizar el código con el test de $\chi^2$ de Python

In [None]:
# Same goodness-of-fit test in one call: observed vs. expected frequencies.
stats.chisquare(o_i, f_exp=e_i)

Power_divergenceResult(statistic=array([18.19480519]), pvalue=array([0.00113047]))

## Otra forma de hacer el test de $\chi^2$


In [None]:
from scipy.stats import chi2_contingency

# 2 x 4 contingency table of observed counts.
table = [
    [10, 20, 30, 40],
    [6, 9, 15, 22],
]

# chi2_contingency returns the statistic, the p-value, the degrees of freedom
# and the table of expected frequencies.
stat, pv, dof, expected = chi2_contingency(table)

print("Estadístico = %.3f, p-valor = %.3f, df = %.0f" % (stat, pv, dof))
print(expected)

Estadístico = 0.267, p-valor = 0.966, df = 3
[[10.52631579 19.07894737 29.60526316 40.78947368]
 [ 5.47368421  9.92105263 15.39473684 21.21052632]]


**Contrastes de Independencia y Homogeneidad**

$$
\left.
\begin{array}{ll}
H_0: &\mathrm{La\ raza\ de\ los\ habitantes\ es\ independiente\ de\ su\ tendencia\ política} \\
H_1: &\mathrm{La\ raza\ de\ los\ habitantes\ NO\ es\ independiente\ de\ su\ tendencia\ política}
\end{array}
\right\}
$$

In [None]:
import numpy as np
import pandas as pd
import scipy.stats as stats

In [None]:
# Seed NumPy's legacy global random generator so the np.random draws made
# after this call are repeatable.
np.random.seed(2020)

In [None]:
# Draw 2000 race labels with the given category probabilities.
voter_race = np.random.choice(
    a=["Asiatico", "Negro", "Hispano", "Blanco", "Otro"],
    size=2000,
    p=[0.05, 0.10, 0.25, 0.55, 0.05],
)

# Draw 2000 political affiliations with a separate call, using only its own
# probabilities (the race labels are not an input to this draw).
voter_politics = np.random.choice(
    a=["Democrata", "Republicano", "Independiente"],
    size=2000,
    p=[0.4, 0.35, 0.25],
)

In [None]:
# One row per voter: race label and political affiliation side by side.
voters = pd.DataFrame(dict(raza=voter_race, politica=voter_politics))

In [None]:
# Contingency table race x politics; margins=True appends the "All" totals
# as the last row and the last column.
voters_tab = pd.crosstab(
    index=voters["raza"],
    columns=voters["politica"],
    margins=True,
)
voters_tab

politica,Democrata,Independiente,Republicano,All
raza,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Asiatico,50,22,27,99
Blanco,428,277,383,1088
Hispano,207,115,182,504
Negro,92,61,66,219
Otro,37,17,36,90
All,814,492,694,2000


In [None]:
# Observed frequencies: the contingency table without its margins.
# The "All" totals are the last row and last column of voters_tab, so they
# are dropped positionally instead of hard-coding the number of categories.
o_ij = voters_tab.iloc[:-1, :-1]
o_ij

politica,Democrata,Independiente,Republicano
raza,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Asiatico,50,22,27
Blanco,428,277,383
Hispano,207,115,182
Negro,92,61,66
Otro,37,17,36


$$
\chi_0 = \sum_{i=1}^I\sum_{j=1}^J\frac{(o_{ij}-e_{ij})^2}{e_{ij}}
$$

In [None]:
# Expected frequencies under independence:
#   e_ij = (row total i) * (column total j) / grand total.
# Totals, grand total and labels are all read from voters_tab (whose last
# row/column hold the "All" margins) rather than being hard-coded, so the
# cell keeps working if the sample size or the set of categories changes.
e_ij = pd.DataFrame(
    np.outer(voters_tab["All"].iloc[:-1], voters_tab.loc["All"].iloc[:-1])
    / voters_tab.loc["All", "All"],
    index=voters_tab.index[:-1],
    columns=voters_tab.columns[:-1],
)
e_ij

Unnamed: 0,Democrata,Independiente,Republicano
Asiatico,40.293,24.354,34.353
Blanco,442.816,267.648,377.536
Hispano,205.128,123.984,174.888
Negro,89.133,53.874,75.993
Otro,36.63,22.14,31.23


In [None]:
# Test statistic: (observed - expected)^2 / expected, summed first down each
# column and then across columns to obtain a single scalar.
chi0 = (o_ij - e_ij).pow(2).div(e_ij).sum().sum()
chi0

10.273214989515225

In [None]:
# Significance level of the test.
alpha = 0.05
# ppf returns the critical value: the (1 - alpha) quantile of the chi-square
# distribution. For an I x J table the degrees of freedom are (I-1)*(J-1);
# they are derived from the shape of o_ij instead of being hard-coded.
crit = stats.chi2.ppf(q=1 - alpha, df=(o_ij.shape[0] - 1) * (o_ij.shape[1] - 1))
crit

15.50731305586545

In [None]:
# True when the statistic stays below the critical value.
chi0 < crit

True

In [None]:
# p-value: upper-tail probability of the statistic under a chi-square with
# (I-1)*(J-1) degrees of freedom, taken from the shape of o_ij rather than
# hard-coded as 8.
p_val = 1 - stats.chi2.cdf(x=chi0, df=(o_ij.shape[0] - 1) * (o_ij.shape[1] - 1))
p_val

0.24637107148949777

In [None]:
# True when the p-value is above the significance level.
alpha < p_val

True

In [None]:
# Automated independence test: it only needs the observed table. The table
# for this test is the race x politics table o_ij, not o_i, which is the
# one-column goodness-of-fit table from the first part of the notebook.
stats.chi2_contingency(observed=o_ij)

(10.273214989515228,
 0.24637107148949744,
 8,
 array([[ 40.293,  24.354,  34.353],
        [442.816, 267.648, 377.536],
        [205.128, 123.984, 174.888],
        [ 89.133,  53.874,  75.993],
        [ 36.63 ,  22.14 ,  31.23 ]]))

In [2]:
# Greeting shown before any input is requested.
print("Hola mi nombre es KLZ y estoy construyendome como programa")

# Read the two operands from standard input, one per prompt; the second
# prompt is intentionally empty. Each entry is converted with int().
n1 = int(input("Ingresa dos números y yo haré la suma "))
n2 = int(input())

# Report the sum of both integers.
print("La suma de los números es: ", n1 + n2)


Hola mi nombre es KLZ y estoy construyendome como programa
Ingresa dos números y yo haré la suma 5
6
La suma de los números es:  11
