# **Z-Test**

Import proportion Ztest and numpy

In [None]:
from statsmodels.stats.proportion import proportions_ztest
import numpy as np

Declare sample variable

In [None]:
# Significance level (alpha) used for the decision below.
significance = 0.05
# Number of successes and number of observations, one entry per group.
successes = np.asarray([320, 300])
samples = np.asarray([400, 425])

Calculate the value of Z-Test and P-Value

$Z=\frac{(\hat{p}_1-\hat{p}_2)-0}{\sqrt{\hat{p}\hat{q}\left(\frac{1}{n_1}+\frac{1}{n_2}\right)}}$

In [None]:
# Two-sided z-test comparing the proportions of the two groups;
# unpacks the test statistic and the p-value.
stat, p_value = proportions_ztest(
    count=successes,
    nobs=samples,
    alternative='two-sided',
)

Print Z-Stat, P-Value, and the result of the hypothesis

In [None]:
# Show the statistic and p-value, then the decision at the chosen alpha.
print('z_stat: %0.5f, p_value: %0.5f' % (stat, p_value))
print("Reject the null hypothesis" if p_value < significance
      else "Accept the null hypothesis")

z_stat: 3.12644, p_value: 0.00177
Reject the null hypothesis


# **T-Test**

Import numpy and scipy.stats library

In [None]:
import numpy as np
import scipy.stats as stats

## **Example 1**

Declare the significance level and the data for the two samples, A and B

In [None]:
# Significance level (alpha) for Example 1.
significance = 0.05
# Two independent samples of ten observations each.
A = np.asarray([43, 53, 65, 49, 55, 60, 47, 50, 60, 55])
B = np.asarray([62, 43, 54, 67, 59, 45, 46, 63, 65, 45])

Calculate t-test value and p-value

$t=\frac{(\bar{x}_A-\bar{x}_B)-(\mu_A-\mu_B)}{\sqrt{S^2_p\left(\frac{1}{n_A}+\frac{1}{n_B}\right)}}$

In [None]:
# Independent two-sample t-test on A and B with equal_var=True.
stat, p_value = stats.ttest_ind(A, B, equal_var=True)

The p-value is greater than the significance level, so we fail to reject the null hypothesis

In [None]:
# Show the t statistic and p-value, then compare the p-value with alpha.
print('t_stat: %0.5f, p_value: %0.4f' % (stat, p_value))
if p_value < significance:
  print ("Reject the null hypothesis")
else:
  # Spelling corrected: this message previously read "Accpet".
  print ("Accept the null hypothesis")

t_stat: -0.32795, p_value: 0.7467
Accpet the null hypothesis


## **Example 2**

In [None]:
import numpy as np
import scipy.stats as stats

In [None]:
# Significance level (alpha) for Example 2.
significance = 0.05
# Two independent samples of ten observations each.
A = np.asarray([43, 53, 65, 49, 55, 60, 147, 50, 60, 55])
B = np.asarray([62, 43, 54, 67, 59, 45, 46, 63, 65, 45])

$t=\frac{(X̄-Ȳ)-(\mu_x-\mu_y)}{\sqrt{\frac{S^2_x}{n_x}+\frac{S^2_y}{n_y}}}$

In [None]:
# Independent two-sample t-test on A and B with equal_var=False
# (the variances are not pooled).
stat, p_value = stats.ttest_ind(a=A, b=B, equal_var=False)

In [None]:
# Show the t statistic and p-value, then the decision at the chosen alpha.
print('t_stat: %0.5f, p_value: %0.4f' % (stat, p_value))
print("Reject the null hypothesis" if p_value < significance
      else "Accept the null hypothesis")

## **Example 3**

In [None]:
import numpy as np
import scipy.stats as stats

In [None]:
# Significance level (alpha) for Example 3.
significance = 0.01
# Two sets of nine measurements, compared pairwise by position.
group1 = np.asarray([60, 45, 80, 87, 79, 75, 60, 30, 45])
group2 = np.asarray([75, 65, 90, 80, 89, 95, 85, 69, 40])

$t=\frac{d̄-μ_D}{S_D/\sqrt{n}}$

In [None]:
# Paired (related-samples) t-test on group1 and group2.
stat, p_value = stats.ttest_rel(a=group1, b=group2)

In [None]:
# Show the t statistic and p-value, then compare the p-value with alpha.
print('t_stat: %0.5f, p_value: %0.4f' % (stat, p_value))
if p_value < significance:
  print ("Reject the null hypothesis")
else:
  # Spelling corrected: this message previously read "Accpet".
  print ("Accept the null hypothesis")

# **Chi-Square Test**

## **Example 1**

In [None]:
import pandas as pd
from scipy.stats import chi2_contingency

In [None]:
# 2x2 table of observed counts: rows are marital status, columns are gender.
df = pd.DataFrame(
    {'Male': [25, 35], 'Female': [15, 25]},
    index=["Married", "Single"],
)

$\chi^2=\sum_{i=1}^r\sum_{j=1}^c \frac{(O_{ij}-E_{ij})^2}{E_{ij}}$

In [None]:
# Chi-square test on the contingency table, with correction=False.
chi2, p, dof, expected = chi2_contingency(df, correction=False)
# One print call, one item per line: statistic, p-value, degrees of freedom,
# and the table of expected frequencies.
print(
    f"chi2 statistic: {chi2:.5g}",
    f"p-value: {p:.5g}",
    f"degrees of freedom: {dof}",
    "expected frequencies:",
    expected,
    sep="\n",
)

chi2 statistic: 0.17361
p-value: 0.67692
degrees of freedom: 1
expected frequencies:
[[24. 16.]
 [36. 24.]]


## **Example 2**

In [None]:
# Observed counts: one list per row of the 3x3 contingency table.
row1, row2, row3 = [525, 105, 50], [400, 200, 25], [600, 150, 10]
data = [row1, row2, row3]

# Chi-square test on the observed table.
chi2, p, dof, expected = chi2_contingency(data)
# One print call, one item per line: statistic, p-value, degrees of freedom,
# and the table of expected frequencies.
print(
    f"chi2 statistic: {chi2:.5g}",
    f"p-value: {p:.5g}",
    f"degrees of freedom: {dof}",
    "expected frequencies:",
    expected,
    sep="\n",
)

# **Case Study**

## **HeartDisease Dataset**

In [None]:
# Google Colab helper: upload file(s) into the runtime. The recorded output
# shows HeartDisease.csv being saved, which the next cell reads.
# NOTE(review): this import is only available when running on Colab.
from google.colab import files
uploaded = files.upload()

Saving HeartDisease.csv to HeartDisease.csv


In [None]:
import numpy as np
import pandas as pd
# Load the heart-disease dataset from the working directory.
# Note: this rebinds `df`, which held the small contingency table in the
# Chi-Square examples.
df = pd.read_csv('HeartDisease.csv')

In [None]:
# Display the loaded DataFrame.
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,target
0,63,1,3,145,233,1,0,1
1,37,1,2,130,250,0,1,1
2,41,0,1,130,204,0,0,1
3,56,1,1,120,236,0,1,1
4,57,0,0,120,354,0,1,1
...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,0
299,45,1,3,110,264,0,1,0
300,68,1,0,144,193,1,1,0
301,57,1,0,130,131,0,1,0


In [None]:
# Readable label for the `sex` code (1 -> Male, 0 -> Female).
df['Gender'] = df.sex.replace({1: 'Male', 0: 'Female'})
# Flag the heart-disease cases. `target` is matched against both its numeric
# code (1) and its 'Yes' label, because a later cell overwrites the column
# with 'Yes'/'No'; comparing with `== 1` alone would then count zero cases
# whenever this cell is re-run.
has_disease = df['target'].isin([1, 'Yes'])
# Per gender: number of heart-disease cases and total number of rows.
p = has_disease.groupby(df['Gender']).agg(['sum', 'size'])
p.columns = ['HeartDisease', 'Total']
p

Unnamed: 0_level_0,HeartDisease,Total
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,72,96
Male,93,207


### **Z-Test**

In [None]:
from statsmodels.stats.proportion import proportions_ztest
import numpy as np

In [None]:
# Significance level (alpha) for the case study.
significance = 0.01
# Heart-disease counts and group sizes, ordered Female then Male,
# taken from the per-gender summary table `p`.
successes = p.loc[['Female', 'Male'], 'HeartDisease'].to_numpy()
samples = p.loc[['Female', 'Male'], 'Total'].to_numpy()

In [None]:
# Two-sided z-test comparing the heart-disease proportions of the two groups;
# unpacks the test statistic and the p-value.
stat, p_value = proportions_ztest(
    count=successes,
    nobs=samples,
    alternative='two-sided',
)

In [None]:
# Show the statistic and p-value, then the decision at the chosen alpha.
print('z_stat: %0.5f, p_value: %0.6f' % (stat, p_value))
print("Reject the null hypothesis" if p_value < significance
      else "Accept the null hypothesis")

z_stat: 4.89023, p_value: 0.000001
Reject the null hypothesis


### **Chi-Square Test**

In [None]:
# Relabel the outcome for display: 1 -> 'Yes', 0 -> 'No'.
# NOTE: this overwrites the numeric `target` column in place. Any cell that
# compares `target` with 1 (such as the per-gender count above) will find no
# matches if it is re-run after this one.
df['target'] = df['target'].replace({1: 'Yes', 0: 'No'})
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,target
0,63,1,3,145,233,1,0,Yes
1,37,1,2,130,250,0,1,Yes
2,41,0,1,130,204,0,0,Yes
3,56,1,1,120,236,0,1,Yes
4,57,0,0,120,354,0,1,Yes
...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,No
299,45,1,3,110,264,0,1,No
300,68,1,0,144,193,1,1,No
301,57,1,0,130,131,0,1,No


In [None]:
# Gender x target crosstab including row/column totals (margins=True).
# Stored under its own name so the totals can never be passed to the
# chi-square test by mistake; `Table1` holds the raw counts used for testing.
Table1_with_totals = pd.crosstab(df.Gender, df.target, margins=True)

In [None]:
# Observed counts of target by Gender, without totals.
Table1 = pd.crosstab(index=df['Gender'], columns=df['target'])

In [None]:
# Display the Gender x target contingency table.
Table1

target,No,Yes
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,24,72
Male,114,93


In [None]:
from scipy.stats import chi2_contingency

# Chi-square test on the Gender x target table, with correction=False.
# Note: `p` is rebound here to the p-value (it previously named the
# per-gender summary table).
chi2, p, dof, expected = chi2_contingency(Table1, correction=False)
# One print call, one item per line: statistic, p-value, degrees of freedom,
# and the table of expected frequencies.
print(
    f"chi2 statistic: {chi2:.5g}",
    f"p-value: {p:.5g}",
    f"degrees of freedom: {dof}",
    "expected frequencies:",
    expected,
    sep="\n",
)
# Decision at the 1% significance level.
significance = 0.01
print("sex and have heart disease are dependent" if p < significance
      else "sex and have heart disease are independent")

chi2 statistic: 23.914
p-value: 1.0072e-06
degrees of freedom: 1
expected frequencies:
[[ 43.72277228  52.27722772]
 [ 94.27722772 112.72277228]]
sex and have heart disease are dependent
