In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats

from scipy.stats import chi2_contingency

In [15]:
from statsmodels.stats.weightstats import ztest as ztest

In [16]:
# sample data - population weight in kgs
data = [78,62,80,75,61,74,79,93,77,95]

In [17]:
# Define the null hypothesis (average population weight = 80)
null_mean = 80

In [18]:
# Calculate the sample mean and standard deviation
sample_mean = np.mean(data)
sample_std = np.std(data)

In [19]:
sample_mean

77.4

In [20]:
sample_std

10.423051376636307

In [21]:
# Calculate the Z-score
z_score = (sample_mean - null_mean) / sample_std
z_score

-0.2494471058473337

In [22]:
# Calculate the two-tailed p-value using the z-score
p_value = 2 * (1 - stats.norm.cdf(abs(z_score)))
p_value

0.8030149512456652

In [23]:
print("Sample Mean:", sample_mean)
print("Sample Standard Deviation:", sample_std)
print("Z-score:", z_score)
print("P-value:", p_value)

Sample Mean: 77.4
Sample Standard Deviation: 10.423051376636307
Z-score: -0.2494471058473337
P-value: 0.8030149512456652


In [24]:
alpha = 0.05

In [28]:
if p_value < alpha:
    print("Reject the null hypothesis. The average weight is statistically different from 80 at a significance level of", alpha)
else:
    print("Fail to reject the null hypothesis. There is not enough evidence to conclude the average weight is statistically different from 80 at a significance level of", alpha)
    

Fail to reject the null hypothesis. There is not enough evidence to conclude the average weight is statistically different from 80 at a significance level of 0.05


In [29]:
data = [88,92,94,94,96,97,97,97,99,99,105,109,109,109,110,112,112,113,114,115]

In [30]:
ztest(data, value = 100)

(1.5976240527147705, 0.1101266701438426)

The test statistic for the one sample z-test is 1.5976 and the corresponding p-value is 0.1101.

Since this p-value is not less than 0.05, we do not have sufficient evidence to reject the null hypothesis.

## Chi-square test

In [32]:
data = [[207, 282, 241], [234, 242, 232]]
stat, p, dof, expected = chi2_contingency(data)

In [33]:
stat

4.542228269825232

In [34]:
p

0.1031971404730939

In [35]:
dof

2

In [36]:
expected

array([[223.87343533, 266.00834492, 240.11821975],
       [217.12656467, 257.99165508, 232.88178025]])

In [39]:
alpha = 0.05
print("p value is: " + str(p))
if p <= alpha:
    print('Dependent (rejected H0)')
else:
    print('Independent (H0 holds true)')

p value is: 0.1031971404730939
Independent (H0 holds true)


### example

In [40]:
data = [[20, 5], [10, 15]]
stat, p, dof, expected = chi2_contingency(data)

In [41]:
expected

array([[15., 10.],
       [15., 10.]])

In [42]:
stat

6.75

In [43]:
p

0.0093747684594349

In [44]:
dof

1

In [45]:
alpha = 0.05
print("p value is: " + str(p))
if p <= alpha:
    print('Dependent (rejected H0)')
else:
    print('Independent (H0 holds true)')

p value is: 0.0093747684594349
Dependent (rejected H0)


### example

In [53]:
data = [[5,10,5], [15,5,0], [5,10,15]]
stat, p, dof, expected = chi2_contingency(data)

In [54]:
expected

array([[ 7.14285714,  7.14285714,  5.71428571],
       [ 7.14285714,  7.14285714,  5.71428571],
       [10.71428571, 10.71428571,  8.57142857]])

In [55]:
stat

24.791666666666668

In [56]:
p

5.540229051644113e-05

In [57]:
dof

4

In [58]:
alpha = 0.05
print("p value is: " + str(p))
if p <= alpha:
    print('Dependent (rejected H0)')
else:
    print('Independent (H0 holds true)')

p value is: 5.540229051644113e-05
Dependent (rejected H0)


### example

In [59]:
data = [[15,5,5], [10,10,15], [2,3,5]]
stat, p, dof, expected = chi2_contingency(data)

In [60]:
stat

8.07777777777778

In [61]:
p

0.08877028710376675

In [62]:
dof

4

In [63]:
expected

array([[ 9.64285714,  6.42857143,  8.92857143],
       [13.5       ,  9.        , 12.5       ],
       [ 3.85714286,  2.57142857,  3.57142857]])

In [64]:
alpha = 0.05
print("p value is: " + str(p))
if p <= alpha:
    print('Dependent (rejected H0)')
else:
    print('Independent (H0 holds true)')

p value is: 0.08877028710376675
Independent (H0 holds true)
