In [1]:
'''Blood glucose levels for obese patients have a mean of 100 with a standard deviation of 15. 
   A researcher thinks that a diet high in raw cornstarch will have a positive effect on blood glucose levels. 
   A sample of 36 patients who have tried the raw cornstarch diet have a mean glucose level of 108. 
   Test the hypothesis that the raw cornstarch had an effect or not.'''

import numpy as np
import scipy.stats as stats

def hypothes_test(mean,std_dev,sample_size,sample_mean,alpha):

# Calculate the Z-score
    std_dev_sample = std_dev/np.sqrt(sample_size)
    z_score=(sample_mean-mean)/std_dev_sample
    print('Z score of the sample = ', z_score)

# Find the probability of this occurrence - p value. 

    pval=stats.norm.cdf(z_score)
    print('p value for the z-score = ', pval)

# If pval > alpha, then do not reject the null hypothesis. If pval < alpha, then reject the null hypothesis.

    if pval > alpha:
        print('\nResult: Reject the null hypothesis')
    else:
        print('\nResult: Do not reject the null hypothesis')

# State the Null and Alternate hypothesis
# Null hypothesis - There is no effect of raw cornstarch on the glucose level. H0 = 100
# Alternate hypothesis - There is an effect of raw cornstarch on the glucose level. HA > 100 

print('Null Hypothesis      : There is no effect of raw cornstarch on the glucose level. H0 = 100')
print('Alternate Hypothesis : There is an effect of raw cornstarch on the glucose level. HA > 100')
print('*' * 90)
# Given values in the test are:
mean = 100
std_dev=15
sample_size=36
sample_mean=108

# Assume the signifcance level, alpha to be 5%. Call hypothesis testing function defined above. 

hypothes_test(100,15,36,108,0.05)


Null Hypothesis      : There is no effect of raw cornstarch on the glucose level. H0 = 100
Alternate Hypothesis : There is an effect of raw cornstarch on the glucose level. HA > 100
******************************************************************************************
Z score of the sample =  3.2
p value for the z-score =  0.9993128620620841

Result: Reject the null hypothesis


In [2]:
'''In one state, 52% of the voters are Republicans, and 48% are Democrats. 
   In a second state, 47% of the voters are Republicans, and 53% are Democrats. 
   Suppose a simple random sample of 100 voters are surveyed from each state.
   What is the probability that the survey will show a greater percentage of Republican voters in the second state 
   than in the first state?'''

# Given values :
# Sample Size = n = 100
# Let the proportion of voters from the first state be denoted as P1 and PD1. p1=P1*Sample Size 
# Let the proportion of voters from the second state be denoted as P2 and PD2. p2=P2* Sample Size 

n=100

P1=0.52
PD1=0.48
p1=0.52* 100
P2=0.47
PD2=0.53
p2=0.47 * 100

# Find the mean of difference in proportion
mu=(p1-p2)/n 
print('Mean of difference in proportion : ', mu)

std_dev2=np.sqrt((P1*PD1)/n + (P2*PD2)/n)
print('Standard Deviation of the sample distribution: ', std_dev2)

# Find the probability that p1 is less than p2, ie, p1-p2 < 0
# Get the z score for p1-p2
z_p1_p2 = (0-mu)/std_dev2

# Probability of Z score above:

prob_p1_p2=stats.norm.cdf(z_p1_p2)
print('\nProbability to get a greater percentage of Republican voters from the second state is ', prob_p1_p2)

Mean of difference in proportion :  0.05
Standard Deviation of the sample distribution:  0.07061869440877536

Probability to get a greater percentage of Republican voters from the second state is  0.2394639918222003


In [3]:
'''You take the SAT and score 1100. The mean score for the SAT is 1026 and the standard deviation is 209. 
   How well did you score on the test compared to the average test taker?'''

import scipy.stats as stats
# Given data:
score=1100
population_mean = 1026
sd=209

# Find the Z value of the SAT Score. This will give the value of the score related the mean. 
# If Z score is positive, it shows the % by which the value is above mean.


z_score_test = (score-population_mean)/sd
print ('Z Score of the result in the test = ', z_score_test)

# Calculate the probability that anyone has score less than 1100. 
pscore= stats.norm.cdf(z_score_test)
print ('Percentage of people who score below me = ', round(pscore*100,2), '%')

# Calculate the probability that anyone has score greater than 1100. My score will be in this percentage. 
print('\nMy score is in the top', round((1-pscore)* 100,2), '%')

Z Score of the result in the test =  0.35406698564593303
Percentage of people who score below me =  63.84 %

My score is in the top 36.16 %
