In [5]:
from scipy.stats import uniform
from scipy.stats import norm
from scipy.stats import chi2
from scipy.stats import t
from scipy.stats import f

# <b> <u> Use only these imported functions to check the confidence intervals or to obtain P-values </b>

# <b><u> Problem 1 </u> </b>

## <b>The average IQ of a sample of $N$ university students was found to be $\bar{x}$. Write a Python function which performs a statistical test to determine whether the average IQ of university students is greater than $\mu$, assuming that IQs are normally distributed. It is known from previous studies that the standard deviation of IQs among students is approximately $\sigma$. This function can take the above parameters as arguments along with the significance level. Check the function outputs for the following input combinations. Also use only the functions imported above to perform the tests.</b>

IQs are normally distributed. <br>
Assumption is average IQ of university student is greater than hypothesis mean.<br>
**Ho:μ <=X̄** <br>
**Ha:μ >X̄** <br>
Right tailed

In [27]:
input_list1 = [{'sample_size' : 50 , 'sample_mean' : 105 , 'hypothesis_mean' : 100, 'population_std_deviation' : 20, 'significance_level' : 0.05},
              {'sample_size' : 100 , 'sample_mean' : 102 , 'hypothesis_mean' : 100, 'population_std_deviation' : 30, 'significance_level' : 0.05},
              {'sample_size' : 500 , 'sample_mean' : 110 , 'hypothesis_mean' : 100, 'population_std_deviation' : 10, 'significance_level' : 0.01}]

In [28]:
#Creating a function for testing a one sample z test
def one_sample_z_test(sample_size, sample_mean, hypothesis_mean, population_std_deviation, significance_level):

    #Calculating z value
    z = (sample_mean - hypothesis_mean) / (population_std_deviation / (sample_size**0.5))

    #Calculating p value
    p_value = 1 - norm(0,1).cdf(z)    #Right tailed

    reject_null = p_value < significance_level

    #Comparing pvalue with the significance level with
    if p_value > significance_level:
        return f'Since the P-value {round(p_value,2)} which is greater than significance level of  {significance_level} we fail to reject null hypothesis.'
    else:
        return f'Since the P-value {round(p_value,2)} which is less than significance level of  {significance_level} we reject null hypothesis.'

In [29]:
#Applying the test on given data
results1 = [one_sample_z_test(**params) for params in input_list1 ]
results1

['Since the P-value 0.04 which is less than significance level of  0.05 we reject null hypothesis.',
 'Since the P-value 0.25 which is greater than significance level of  0.05 we fail to reject null hypothesis.',
 'Since the P-value 0.0 which is less than significance level of  0.01 we reject null hypothesis.']

# <b><u> Problem 2 </u> </b>

## <b>Write a Python function to perform a statistical test to assess whether the standard deviation of the heights of 10- year-old children is equal to $\sigma$ cm, based on the random sample of $N$ where $N$ < 20 heights in cm. You can use numpy functionality to generate the list of N heights between 120 and 140 which follow the normal distribution. You can use <code>np.randn() </code> to generate the sample heights. Your code should print all the possible conclusions based on the tests. Check the function outputs for N = 10, 15, 18. Also you can take values of $\sigma$ to be 3, 4 and 5 respectively </b>

Normally distributed <br>
**Ho : σ = S** <br>
**Ha : σ ≠ S**

In [30]:
input_list2 = [{'sample_size' : 10 , 'hypothesis_std_dev' : 3,  'significance_level' : 0.05},
              {'sample_size' : 15 , 'hypothesis_std_dev' : 4,  'significance_level' : 0.05},
              {'sample_size' : 18  , 'hypothesis_std_dev' : 5,  'significance_level' : 0.01}]

In [31]:
import numpy as np

#Creating a function for testing a one sample variance test
def one_sample_variance_test(sample_size, hypothesis_std_dev, significance_level):

    #Generating the sample
    heights = 120+20*norm.cdf(np.random.randn(sample_size))

    #calculating sample mean and variance
    sample_mean = np.mean(heights)
    sample_variance = np.var(heights)

    #Calculating chi2 value
    chi_value = (sample_size - 1)*sample_variance/hypothesis_std_dev**2

    #Calculating p value
    p_value = 2*(1 - chi2.cdf(chi_value, (sample_size - 1)))

    #Comparing p value with the significance level
    if p_value > significance_level:
        return f'Since the P-value {round(p_value,2)} which is greater than significance level of  {significance_level} we fail to reject null hypothesis.'
    else:
        return f'Since the P-value {round(p_value,2)} which is less than significance level of  {significance_level} we reject null hypothesis.'

In [32]:
#Applying the test function on given data
result2 = [one_sample_variance_test(**params) for params in input_list2]
result2

['Since the P-value 0.0 which is less than significance level of  0.05 we reject null hypothesis.',
 'Since the P-value 0.28 which is greater than significance level of  0.05 we fail to reject null hypothesis.',
 'Since the P-value 1.68 which is greater than significance level of  0.01 we fail to reject null hypothesis.']

# <b><u> Problem 3 </u></b>

# <b> In a one-year mortality investigation, $m$ of the $M$ ninety-year-old males and $f$ of the $F$ ninety-year-old females present at the start of the investigation died before the end of the year. Assuming that the numbers of deaths follow binomial distributions, write a Python function to test whether there is a difference between male and female mortality rates at this age.</b>

Making an assumption that mortality rate of males and females is equal <br>
**Ho: P_male = P_female** <br>
**Ha: P_male ≠ P_female**

In [33]:
input_list3 = [{'dead_males' : 25 , 'males' : 100,  'dead_females' : 20, 'females' : 150 ,  'significance_level' : 0.05},
              {'dead_males' : 30 , 'males' : 110,  'dead_females' : 20, 'females' : 160 ,  'significance_level' : 0.05},
              {'dead_males' : 20 , 'males' : 100,  'dead_females' : 20, 'females' : 120 ,  'significance_level' : 0.01}]

In [34]:

def check_mortality_rate(dead_males, males, dead_females, females, significance_level):

    #Initializing variables
    theta1 = dead_males/males
    n1 = males
    theta2 = dead_females/females
    n2 = females

    #Calculating z value and p value
    z = (theta1 - theta2) / (((theta1*(1-theta1))/n1) + (theta2*(1-theta2)/n2))**0.5
    p_value = 2*(1 - norm.cdf(z))

    #Comparing p value with significance level
    if p_value > significance_level:
        return f'Since the P-value {round(p_value,2)} which is greater than significance level of  {significance_level} we fail to reject null hypothesis.'
    else:
        return f'Since the P-value {round(p_value,2)} which is less than significance level of  {significance_level} we reject null hypothesis.'

In [36]:
#Applying the function to the input list
result3 = [check_mortality_rate(**params) for params in input_list3]
result3

['Since the P-value 0.02 which is less than significance level of  0.05 we reject null hypothesis.',
 'Since the P-value 0.0 which is less than significance level of  0.05 we reject null hypothesis.',
 'Since the P-value 0.53 which is greater than significance level of  0.01 we fail to reject null hypothesis.']