In [1]:
from scipy import stats
import numpy as np
import pandas as pd

In [2]:
raw_data = pd.read_excel('Data Files\Icecream-Sale-Data.xlsx')
raw_data

Unnamed: 0,Day,Number of ice cream sold
0,1,13
1,2,8
2,3,10
3,4,10
4,5,8
5,6,9
6,7,10
7,8,11
8,9,6
9,10,8


The above data represents the no. of ice-cream sold in 20 days.

Test Hypothesis

H0 : μ <= 10\
H1 : μ > 10

α = 0.05 to test the hypothesis

In [3]:
data = np.array(raw_data['Number of ice cream sold'])
data

array([13,  8, 10, 10,  8,  9, 10, 11,  6,  8, 12, 11, 11, 12, 10, 12,  7,
       10, 11,  8], dtype=int64)

In [4]:
stats.ttest_1samp(data, 10)

Ttest_1sampResult(statistic=-0.35843385854878496, pvalue=0.7239703579964252)

In [5]:
# Since this is a right tailed test, we have to take half of the p-value

actual_p_value = 0.7239703579964252 / 2
actual_p_value

0.3619851789982126

Since P-Value 0.36 is greater than the level of significance(0.05) we can't reject the null hypothesis

### Hypothesis Testing of Proportions

In [6]:
from statsmodels.stats.proportion import proportions_ztest

In [7]:
count = 67
samplesize = 120
P = 0.5

In [8]:
proportions_ztest(count, samplesize, P)

(1.286806739751111, 0.1981616572238455)

Z-value = 1.287\
P-value = 0.198

Because -1.287 > -1.96 and 1.287 < 1.96, we can't reject the null hypothesis


##### Defining the function for calculation of alpha value:

In [9]:
def z_value(x, mu, SEM):
    z = (x - mu)/SEM
    if(z < 0):
        alpha = stats.norm.cdf(z)
    else:
        alpha = 1 - stats.norm.cdf(z)
    print(alpha)

Calculating alpha for different values of x, mu and SEM

In [10]:
x = 48.5
mu = 50
SEM = 0.79

In [11]:
z_value(x, mu, SEM)

0.02879971774715278


In [12]:
x = 51.5
z_value(x, mu, SEM) # alpha/2 

0.02879971774715273


H0 : μ >= 8.3\
H1 : μ < 8.3

Determine the probability of Type II error if μ = 7.4 at 5% significance level. Std = 3.1 and n = 60.

In [13]:
def type_2(mu1, mu2, sigma, n, alpha):
    z = stats.norm.ppf(alpha)
    xbar = mu1 + (z * sigma / np.sqrt(n))
    z2 = (xbar - mu2)/(sigma / np.sqrt(n))
    if(mu1 > mu2):
        beta = 1 - stats.norm.cdf(z2)
    else:
        beta = stats.norm.cdf(z2)
    
    print(beta)

In [14]:
type_2(8.3, 7.4, 3.1, 60, 0.05)

0.27292999450730004


### Hypothesis Testing: Two Sample test

In [15]:
import pandas as pd
import numpy as np
import math
from scipy import stats

In [16]:
def Z_and_P(x1, x2, sigma1, sigma2, n1, n2):
    z = (x1 - x2)/(math.sqrt((sigma1**2 / n1) + (sigma2**2 / n2)))
    if(z < 0):
        p = stats.norm.cdf(z)
    else:
        p = 1 - stats.norm.cdf(z)
    
    return (z, p)

In [17]:
Z_and_P(121, 112, 8, 8, 10, 10)

(2.5155764746872635, 0.00594189462107364)

Population Variance 1 and Variance 2 is unknown but assumed equal

In [18]:
a = [91.5, 94.18, 92.18, 95.39, 91.79, 89.07, 94.72, 89.21]

In [19]:
b = [89.19, 90.95, 90.46, 93.21, 97.19, 97.04, 91.07, 92.75]

In [20]:
stats.ttest_ind(a, b, equal_var=True)

Ttest_indResult(statistic=-0.3535908643461798, pvalue=0.7289136186068217)

In [21]:
stats.t.ppf(0.025, 14)      # critical t-value

-2.1447866879169277