In [28]:
import numpy as np
from scipy.stats import t,f

def welch_test(data_X, data_Y):
    '''H0: E(x)=E(y).
    returns the p value of the test
    if p > significance level, then H0 is accepted'''
    mean_x = np.mean(data_X)
    mean_y = np.mean(data_Y)
    var_x = np.var(data_X)
    var_y = np.var(data_Y)
    n_x = len(data_X)
    n_y = len(data_Y)
    vn_x = var_x/n_x
    vn_y = var_y/n_y
    t_stat = (mean_x-mean_y)/np.sqrt(vn_x+vn_y)
    df = (vn_x + vn_y)**2/(vn_x**2/(n_x-1)+(vn_y**2/(n_y-1)))
    p_value = 2 * t.sf(np.abs(t_stat), df)
    return p_value

def f_test(data_X, data_Y):
    '''H0: var(x)=var(y).
    returns the p value of the test
    if p > significance level, then H0 is accepted
    this test is sensitive to data_x and data_y '''
    var_x = np.var(data_X)
    var_y = np.var(data_Y)
    n_x = len(data_X)
    n_y = len(data_Y)
    if var_x/var_y > var_y/var_x:
        f_stat = var_x/var_y
        df1 = n_x - 1
        df2 = n_y - 1
    else:
        f_stat = var_y/var_x
        df1 = n_y - 1
        df2 = n_x - 1
    p_value = f.sf(f_stat, df1, df2)
    return p_value

In [29]:
rng = np.random.default_rng()
pts = 100000
a = rng.normal(0.51, 0.06, size=pts)#make two normal distributed data array's
b = rng.normal(0.4, 0.05, size=pts)
p = welch_test(a,b)#use the welch test to check if a and b have the same population mean
alpha = 0.05#significance level
if p > alpha:
    print('the population mean of a and b are equal with a p-value of: ' + str(p))
else:
    print('the population mean of a and b are not equal with a p-value of: ' + str(p))  

the population mean of a and b are not equal with a p-value of: 0.0


In [30]:
rng = np.random.default_rng()
pts = 100000
a = rng.normal(0.51, 0.06, size=pts)#make two normal distributed data array's
b = rng.normal(0.51, 0.06, size=pts)
p = f_test(a,b)#Use f test to check if a and b have the same variance
alpha = 0.05#significance level
if p > alpha:
    print('the variance of a and b are equal with a p-value of: ' + str(p))
else:
    print('the variance of a and b are not equal with a p-value of: ' + str(p)) 

the variance of a and b are equal with a p-value of: 0.2593526008575355
