# Functions

### Mean squared error (MSE) evaluation

In [None]:
def eval_mse(y, yhat):
    """
    Calculate the halved mean squared error on a data set.

    The sum of squared errors is divided by ``2*m`` (not ``m``), so the
    returned value is one half of the conventional mean squared error.

    Args:
      y    : (array-like  Shape (m,) or (m,1))  target value of each example
      yhat : (array-like  Shape (m,) or (m,1))  predicted value of each example
    Returns:
      err: (scalar)  sum((yhat - y)**2) / (2*m)
    Raises:
      ValueError: if y and yhat do not hold the same number of values
      ZeroDivisionError: if the inputs are empty (m == 0)
    """
    import numpy as np  # local import so the function does not rely on other cells

    # Flatten both inputs so (m,) and (m,1) shapes are paired element by
    # element, by position, and can never broadcast against each other.
    targets = np.asarray(y, dtype=float).ravel()
    preds = np.asarray(yhat, dtype=float).ravel()

    if targets.size != preds.size:
        raise ValueError(
            f"y and yhat must have the same number of values "
            f"(got {targets.size} and {preds.size})"
        )

    m = targets.size
    if m == 0:
        # Same exception type the plain division by 2*m would produce.
        raise ZeroDivisionError("eval_mse requires at least one example")

    residuals = preds - targets
    # Dot product of the residuals with themselves is the sum of squares.
    return np.dot(residuals, residuals) / (2 * m)

### Student's and Welch's t-tests and Mann-Whitney U test automatic evaluation
https://towardsdatascience.com/statistical-significance-testing-of-two-independent-sample-means-with-scipy-638cb834b4d1

In [None]:
from scipy.stats import levene, ttest_ind, mannwhitneyu, shapiro

def check_normality(grp, alpha=0.05):
    """
    Run a Shapiro-Wilk test on a sample and report whether it looks normal.

    Prints the verdict together with the test's p-value.

    Args:
      grp   : sample values passed to scipy.stats.shapiro
      alpha : significance level the p-value is compared against
    Returns:
      bool: False when the p-value is below alpha (normality rejected),
            True otherwise
    """
    p_value = shapiro(grp).pvalue

    # Normality is only rejected when the p-value is strictly below alpha.
    rejected = p_value < alpha
    if not rejected:
        print(f"Sampled from normal distribution (p-value={p_value:.4f})  => Normality assumption is met")
        return True

    print(f"Sampled from non-normal distribution (p-value={p_value:.4f}) => Normality assumption is not met")
    return False

def check_mean_significance(grp1, grp2, alpha=0.05, alternative='two-sided'):
    """
    Pick and run a two-sample significance test, printing every step.

    Workflow:
      1. Both groups are checked with check_normality.
      2. If both pass, Levene's test decides between Student's t-test
         (equal variances) and Welch's t-test (unequal variances).
      3. Otherwise the Mann-Whitney U test is used instead.

    Args:
      grp1, grp2  : the two independent samples to compare
      alpha       : significance level used for every test in the workflow
      alternative : alternative hypothesis forwarded to ttest_ind / mannwhitneyu
    Returns:
      None. All results are reported through print().
    """
    print("========== Checking for normality assumption for first group ==========")
    is_grp1_norm = check_normality(grp1, alpha=alpha)

    print("\n========== Checking for normality assumption for second group ==========")
    is_grp2_norm = check_normality(grp2, alpha=alpha)

    # Non-parametric fallback when at least one group fails the normality check.
    if not (is_grp1_norm and is_grp2_norm):
        print("\n========== Checking for statistical significance of difference in means (Mann-Whitney U Test) ==========")
        mw_pvalue = mannwhitneyu(grp1, grp2, alternative=alternative).pvalue
        if mw_pvalue < alpha:
            print(f"We reject the null hypothesis (p-value = {mw_pvalue:.4f})")
        else:
            print(f"We do not reject the null hypothesis (p-value = {mw_pvalue:.4f})")
        return

    # Both groups look normal: the variance check selects the t-test variant.
    print("\n========== Checking for equality of population variance ==========")
    levene_pvalue = levene(grp1, grp2).pvalue
    if levene_pvalue < alpha:
        print(f"Groups have unequal variance (p-value = {levene_pvalue:.4f}) => Equal variance assumption is not met")
        equal_var = False
        test = "Welch's t-test"
    else:
        print(f"Groups have equal variance (p-value = {levene_pvalue:.4f}) => Equal variance assumption is met")
        equal_var = True
        test = "Student's t-test"

    print(f"\n========== Checking for statistical significance of difference in means ({test}) ==========")
    t_pvalue = ttest_ind(grp1, grp2, equal_var=equal_var, alternative=alternative).pvalue
    if t_pvalue < alpha:
        print(f"We reject the null hypothesis (p-value = {t_pvalue:.4f}).")
    else:
        print(f"We do not reject the null hypothesis (p-value = {t_pvalue:.4f}).")