### Сравнение эффективности тестов

Давайте оценим эффективность параметрических и непараметрических тестов на различных распределениях.

Будем рассматривать 3 распределения: нормальное, смесь двух нормальных и экспоненциальное.

In [1]:
import numpy as np
import pandas as pd
import random
import matplotlib.pyplot as plt
import scipy.stats as stats

In [2]:
def one_sample_tests(shift=0):
    alltests=[]
    for j in range(1000):
        norm = np.random.normal(0, 1, 100)+shift
           
        norm1 = np.random.normal(-1, 1, 50)
        norm2 = np.random.normal(1, 1, 50)    
        norm_double = np.append(norm1,norm2)+shift
        
        exp = np.random.exponential(1,100)-1+shift
        exp_med = np.random.exponential(1,100)+0.3068690141668341-1+shift
        
        tests=[]
        tests.append(stats.ttest_1samp(norm, 0)[1])
        tests.append(stats.ttest_1samp(norm_double, 0)[1])
        tests.append(stats.ttest_1samp(exp, 0)[1])
        
        tests.append(stats.wilcoxon(norm)[1])
        tests.append(stats.wilcoxon(norm_double)[1])
        tests.append(stats.wilcoxon(exp_med)[1])
        
        tests.append(stats.binom_test(np.sum(norm>0), 100, 0.5))
        tests.append(stats.binom_test(np.sum(norm_double>0), 100, 0.5))
        tests.append(stats.binom_test(np.sum(exp_med>0), 100, 0.5))  
    
        alltests.append(tests)
        
    alltests=np.array(alltests)
    alltests = alltests<0.05
    
    alltests = pd.DataFrame(np.mean(alltests, axis=0).reshape(-1, 1))
    alltests.columns=["Share of H0 rejected:"]
    alltests.index=["T-test - Normal",
          "T-test - Double_Norm",
          "T-test - Exponential",
          "Wilcoxon - Normal",
          "Wilcoxon - Double_Norm",
          "Wilcoxon - Exponential",
          "Binomial - Normal",
          "Binomial - Double_Norm",
          "Binomial - Exponential",]    
    
    return alltests

In [3]:
# No shift: the samples stay centred on the hypothesised value 0, so the
# shares below estimate how often each test rejects when nothing was shifted.
one_sample_tests(0)

Unnamed: 0,Share of H0 rejected:
T-test - Normal,0.052
T-test - Double_Norm,0.004
T-test - Exponential,0.068
Wilcoxon - Normal,0.052
Wilcoxon - Double_Norm,0.005
Wilcoxon - Exponential,0.398
Binomial - Normal,0.039
Binomial - Double_Norm,0.003
Binomial - Exponential,0.041


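Теперь проверим мощность: сдвинем все распределения на 0.3, так что нулевая гипотеза неверна, и посмотрим, как часто тесты её отвергают: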

In [4]:
# Every sample is shifted by 0.3 away from the hypothesised value 0, so the
# shares below estimate the power of each test.
one_sample_tests(0.3)

Unnamed: 0,Share of H0 rejected:
T-test - Normal,0.861
T-test - Double_Norm,0.538
T-test - Exponential,0.902
Wilcoxon - Normal,0.836
Wilcoxon - Double_Norm,0.473
Wilcoxon - Exponential,1.0
Binomial - Normal,0.611
Binomial - Double_Norm,0.162
Binomial - Exponential,0.944


Теперь посмотрим на тесты для двух независимых выборок:

In [5]:
def two_sample_tests(shift=0):
    alltests=[]
    for j in range(1000):
        norm = (np.random.normal(0, 1, 200)).reshape(100,2)
        norm[:,0] = norm[:,0] + shift
       
        norm1 = np.random.normal(-1, 1, 100)
        norm2 = np.random.normal(1, 1, 100)   
        norm_double = np.random.permutation(np.append(norm1,norm2)).reshape(100,2)
        norm_double[:,0] = norm_double[:,0] + shift

        exp = (np.random.exponential(1,200)-1).reshape(100,2)
        exp[:,0] = exp[:,0] + shift
        exp_med = (np.random.exponential(1,200)+0.3068690141668341-1).reshape(100,2)
        exp_med[:,0] = exp_med[:,0] + shift

        tests=[]
        tests.append(stats.ttest_ind(norm[:,0], norm[:,1], equal_var=False)[1])
        tests.append(stats.ttest_ind(norm_double[:,0], norm_double[:,1], equal_var=False)[1])
        tests.append(stats.ttest_ind(exp_med[:,0], exp_med[:,1], equal_var=False)[1])

        tests.append(stats.mannwhitneyu(norm[:,0], norm[:,1])[1]*2)
        tests.append(stats.mannwhitneyu(norm_double[:,0], norm_double[:,1])[1]*2)
        tests.append(stats.mannwhitneyu(exp_med[:,0], exp_med[:,1])[1]*2)
    
        alltests.append(tests)
        
    alltests=np.array(alltests)
    alltests = alltests<0.05
    
    alltests = pd.DataFrame(np.mean(alltests, axis=0).reshape(-1, 1))
    alltests.columns=["Share of H0 rejected:"]
    alltests.index=["T-test - Normal",
          "T-test - Double_Norm",
          "T-test - Exponential",
          "Mann-Whitney - Normal",
          "Mann-Whitney - Double_Norm",
          "Mann-Whitney - Exponential"]    
    
    return alltests

In [6]:
# No shift: both samples of every pair follow the same distribution, so the
# shares below estimate the type I error rate of each test.
two_sample_tests(0)

Unnamed: 0,Share of H0 rejected:
T-test - Normal,0.049
T-test - Double_Norm,0.048
T-test - Exponential,0.044
Mann-Whitney - Normal,0.048
Mann-Whitney - Double_Norm,0.046
Mann-Whitney - Exponential,0.036


In [7]:
# The first sample of every pair is shifted by 0.3, so the shares below
# estimate the power of each test.
two_sample_tests(0.3)

Unnamed: 0,Share of H0 rejected:
T-test - Normal,0.566
T-test - Double_Norm,0.341
T-test - Exponential,0.545
Mann-Whitney - Normal,0.544
Mann-Whitney - Double_Norm,0.31
Mann-Whitney - Exponential,0.895
