In [80]:
import matplotlib.pyplot as plt
import pandas as pd

# Normality tests
from scipy.stats import normaltest as dagostino_test # D'Agostino test
from scipy.stats import anderson as and_dar_test # Anderson-Darling Test
from scipy.stats import kstest as kol_smi_test # Kolmogorov-Smirnov Test
from scipy.stats import shapiro as shap_wil_test # Shapiro-Wilk Test
from scipy.stats import jarque_bera as jarque_bera_test # Jarque-Bera Test

# ANOVA
from scipy.stats import f_oneway # Parametric
from scipy.stats import kruskal # Non-Parametric

# Statistical Test
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scikit_posthocs import posthoc_dunn


In [81]:
date = pd.read_csv('D:/thesis_data/VEG_INDICES/samples/campestre_20170112_distribution.csv')

In [82]:
date

Unnamed: 0.1,Unnamed: 0,DpRVI,PRVI,DPSVI,DPSVIm,RVI
0,3312,0.673583,0.008824,0.030198,0.009195,0.696071
1,3313,0.572998,0.008162,0.029895,0.008925,0.698186
2,3314,0.528433,0.006260,0.028326,0.007686,0.707225
3,3315,0.458040,0.004134,0.026111,0.006883,0.692201
4,3316,0.384959,0.002841,0.026787,0.008512,0.653687
...,...,...,...,...,...,...
779554,5440584,0.403111,0.002716,0.021809,0.002600,0.855248
779555,5443019,0.521546,0.004391,0.028347,0.004724,0.838521
779556,5443020,0.538302,0.004688,0.021946,0.003128,0.805824
779557,5443021,0.461475,0.003599,0.021496,0.002203,0.895944


In [51]:
p_value = 0.05

In [72]:
# Teste de normalidade (d'agostino e Pearson)

print('H0: The data come from a normal distribution\n')

estatistica, p_valor = dagostino_test(date['DpRVI'])

print("estatística =", round(estatistica, 3))
print("p-valor =", round(p_valor, 3))
print()

# Estrutura condicional que testará a hipótese nula
if p_valor < 0.05:
    print("Reject H0")
else:
    print("Accept H0")

H0: The data come from a normal distribution

estatística = 6230.453
p-valor = 0.0

Reject H0


In [73]:
# Normality_test (Anderson-Darling)
print('H0: The data come from a normal distribution\n')

statistic, critical_values, significance_level = and_dar_test(date['RVI'])

print("statistic =", round(statistic, 3))
print("critical value =", critical_values[2])
print("significance level (%) = ", significance_level[2])
print()

if statistic > critical_values[2]:
    print("Reject H0")
else:
    print("Accept H0")

H0: The data come from a normal distribution

statistic = 2881.376
critical value = 0.787
significance level (%) =  5.0

Reject H0


In [79]:
# Teste de normalidade (Kolmogorov-Smirnov)

print('H0: The two distribution are identical')
print('H1: The two distribution are not identical\n')

statistic, p_valor = kol_smi_test(date['DpRVI'], 'norm')

print("statistic =", round(estatistica, 3))
print("p-value =", round(p_valor, 3))
print()

# Estrutura condicional que testará a hipótese nula
if p_valor < p_value:
    print("Reject H0")
else:
    print("Accept H0")

H0: The two distribution are identical
H1: The two distribution are not identical

statistic = 4557.607
p-value = 0.0

Reject H0


In [76]:
# Teste de normalidade (Shapiro-Wilk)

print('H0: The data come from a normal distribution\n')

estatistica, p_valor = shap_wil_test(date['DpRVI'])

print("estatística =", round(estatistica, 3))
print("p-valor =", round(p_valor, 3))
print()

# Estrutura condicional que testará a hipótese nula
if p_valor < 0.05:
    print("Reject H0")
else:
    print("Accept H0")

H0: The data come from a normal distribution

estatística = 0.998
p-valor = 0.0

Reject H0


In [77]:
# Teste de normalidade (Shapiro-Wilk)

print('H0: The data come from a normal distribution\n')

estatistica, p_valor = jarque_bera_test(date['DpRVI'])

print("estatística =", round(estatistica, 3))
print("p-valor =", round(p_valor, 3))
print()

# Estrutura condicional que testará a hipótese nula
if p_valor < 0.05:
    print("Reject H0")
else:
    print("Accept H0")

H0: The data come from a normal distribution

estatística = 4557.607
p-valor = 0.0

Reject H0
