# Утилиты

In [71]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, ttest_rel, ttest_ind, t, f
from typing import Tuple, Optional, List
import plotly.express as px
import plotly.graph_objects as go

In [72]:
def plot_box_plot(data: pd.DataFrame, columns: List[str]) -> go.Figure:
    fig = go.Figure()

    for column in columns:
        fig.add_box(y=data[column], name=column)

    fig.update_layout(showlegend=False, margin={'l': 5, 'r': 5, 't': 5, 'b': 5})

    return fig

In [73]:
def calculate_mean(x: np.ndarray) -> float:
    return x.sum() / len(x)


def calculate_var(x: np.ndarray) -> float:
    return 1 / (len(x) - 1) * sum((x - calculate_mean(x)) ** 2)

In [74]:
def ttest_1_sample(
        x: np.ndarray,
        expected_mean: float,
        alpha: float = 0.05,
        alternative: str = 'two-sided',
) -> Tuple[float, float, Tuple[float, float]]:
    n = len(x)

    mean = calculate_mean(x)
    var = calculate_var(x)
    std = np.sqrt(var)

    statistics = (mean - expected_mean) / std * np.sqrt(n)

    right_tail_p_value = t.sf(statistics, n - 1)
    left_tail_p_value = t.cdf(statistics, n - 1)  
    two_sided_p_value = 2 * min(left_tail_p_value, right_tail_p_value)

    bias = t.ppf(1 - alpha / 2, n - 1) * std / np.sqrt(n)
    interval = mean - bias, mean + bias 

    if alternative == 'less':
        p_value = left_tail_p_value
    elif alternative == 'greater':
        p_value = right_tail_p_value
    else:
        p_value = two_sided_p_value

    return statistics, p_value, interval

In [75]:
def ttest_2_sample_rel(x: np.ndarray, y: np.ndarray) -> Tuple[float, float]:
    return ttest_1_sample(x - y, 0)[:2]

In [76]:
def fisher_test(x: np.ndarray, y: np.ndarray, alternative: str = 'two-sided') -> Tuple[float, float]:
    mean_x = calculate_mean(x)
    mean_y = calculate_mean(y)

    var_x = calculate_var(x)
    var_y = calculate_var(y)

    statistics = var_x / var_y

    right_tail_p_value = f.sf(statistics, len(x) - 1, len(y) - 1)
    left_tail_p_value = f.cdf(statistics, len(x) - 1, len(y) - 1)
    two_sided_p_value = 2 * min(left_tail_p_value, right_tail_p_value)

    if alternative == 'less':
        p_value = left_tail_p_value
    elif alternative == 'greater':
        p_value = right_tail_p_value
    else:
        p_value = two_sided_p_value

    return statistics, p_value

In [77]:
def ttest_2_sample_ind(x: np.ndarray, y: np.ndarray, var_eq: bool = False, alternative: str = 'two-sided') -> Tuple[float, float]:
    n = len(x)
    m = len(y)

    mean_x = calculate_mean(x)
    mean_y = calculate_mean(y)

    var_x = calculate_var(x)
    var_y = calculate_var(y)

    if var_eq:
        statistics = (
                (mean_x - mean_y)
                * np.sqrt(n + m - 2)
                / np.sqrt(1 / n + 1 / m)
                / np.sqrt((n - 1) * var_x + (m - 1) * var_y)
        )
        df = n + m - 2
    else:
        statistics = (mean_x - mean_y) / np.sqrt(var_x / n + var_y / m)
        df = (var_x / n + var_y / m) ** 2 / ((var_x / n) ** 2 / (n - 1) + (var_y / m) ** 2 / (m - 1))

    right_tail_p_value = t.sf(statistics, n - 1)
    left_tail_p_value = t.cdf(statistics, n - 1)  
    two_sided_p_value = 2 * min(left_tail_p_value, right_tail_p_value)

    if alternative == 'less':
        p_value = left_tail_p_value
    elif alternative == 'greater':
        p_value = right_tail_p_value
    else:
        p_value = two_sided_p_value

    return statistics, p_value

# VegStNx

In [78]:
coma = pd.read_csv('VegStNx.csv', index_col=0)
coma

Unnamed: 0,Class,латеральные.отделы.префронтальной.коры.справа,латеральные.отделы.префронтальной.коры.слева,парасагитальные.отделы.префронтальной.коры.справа,парасагитальные.отделы.префронтальной.коры.слева,сенсомоторная.кора.справа,сенсомоторная.кора.слева,передняя.часть.правой.поясной.извилины,передняя.часть.левой.поясной.извилины,задняя.часть.поясной.извилины.справа,...,первичная.зрительная.кора.слева,латеральная.кора.височных.долей.справа,латеральная.кора.височных.долей.слева,медиальная.кора.височных.долей.справа,медиальная.кора.височных.долей.слева,мозжечок,мост,sex,age,et
1,2,-4.97,-5.08,-4.23,-3.93,3.5,3.89,-5.2,-5.21,-5.7,...,-1.21,-3.87,-3.4,-3.79,-3.67,0.0,-0.53,1,43,2
2,1,-1.47,-1.61,-2.95,-2.54,-2.17,-2.44,-2.16,-1.93,-4.43,...,-3.05,-0.77,-0.26,-2.24,-1.34,-0.64,0.0,2,37,2
3,1,-0.92,-0.84,-0.55,-0.85,-0.51,0.0,1.37,1.7,-1.94,...,-1.48,1.1,-0.36,4.74,1.77,0.0,3.36,1,32,1
4,3,-4.51,-4.58,-5.25,-5.17,-4.04,-4.27,-3.65,-3.53,-4.4,...,-2.83,-3.75,-3.95,-0.98,-1.02,-5.28,0.0,2,21,2
5,3,-5.31,-5.8,-4.1,-4.19,-5.67,-5.52,-2.37,-2.32,-4.42,...,-3.28,-3.8,-4.03,-0.81,-1.65,-1.7,0.0,2,33,2
6,1,-1.22,-1.36,-1.22,-1.07,0.29,-0.29,-0.42,0.11,-3.5,...,2.9,-1.15,0.28,-1.42,1.9,0.0,3.16,1,22,1
7,1,-3.88,-4.5,-4.6,-5.99,-4.58,-3.96,-3.79,-4.21,-7.15,...,-7.99,-4.56,-4.71,-8.36,-9.81,1.0,0.0,1,28,1
8,3,-3.14,-1.86,-1.29,-1.25,2.06,1.93,-0.59,-1.29,-3.4,...,-2.76,-1.64,-1.64,-0.85,0.54,0.0,7.48,1,15,2
9,3,-1.62,-1.27,-0.52,-0.29,6.15,6.61,-0.02,0.07,-2.05,...,5.8,0.37,0.53,2.0,3.36,0.0,5.33,1,15,2
10,2,-4.11,-2.58,-1.33,-0.66,-5.69,-5.03,-3.93,-4.45,-8.8,...,-6.84,-0.18,1.15,-3.65,-4.16,0.0,1.98,2,36,2


# Проверка равенства среднего значения выборки заданному $\boldsymbol{\mu}$

In [79]:
fig = plot_box_plot(coma, ['латеральные.отделы.префронтальной.коры.справа']) 
# fig.write_image('1_sample.svg')
fig.show()

column = 'латеральные.отделы.префронтальной.коры.справа'

print(calculate_mean(coma[column]))
print(ttest_1samp(coma[column], -3))
print(ttest_1_sample(coma[column], -3))

-3.4484615384615385
Ttest_1sampResult(statistic=-1.2633683783957312, pvalue=0.21220182433455229)
(-1.2633683783957312, 0.21220182433455229, (-4.16109937288388, -2.735823704039197))


Unsupported

# Проверка равенства дисперсий

In [80]:
column_1 = 'нижние.отделы.теменной.доли.слева'
column_2 = 'мозжечок'

fig = plot_box_plot(coma, [column_1, column_2])
# fig.write_image('2_ind_var_false.svg')
fig.show()

print(calculate_var(coma[column_1]))
print(calculate_var(coma[column_2]))

print(fisher_test(coma[column_1], coma[column_2]))

10.548978393665156
4.2634176093514355
(2.474300985792921, 0.0015416867842078029)


Unsupported

# Проверка равенства средних значений для двух независимых выборок

In [81]:
column_1 = 'латеральные.отделы.префронтальной.коры.справа'
column_2 = 'латеральные.отделы.префронтальной.коры.слева'

print(calculate_mean(coma[column_1]))
print(calculate_mean(coma[column_2]))

print(ttest_ind(coma[column_1], coma[column_2], equal_var=True))
print(ttest_2_sample_ind(coma[column_1], coma[column_2], var_eq=True))

-3.4484615384615385
-3.723846153846154
Ttest_indResult(statistic=0.5268220352009758, pvalue=0.5994610331780352)
(0.5268220352009758, 0.6006012434051156)


# data_big

In [82]:
alcohol = pd.read_csv('data_big.csv', index_col=0)
alcohol

Unnamed: 0,X.1,depressed.mood.1,anxiety.1,suspiciousness.1,irritability.1,craving.to.alcohol.1,weakness.1,insomia.1,headache.1,tremor.1,...,combined2.9,HR.9,SBP.9,DBP.9,MBP.9,SV.9,CO.9,SI.9,CI.9,TPR.9
1,1,1,1,0,1,1,1,1,0,1,...,0,68.0,108.0,70.0,83.0,74.0,5.0,40.0,2.7,1321.0
2,2,1,1,0,0,1,1,2,1,1,...,9,63.0,114.0,70.0,85.0,123.0,7.7,60.0,3.8,879.0
3,3,1,1,0,0,0,2,1,0,2,...,0,64.0,120.0,80.0,93.0,106.0,6.8,55.0,3.5,1098.0
4,4,2,2,0,0,0,2,0,0,1,...,0,56.0,124.0,90.0,101.0,90.0,5.1,43.0,2.4,1600.0
5,5,1,1,0,0,2,2,1,0,1,...,0,66.0,116.0,78.0,90.0,90.0,5.9,45.0,2.2,1228.0
6,6,1,1,0,1,1,2,0,0,2,...,0,62.0,142.0,90.0,107.0,113.0,7.0,56.0,3.4,1225.0
7,7,1,1,0,1,1,2,2,1,1,...,0,85.0,110.0,84.0,93.0,46.0,3.9,21.0,1.8,1899.0
8,8,1,1,0,1,0,1,2,1,1,...,0,68.0,134.0,88.0,103.0,74.0,6.4,33.0,2.9,1290.0
9,9,1,1,0,0,2,1,0,0,1,...,0,76.0,134.0,76.0,96.0,116.0,8.6,60.0,4.4,886.0
10,10,1,1,0,1,1,1,2,1,1,...,0,64.0,110.0,70.0,83.0,43.0,2.7,27.0,1.7,2467.0


# Проверка равенства средних значений для двух зависимых выборок

In [83]:
column_1 = 'anxiety.1'
column_2 = 'anxiety.2'

fig = plot_box_plot(alcohol, [column_1, column_2]) 
# fig.write_image('2_rel_true.svg')
fig.show()

print(calculate_mean(alcohol[column_1]))
print(calculate_mean(alcohol[column_2]))
print(ttest_rel(alcohol[column_1], alcohol[column_2]))
print(ttest_2_sample_rel(alcohol[column_1], alcohol[column_2]))

0.9705882352941176
0.5
Ttest_relResult(statistic=3.884033249820898, pvalue=0.0004670417285765681)
(3.8840332498208987, 0.0004670417285765681)


Unsupported