In [29]:
import numpy as np

In [30]:
data = [
    [83, 85],
    [84, 85, 85, 86, 86, 87],
    [86, 87, 87, 87, 88, 88, 88, 88, 88, 89, 90],
    [89, 90, 90, 91],
    [90, 92]
]
alpha = 0.05

In [31]:
Y = np.concatenate(data).reshape(-1, 1)


In [32]:
total_elements = sum(len(sublist) for sublist in data)
num_groups = len(data)

Psi = np.zeros((total_elements, num_groups), dtype=int)

current_row = 0
for group_idx in range(num_groups):
    group_length = len(data[group_idx])
    Psi[current_row:current_row + group_length, group_idx] = 1
    current_row += group_length


In [33]:
F = Psi.transpose() @ Psi
print(F)

[[ 2  0  0  0  0]
 [ 0  6  0  0  0]
 [ 0  0 11  0  0]
 [ 0  0  0  4  0]
 [ 0  0  0  0  2]]


In [34]:
F_inverse = np.linalg.inv(F)
print(F_inverse)

[[0.5        0.         0.         0.         0.        ]
 [0.         0.16666667 0.         0.         0.        ]
 [0.         0.         0.09090909 0.         0.        ]
 [0.         0.         0.         0.25       0.        ]
 [0.         0.         0.         0.         0.5       ]]


In [35]:
beta = F_inverse @ Psi.T @ Y
print(beta)

[[84.        ]
 [85.5       ]
 [87.81818182]
 [90.        ]
 [91.        ]]


In [36]:
e = Y - (Psi @ beta)
print(e)

[[-1.        ]
 [ 1.        ]
 [-1.5       ]
 [-0.5       ]
 [-0.5       ]
 [ 0.5       ]
 [ 0.5       ]
 [ 1.5       ]
 [-1.81818182]
 [-0.81818182]
 [-0.81818182]
 [-0.81818182]
 [ 0.18181818]
 [ 0.18181818]
 [ 0.18181818]
 [ 0.18181818]
 [ 0.18181818]
 [ 1.18181818]
 [ 2.18181818]
 [-1.        ]
 [ 0.        ]
 [ 0.        ]
 [ 1.        ]
 [-1.        ]
 [ 1.        ]]


In [37]:
RSS = (e.T @ e) [0][0]
RSS

23.136363636363633

In [38]:
all_values = [num for group in data for num in group]
y_mean = np.mean(all_values)
TSS = sum((y - y_mean) ** 2 for y in all_values)
TSS

122.16

In [39]:
R_square = (TSS - RSS) / TSS
R_square

0.8106060606060607

In [40]:
from scipy.stats import f

In [41]:
delta = float(((TSS - RSS) * (20)) / (RSS * (4)))
p_val = float(f.sf(delta, 4, 20) + f.cdf(-delta, 4, 20))
print(f"{delta = } {p_val = }")

delta = 21.400000000000002 p_val = 5.407435042473705e-07


## Регрессия значима

# Проверка значимости коэффицентов

In [42]:
from scipy.stats import t

In [43]:
df = 20 #степени свободы

In [44]:
print("p-value beta_i")
for i in range(len(beta)):
    delta = (beta[i] * df**0.5) / ((RSS * F_inverse[i][i]) ** 0.5)
    print(f"beta {i + 1} - {2 * t.sf(delta, df)}")

p-value beta_i
beta 1 - [2.43373331e-29]
beta 2 - [2.92367243e-34]
beta 3 - [4.00132213e-37]
beta 4 - [6.03309738e-33]
beta 5 - [4.92078904e-30]


## все беты значимы

# Проверка равенства коэффицентов регрессии

In [45]:
def p_value_for_equality(beta1, beta2, RSS, F_inv_ii, F_inv_jj, df):
    delta = (beta1 - beta2) / ((RSS * (F_inv_ii + F_inv_jj)) ** 0.5)
    return 2 * t.sf(abs(delta), df)

In [46]:
p_values = []
for i in range(len(beta)):
    for j in range(i + 1, len(beta)):
        print(f"p-value равенства beta {i + 1} и beta {j + 1} =", end=" ")
        p_val = p_value_for_equality(beta[i], beta[j], RSS, F_inverse[i][i], F_inverse[j][j], df)
        p_values.append(p_val[0])
        print(p_val[0])

p-value равенства beta 1 и beta 2 = 0.706537618418196
p-value равенства beta 1 и beta 3 = 0.31409893297139535
p-value равенства beta 1 и beta 4 = 0.16523557903947017
p-value равенства beta 1 и beta 5 = 0.1611055473528507
p-value равенства beta 2 и beta 3 = 0.35364296187248634
p-value равенства beta 2 и beta 4 = 0.16274229159822798
p-value равенства beta 2 и beta 5 = 0.17670875048429363
p-value равенства beta 3 и beta 4 = 0.4463237073584625
p-value равенства beta 3 и beta 5 = 0.3996941410569338
p-value равенства beta 4 и beta 5 = 0.812726249776298


# Проверка Холм-Бонферрони

In [47]:
m = len(p_values)
p_values = np.array(p_values)
ind = np.argsort(p_values)
k = 0
for i in range(len(beta)):
    for j in range(i+1, len(beta)):
        mi = m - np.argwhere(ind == k)[0][0]
        print(f"коэффиценты {i+1, j+1} равны," if p_values[k] > alpha / (m - ind[k]) else f"коэффиценты {i+1, j+1} не равны,","p-value = ", p_values[k], "alpha/m", alpha / mi)
        k += 1

коэффиценты (1, 2) равны, p-value =  0.706537618418196 alpha/m 0.025
коэффиценты (1, 3) равны, p-value =  0.31409893297139535 alpha/m 0.008333333333333333
коэффиценты (1, 4) равны, p-value =  0.16523557903947017 alpha/m 0.00625
коэффиценты (1, 5) равны, p-value =  0.1611055473528507 alpha/m 0.005
коэффиценты (2, 3) равны, p-value =  0.35364296187248634 alpha/m 0.01
коэффиценты (2, 4) равны, p-value =  0.16274229159822798 alpha/m 0.005555555555555556
коэффиценты (2, 5) равны, p-value =  0.17670875048429363 alpha/m 0.0071428571428571435
коэффиценты (3, 4) равны, p-value =  0.4463237073584625 alpha/m 0.016666666666666666
коэффиценты (3, 5) равны, p-value =  0.3996941410569338 alpha/m 0.0125
коэффиценты (4, 5) равны, p-value =  0.812726249776298 alpha/m 0.05


## все коэффиценты попарно равны