In [1]:
import numpy as np
import scipy.stats
data = [{'x':1,'y':17}, 
        {'x':2,'y':13}, 
        {'x':3,'y':22}, 
        {'x':4,'y':20},
        {'x':5,'y':20},
        {'x':6,'y':28},
        {'x':7,'y':26},
        {'x':8,'y':28},
        {'x':9,'y':34},
        {'x':10,'y':46},
        {'x':11,'y':44}, 
        {'x':12,'y':47}, 
        {'x':13,'y':45}, 
        {'x':14,'y':54},
        {'x':15,'y':55},
        {'x':16,'y':60},
        {'x':17,'y':58},
        {'x':18,'y':61},
        {'x':19,'y':64},
        {'x':20,'y':70}
       ]


In [2]:
x = [value['x'] for value in data]
y = [value['y'] for value in data]

In [3]:
def get_mean(dataset): 
    return sum(dataset)/len(dataset)

def get_mean_of_squared(dataset):
    res = 0
    for value in dataset:
        res += value ** 2
    return res/len(dataset)

def get_mean_of_prod(x,y):
    res = 0
    for i in range(len(x)):
        res += x[i]*y[i]
    return res/len(x)

In [4]:
x_mean = get_mean(x)
y_mean = get_mean(y)
x2_mean = get_mean_of_squared(x)
xy_mean = get_mean_of_prod(x,y)
print(f'x_mean: {x_mean}\n')
print(f'y_mean: {y_mean}\n')
print(f'x2_mean: {x2_mean}\n')
print(f'xy_mean: {xy_mean}\n')

x_mean: 10.5

y_mean: 40.6

x2_mean: 143.5

xy_mean: 525.15



In [22]:
A = np.array([[1,x_mean],[x_mean,x2_mean]])
b = np.array([y_mean,xy_mean])
solved = np.linalg.solve(A,b)
print(f'y=f(x)={solved[0]}+{solved[1]}x')
print(f'a_0: {solved[0]}')
print(f'a_1: {solved[1]}')

y=f(x)=9.384210526315796+2.972932330827067x
a_0: 9.384210526315796
a_1: 2.972932330827067


In [6]:
def regression(x):
    return 9.384210526315796+2.972932330827067*x

def get_yp(dataset):
    return [regression(x) for x in x]

In [7]:
yp = get_yp(x)
print('y_p:')
print(yp)

y_p:
[12.357142857142863, 15.33007518796993, 18.303007518796996, 21.275939849624066, 24.24887218045113, 27.2218045112782, 30.194736842105264, 33.167669172932335, 36.1406015037594, 39.11353383458647, 42.08646616541354, 45.059398496240604, 48.03233082706767, 51.00526315789474, 53.97819548872181, 56.95112781954887, 59.92406015037594, 62.89699248120301, 65.86992481203008, 68.84285714285714]


In [8]:
def get_squared(dataset):
    dataset_mean =  get_mean(dataset)
    res = 0
    for value in dataset:
        res += (value - dataset_mean) ** 2
    return res

def get_conditioned_regression(yp,y):
    y_mean = get_mean(y)
    res = 0
    for value in yp:
        res += (value - y_mean) ** 2
    return res

def get_sum_of_balances(yp,y):
    res = 0
    for i in range(len(yp)):
        res += (y[i] - yp[i]) ** 2
    return res


In [26]:
squared = get_squared(y)
conditioned_regression = get_conditioned_regression(yp,y)
sum_of_balances = get_sum_of_balances(yp,y)
print(f'Общая вариация: {squared}')
print(f'Остаток: {sum_of_balances}')
print(f'Средний квадрат общей вариации: {squared/19}')
print(f'Средний квадрат регрессии: {conditioned_regression}')
print(f'Средний квадрат остатка: {sum_of_balances/18}')

Общая вариация: 6082.8
Остаток: 205.3127819548871
Средний квадрат общей вариации: 320.14736842105265
Средний квадрат регрессии: 5877.487218045112
Средний квадрат остатка: 11.406265664160394


In [25]:
F_observed = conditioned_regression / (sum_of_balances / 18)
F_critical = scipy.stats.f.ppf(q=0.95, dfn=1, dfd=18)
print(f'F_observed: {F_observed}\n')
print(f'F_critical: {F_critical}\n')
if F_observed > F_critical: 
    print("Регрессия значима")
else:
    print("Регрессия незначима")

F_observed: 515.2858429830154

F_critical: 4.413873419170566

Регрессия значима


In [18]:
def a1_variation(average_sum_of_balances,x,a1):
    t_critical = scipy.stats.t.interval(0.975, len(x) - 2)[1]
    x_mean = get_mean(x)
    s = sum([(xi - x_mean) ** 2 for xi in x]) ** 0.5
    return [a1-(t_critical * average_sum_of_balances ** 0.5)/s, a1+(t_critical * average_sum_of_balances ** 0.5)/s]

def a0_variation(average_sum_of_balances,x,a0):
    t_critical = scipy.stats.t.interval(0.975, len(x) - 2)[1]
    x_mean = get_mean(x)
    s1 = sum([xi**2 for xi in x])
    s2 = len(x)*sum([(xi-x_mean)**2 for xi in x])
    return [a0-t_critical * (s1/s2)**0.5 * average_sum_of_balances**0.5,a0+t_critical * (s1/s2)**0.5 * average_sum_of_balances**0.5]

In [19]:
[left_a0,right_a0] = a0_variation(sum_of_balances/18, x, solved[0])
[left_a1,right_a1] = a1_variation(sum_of_balances/18, x, solved[1])

In [27]:
print(f'Интервал для a_0: ({left_a0},{right_a0})')
print(f'Интервал для a_1: ({left_a1},{right_a1})')

Интервал для a_0: (5.548315353723675,13.220105698907917)
Интервал для a_1: (2.652717988684656,3.2931466729694785)
