# Утилиты

In [272]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from typing import Tuple
from scipy.stats import t

In [273]:
class LM:
    """Simple (single-predictor) least-squares linear regression ``y = alpha + beta * x``.

    ``fit`` estimates ``alpha`` (intercept) and ``beta`` (slope) and caches the
    summary statistics that ``get_confidance_interval`` and ``check_correlation``
    need, so those methods can be called without passing the training data again.

    ``fit`` relies on a module-level ``calculate_q(y_true, y_pred)`` helper that
    returns ``(q_r, q_e, q)``; it must be defined before ``fit`` is called.
    """

    # Fitted intercept and slope.
    alpha: float
    beta: float

    # Statistics of the training data cached by ``fit``.
    _x_mean: float            # mean of X
    _x_sq_mean: float         # mean of X ** 2
    _x_center_sq_sum: float   # sum of (X - mean(X)) ** 2
    _q_r: float               # first value returned by calculate_q (regression sum of squares)
    _q_e: float               # second value returned by calculate_q (residual sum of squares)
    _q: float                 # third value returned by calculate_q (total sum of squares)
    _n: int                   # number of training observations

    def __str__(self):
        """Return the instance attributes (parameters and cached statistics) as a dict string."""
        return str(self.__dict__)

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Estimate ``alpha`` and ``beta`` from the data and cache summary statistics.

        :param X: predictor values (array supporting ``.mean()`` and elementwise arithmetic)
        :param y: response values, same length as ``X``
        """
        n = len(X)

        X_mean = X.mean()
        y_mean = y.mean()

        # Slope: sum(x*y) - n*mean(x)*mean(y) divided by sum(x^2) - n*mean(x)^2.
        beta = (X * y).sum() - n * X_mean * y_mean
        beta /= (X ** 2).sum() - n * X_mean ** 2

        alpha = y_mean - beta * X_mean

        self.alpha = alpha
        self.beta = beta

        self._x_mean = X_mean
        self._x_sq_mean = (X ** 2).mean()
        # Array reduction instead of the Python builtin ``sum`` over an ndarray.
        self._x_center_sq_sum = ((X - X_mean) ** 2).sum()
        self._q_r, self._q_e, self._q = calculate_q(y, self.predict(X))
        self._n = n

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the fitted line ``alpha + beta * X`` evaluated at ``X``."""
        return self.alpha + self.beta * X

    def get_confidance_interval(self, X: np.ndarray, alpha: float = 0.05) -> Tuple[np.ndarray, np.ndarray]:
        """Two-sided confidence band for the regression line at the points ``X``.

        The standard error of the fitted value at ``x`` is
        ``sqrt(q_e / (n - 2) * (1 / n + (x - mean(X)) ** 2 / Sxx))`` where
        ``Sxx = sum((X - mean(X)) ** 2)`` over the training data. The band is
        therefore narrowest at the training mean of ``X``.

        :param X: points at which to evaluate the band
        :param alpha: significance level; the band has coverage ``1 - alpha``
            (unrelated to the fitted intercept ``self.alpha``)
        :returns: ``(upper, lower)`` bounds, in that order
        """
        dof = self._n - 2

        # Residual variance estimate and the position-dependent factor
        # 1/n + (x - x_mean)^2 / Sxx. The centred form includes the covariance
        # between the intercept and slope estimates.
        residual_variance = self._q_e / dof
        position_factor = 1 / self._n + (X - self._x_mean) ** 2 / self._x_center_sq_sum

        s = np.sqrt(residual_variance * position_factor)
        margin = t.ppf(1 - alpha / 2, df=dof) * s

        fitted = self.predict(X)
        return (
            fitted + margin,
            fitted - margin,
        )

    def check_correlation(self) -> Tuple[float, float]:
        """Student t-test of the hypothesis that X and y are uncorrelated.

        Uses ``r2 = q_r / q`` and the statistic ``sqrt(r2) / sqrt(1 - r2) * sqrt(n - 2)``
        with ``n - 2`` degrees of freedom.

        NOTE(review): the statistic is built from ``sqrt(r2)`` and is therefore
        non-negative; the sign of the correlation is not carried. The two-sided
        p-value does not depend on that sign.

        :returns: ``(statistic, p_value)`` where ``p_value`` is two-sided
        """
        df = self._n - 2
        r2 = self._q_r / self._q

        statistic = np.sqrt(r2) / np.sqrt(1 - r2) * np.sqrt(df)
        # Two-sided p-value: twice the smaller tail probability.
        p_value = 2 * min(t.sf(statistic, df), t.cdf(statistic, df))

        return statistic, p_value
        

In [274]:
def calculate_q(y_true: np.ndarray, y_pred: np.ndarray) -> Tuple[float, float, float]:
    """Compute three sums of squares for observed and predicted values.

    :param y_true: observed values
    :param y_pred: predicted values, same length as ``y_true``
    :returns: q_r, q_e, q where
        q_r is the sum of squared deviations of ``y_pred`` from the mean of ``y_true``,
        q_e is the sum of squared differences ``y_true - y_pred``,
        q is the sum of squared deviations of ``y_true`` from its own mean
    """
    center = y_true.mean()

    def _sum_of_squares(deviation):
        # Square elementwise, then reduce to a scalar.
        return (deviation ** 2).sum()

    return (
        _sum_of_squares(y_pred - center),
        _sum_of_squares(y_true - y_pred),
        _sum_of_squares(y_true - center),
    )

In [275]:
def calculate_r2(y_true: np.ndarray, y_pred: np.ndarray) -> float:
    """Return the ratio ``q_r / q`` of the sums of squares given by ``calculate_q``.

    :param y_true: observed values
    :param y_pred: predicted values
    :returns: first value of ``calculate_q`` divided by its third value
    """
    sums_of_squares = calculate_q(y_true, y_pred)
    explained, total = sums_of_squares[0], sums_of_squares[2]
    return explained / total

# Тестирование

In [276]:
# Three sample datasets are assigned in turn; each assignment overwrites the
# previous one, so only the last (X, y) pair is used below.
X = np.array([68, 80, 92, 81, 70, 79, 78, 66, 57, 76])
y = np.array([60, 84, 87, 79, 74, 71, 72, 67, 57, 70])

X = np.array([2372, 2732, 2489, 3378, 4130, 1171, 689])
y = np.array([68.9, 68.1, 67.6, 69.2, 69.2, 64.6, 67])

X = np.array([25.3, 28, 30, 23.5, 18, 38.4, 29.6])
y = np.array([68.9, 68.1, 67.6, 69.2, 69.2, 64.6, 67])

# Fit the hand-written model.
lm = LM()
lm.fit(X, y)

# Fit scikit-learn's LinearRegression on the same data, reshaped to column vectors.
clf = LinearRegression()
clf.fit(X.reshape(-1, 1), y.reshape(-1, 1))

# Print both sets of fitted parameters.
print(lm)
print(clf.intercept_, clf.coef_)

{'alpha': 74.46046346064827, 'beta': -0.2418218061438691, '_x_sq_mean': 793.0085714285714, '_x_center_sq_sum': 240.79714285714283, '_q_r': 14.081283771756956, '_q_e': 2.0587162282419103, '_q': 16.140000000000065, '_n': 7}
[74.46046346] [[-0.24182181]]


In [277]:
# Print R^2 three ways: the hand-written helper, scikit-learn's r2_score, and
# the squared correlation coefficient between y and the fitted values.
print(calculate_r2(y, lm.predict(X)))
print(r2_score(y, lm.predict(X)))
print(np.corrcoef(y, lm.predict(X))[0, 1] ** 2)
print()
# (statistic, p_value) of the correlation test on the fitted model.
print(lm.check_correlation())

0.8724463303442936
0.8724463303443679
0.8724463303443678

(5.848006996316052, 0.002070134531735098)


In [278]:
# All line traces are drawn against the sorted X values.
X_sorted = np.sort(X)

fig = go.Figure()

# Raw observations.
fig.add_scatter(x=X, y=y, mode='markers', name='Данные')

# Hand-written fit next to the scikit-learn fit.
fig.add_scatter(x=X_sorted, y=lm.predict(X_sorted), name='Моя')
fig.add_scatter(x=X_sorted, y=clf.predict(X_sorted.reshape(-1, 1))[:, 0], name='Sklearn')

conf_int_upper, conf_int_lower = lm.get_confidance_interval(X_sorted)

# Both bounds share one style and one legend group; the lower bound is
# hidden from the legend.
band_style = dict(
    x=X_sorted,
    marker_color='red',
    line_dash='dot',
    legendgroup="group",
    name='Доверительный интервал',
)
fig.add_scatter(y=conf_int_upper, **band_style)
fig.add_scatter(y=conf_int_lower, showlegend=False, **band_style)

fig.show()

Unsupported

# Пример №1

In [279]:
# Load the first example dataset, using the first CSV column as the row index.
df = pd.read_csv('data_big.csv', index_col=0)
df  # bare last expression so the notebook displays the frame

Unnamed: 0,X.1,depressed.mood.1,anxiety.1,suspiciousness.1,irritability.1,craving.to.alcohol.1,weakness.1,insomia.1,headache.1,tremor.1,...,combined2.9,HR.9,SBP.9,DBP.9,MBP.9,SV.9,CO.9,SI.9,CI.9,TPR.9
1,1,1,1,0,1,1,1,1,0,1,...,0,68.0,108.0,70.0,83.0,74.0,5.0,40.0,2.7,1321.0
2,2,1,1,0,0,1,1,2,1,1,...,9,63.0,114.0,70.0,85.0,123.0,7.7,60.0,3.8,879.0
3,3,1,1,0,0,0,2,1,0,2,...,0,64.0,120.0,80.0,93.0,106.0,6.8,55.0,3.5,1098.0
4,4,2,2,0,0,0,2,0,0,1,...,0,56.0,124.0,90.0,101.0,90.0,5.1,43.0,2.4,1600.0
5,5,1,1,0,0,2,2,1,0,1,...,0,66.0,116.0,78.0,90.0,90.0,5.9,45.0,2.2,1228.0
6,6,1,1,0,1,1,2,0,0,2,...,0,62.0,142.0,90.0,107.0,113.0,7.0,56.0,3.4,1225.0
7,7,1,1,0,1,1,2,2,1,1,...,0,85.0,110.0,84.0,93.0,46.0,3.9,21.0,1.8,1899.0
8,8,1,1,0,1,0,1,2,1,1,...,0,68.0,134.0,88.0,103.0,74.0,6.4,33.0,2.9,1290.0
9,9,1,1,0,0,2,1,0,0,1,...,0,76.0,134.0,76.0,96.0,116.0,8.6,60.0,4.4,886.0
10,10,1,1,0,1,1,1,2,1,1,...,0,64.0,110.0,70.0,83.0,43.0,2.7,27.0,1.7,2467.0


In [280]:
# Columns used as predictor and response; also reused as axis titles.
x_col, y_col = 'HR.1', 'SBP.1'

X = df[x_col].to_numpy()
y = df[y_col].to_numpy()

# Reorder both arrays by ascending X.
sorter = np.argsort(X)
X, y = X[sorter], y[sorter]

lm = LM()
lm.fit(X, y)

# scikit-learn fit on the same data; it is not plotted in this cell.
clf = LinearRegression()
clf.fit(X.reshape(-1, 1), y.reshape(-1, 1))

# Fitted values, reused for the plot and the statistics below.
y_fit = lm.predict(X)

fig = go.Figure()
fig.add_scatter(x=X, y=y, mode='markers', name='Данные')
fig.add_scatter(x=X, y=y_fit, name='Линия регрессии')

conf_int_upper, conf_int_lower = lm.get_confidance_interval(X)

# Both bounds share one style and one legend group; the lower bound is
# hidden from the legend.
band_style = dict(
    x=X,
    marker_color='red',
    line_dash='dot',
    legendgroup="group",
    name='Доверительный интервал',
)
fig.add_scatter(y=conf_int_upper, **band_style)
fig.add_scatter(y=conf_int_lower, showlegend=False, **band_style)

fig.update_xaxes(title=x_col)
fig.update_yaxes(title=y_col)

fig.show()

# Fitted parameters, R^2, correlation and the correlation test.
print(f'alpha = {lm.alpha}, beta = {lm.beta}')
print(f'r2 = {calculate_r2(y, y_fit)}')
s = np.corrcoef([y, y_fit])[0, 1]
print(f's = {s}, s2 = {s ** 2}')
print(lm.check_correlation())

# Margins are tightened after the figure has been shown, before exporting it.
fig.update_layout(margin=dict(l=5, r=5, t=5, b=5),)
fig.write_image("example_1.svg")

alpha = 130.52036729642862, beta = 0.1360329061206138
r2 = 0.020789978436146595
s = 0.14418730331116758, s2 = 0.020789978436146637
(0.8242597325382834, 0.41589370939159764)


Unsupported

# Пример №2

In [281]:
# Load the second example dataset, using the first CSV column as the row index.
# Note: this rebinds `df`, replacing the frame loaded for the first example.
df = pd.read_csv('VegStNx.csv', index_col=0)
df  # bare last expression so the notebook displays the frame

Unnamed: 0,Class,латеральные.отделы.префронтальной.коры.справа,латеральные.отделы.префронтальной.коры.слева,парасагитальные.отделы.префронтальной.коры.справа,парасагитальные.отделы.префронтальной.коры.слева,сенсомоторная.кора.справа,сенсомоторная.кора.слева,передняя.часть.правой.поясной.извилины,передняя.часть.левой.поясной.извилины,задняя.часть.поясной.извилины.справа,...,первичная.зрительная.кора.слева,латеральная.кора.височных.долей.справа,латеральная.кора.височных.долей.слева,медиальная.кора.височных.долей.справа,медиальная.кора.височных.долей.слева,мозжечок,мост,sex,age,et
1,2,-4.97,-5.08,-4.23,-3.93,3.5,3.89,-5.2,-5.21,-5.7,...,-1.21,-3.87,-3.4,-3.79,-3.67,0.0,-0.53,1,43,2
2,1,-1.47,-1.61,-2.95,-2.54,-2.17,-2.44,-2.16,-1.93,-4.43,...,-3.05,-0.77,-0.26,-2.24,-1.34,-0.64,0.0,2,37,2
3,1,-0.92,-0.84,-0.55,-0.85,-0.51,0.0,1.37,1.7,-1.94,...,-1.48,1.1,-0.36,4.74,1.77,0.0,3.36,1,32,1
4,3,-4.51,-4.58,-5.25,-5.17,-4.04,-4.27,-3.65,-3.53,-4.4,...,-2.83,-3.75,-3.95,-0.98,-1.02,-5.28,0.0,2,21,2
5,3,-5.31,-5.8,-4.1,-4.19,-5.67,-5.52,-2.37,-2.32,-4.42,...,-3.28,-3.8,-4.03,-0.81,-1.65,-1.7,0.0,2,33,2
6,1,-1.22,-1.36,-1.22,-1.07,0.29,-0.29,-0.42,0.11,-3.5,...,2.9,-1.15,0.28,-1.42,1.9,0.0,3.16,1,22,1
7,1,-3.88,-4.5,-4.6,-5.99,-4.58,-3.96,-3.79,-4.21,-7.15,...,-7.99,-4.56,-4.71,-8.36,-9.81,1.0,0.0,1,28,1
8,3,-3.14,-1.86,-1.29,-1.25,2.06,1.93,-0.59,-1.29,-3.4,...,-2.76,-1.64,-1.64,-0.85,0.54,0.0,7.48,1,15,2
9,3,-1.62,-1.27,-0.52,-0.29,6.15,6.61,-0.02,0.07,-2.05,...,5.8,0.37,0.53,2.0,3.36,0.0,5.33,1,15,2
10,2,-4.11,-2.58,-1.33,-0.66,-5.69,-5.03,-3.93,-4.45,-8.8,...,-6.84,-0.18,1.15,-3.65,-4.16,0.0,1.98,2,36,2


In [283]:
# Columns used as predictor and response; also reused as axis titles.
x_col = 'латеральные.отделы.префронтальной.коры.справа'
y_col = 'латеральные.отделы.префронтальной.коры.слева'

X = df[x_col].to_numpy()
y = df[y_col].to_numpy()

# Reorder both arrays by ascending X.
sorter = np.argsort(X)
X, y = X[sorter], y[sorter]

lm = LM()
lm.fit(X, y)

# scikit-learn fit on the same data; it is not plotted in this cell.
clf = LinearRegression()
clf.fit(X.reshape(-1, 1), y.reshape(-1, 1))

# Fitted values, reused for the plot and the statistics below.
y_fit = lm.predict(X)

fig = go.Figure()
fig.add_scatter(x=X, y=y, mode='markers', name='Данные')
fig.add_scatter(x=X, y=y_fit, name='Линия регрессии')

fig.update_xaxes(title=x_col)
fig.update_yaxes(title=y_col)

conf_int_upper, conf_int_lower = lm.get_confidance_interval(X)

# Both bounds share one style and one legend group; the lower bound is
# hidden from the legend.
band_style = dict(
    x=X,
    marker_color='red',
    line_dash='dot',
    legendgroup="group",
    name='Доверительный интервал',
)
fig.add_scatter(y=conf_int_upper, **band_style)
fig.add_scatter(y=conf_int_lower, showlegend=False, **band_style)

fig.show()

# Fitted parameters, R^2, correlation and the correlation test.
print(f'alpha = {lm.alpha}, beta = {lm.beta}')
print(f'r2 = {calculate_r2(y, y_fit)}')
s = np.corrcoef([y, y_fit])[0, 1]
print(f's = {s}, s2 = {s ** 2}')
print(lm.check_correlation())

# Margins are tightened after the figure has been shown, before exporting it.
fig.update_layout(margin=dict(l=5, r=5, t=5, b=5),)
fig.write_image("example_2.svg")

alpha = -0.6489691275300031, beta = 0.8916663248295775
r2 = 0.6804135211158129
s = 0.8248718210217953, s2 = 0.6804135211158128
(10.317566373413996, 5.5560448130235174e-14)


Unsupported