**Importing libraries**

In [13]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

**Dataframes**

In [14]:
df1 = pd.DataFrame(np.array([[69,74,68,70,72,67,66,70,76,68,72,79,74,67,66,71,74,75,75,76],[153,175,155,135,172,150,115,137,200,130,140,265,185,112,140,150,165,185,210,220]]).T, columns = ['Weight','Height'])

df2 = pd.DataFrame(np.array([[356, 289, 319, 356, 323, 381, 350, 301, 379, 296, 353, 306, 290, 371, 312, 393, 364, 359, 296, 345, 378, 304, 347, 327, 386, 365, 365, 352, 325, 321, 360, 336, 352, 353, 373, 376, 367, 335, 396, 277, 378, 360, 291, 269, 318, 328],
               [124, 117, 143, 199, 240, 157, 221, 186, 142, 131, 221, 178, 136, 200, 208, 202, 152, 185, 116, 123, 136, 134, 184, 192, 279, 228, 145, 172, 179, 222, 134, 143, 169, 263, 174, 134, 182, 241, 128, 222, 165, 282, 94, 121, 73, 106],
               [55, 76, 105, 108, 143, 165, 119, 105, 98, 94, 53, 66, 142, 93, 68, 102, 76, 37, 60, 50, 47, 50, 91, 124, 74, 235, 158, 140, 145, 99, 90, 105, 32, 165, 78, 80, 54, 175, 80, 186, 117, 160, 71, 29, 42, 56],
               [0.81, 0.95, 0.94, 1.04, 1.0, 0.76, 0.91, 1.1, 0.99, 0.78, 0.9, 0.73, 0.96, 0.84, 0.74, 0.98, 1.1, 0.85, 0.83, 0.93, 0.95, 0.74, 0.95, 0.97, 0.72, 1.11, 1.2, 1.13, 1.0, 0.78, 1.0, 1.0, 0.71, 0.76, 0.89, 0.88, 1.17, 0.85, 0.97, 1.0, 1.0, 0.89, 0.98, 0.78, 0.74, 0.91]]).T, columns = ['x1','x2','x3','y1'])

df3 = pd.DataFrame(np.array([[97, 103, 66, 80, 116, 109, 77, 115, 76, 72, 130, 150, 150, 99, 119, 164, 160, 144, 77, 114, 77, 118, 170, 153, 143, 114, 73, 116, 63, 105, 83, 81, 120, 107, 99, 113, 136, 109, 72, 130, 130, 83, 150, 119, 122, 102, 104, 119, 92],
               [69, 78, 99, 85, 130, 101, 102, 110, 85, 133, 134, 158, 131, 98, 85, 98, 117, 71, 82, 93, 70, 115, 147, 132, 105, 113, 106, 81, 87, 132, 94, 87, 89, 109, 111, 124, 112, 88, 90, 101, 117, 92, 142, 120, 155, 90, 69, 94, 94],
               [98, 107, 130, 114, 91, 103, 130, 109, 119, 127, 121, 100, 142, 105, 109, 138, 121, 153, 89, 122, 109, 150, 121, 115, 100, 129, 116, 77, 70, 80, 133, 86, 59, 101, 98, 97, 122, 105, 71, 90, 144, 107, 146, 119, 149, 122, 96, 89, 100],
               [60, 56, 80, 55, 62, 74, 64, 73, 68, 69, 60, 70, 66, 83, 68, 78, 103, 77, 66, 70, 75, 91, 66, 75, 74, 76, 74, 74, 67, 78, 64, 71, 63, 90, 60, 48, 66, 74, 60, 63, 66, 77, 70, 73, 78, 73, 72, 65, 52],
               [69, 53, 69, 80, 75, 64, 71, 70, 67, 82, 67, 74, 74, 70, 66, 63, 77, 68, 77, 70, 65, 74, 75, 82, 71, 70, 90, 77, 71, 75, 66, 80, 75, 103, 76, 77, 93, 70, 74, 75, 80, 67, 67, 76, 90, 68, 83, 60, 70],
               [62, 84, 76, 90, 68, 70, 66, 64, 75, 74, 61, 78, 78, 74, 90, 75, 77, 74, 68, 72, 71, 93, 73, 76, 66, 64, 86, 80, 69, 80, 71, 76, 73, 74, 61, 75, 97, 76, 71, 66, 86, 74, 100, 81, 77, 80, 68, 70, 76]]).T, columns = ['x1','x2','x3','y1','y2','y3'])

**Building models**

**First table**

In [15]:
model = sm.OLS(df1[['Weight']],df1[['Height']])

res = model.fit()
res.summary()

0,1,2,3
Dep. Variable:,Weight,R-squared (uncentered):,0.969
Model:,OLS,Adj. R-squared (uncentered):,0.968
Method:,Least Squares,F-statistic:,598.5
Date:,"Tue, 08 Jun 2021",Prob (F-statistic):,7.94e-16
Time:,19:25:32,Log-Likelihood:,-78.973
No. Observations:,20,AIC:,159.9
Df Residuals:,19,BIC:,160.9
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Height,0.4173,0.017,24.465,0.000,0.382,0.453

0,1,2,3
Omnibus:,6.674,Durbin-Watson:,1.632
Prob(Omnibus):,0.036,Jarque-Bera (JB):,4.199
Skew:,-1.019,Prob(JB):,0.122
Kurtosis:,3.94,Cond. No.,1.0


**Результати**

Гіпотеза про те, що коефіцієнти при x рівні 0 **відхиляється**, оскільки значення p менше за 0,05.

Модель дає майже ідеальні результати прогнозування (коефіцієнти детермінації та скоригований коефіцієнт детермінації дуже близькі до 1)

Стандартна похибка становить 0,017.

**Second table**

In [16]:
model2 = sm.OLS(df2[['y1']],df2[['x1', 'x2', 'x3']])

res2 = model2.fit()
res2.summary()

0,1,2,3
Dep. Variable:,y1,R-squared (uncentered):,0.982
Model:,OLS,Adj. R-squared (uncentered):,0.981
Method:,Least Squares,F-statistic:,790.1
Date:,"Tue, 08 Jun 2021",Prob (F-statistic):,1.31e-37
Time:,19:25:48,Log-Likelihood:,30.878
No. Observations:,46,AIC:,-55.76
Df Residuals:,43,BIC:,-50.27
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
x1,0.0027,0.000,12.723,0.000,0.002,0.003
x2,-0.0009,0.000,-1.930,0.060,-0.002,4.02e-05
x3,0.0016,0.000,3.431,0.001,0.001,0.003

0,1,2,3
Omnibus:,3.292,Durbin-Watson:,2.269
Prob(Omnibus):,0.193,Jarque-Bera (JB):,2.412
Skew:,-0.272,Prob(JB):,0.299
Kurtosis:,3.981,Cond. No.,12.3


**Результати**

Для x2 приймаємо гіпотезу, для інших відхиляємо.

Модель дає ще кращі результати ніж у попередньому пункті, майже ідеальне прогнозування.

Стандарнті похибки близькі до нуля.

**Third table**

In [17]:
model3 = LinearRegression()

model3.fit(df3[['y1', 'y2', 'y3']], df3[['x1', 'x2', 'x3']])
r2 = r2_score(df3[['y1', 'y2', 'y3']], model3.predict(df3[['x1', 'x2', 'x3']]))
print('Коефіцієнт детермінації: ', r2)

Коефіцієнт детермінації:  -71.54475932132334


**Результати**

Оскільки коефіцієнт детермінації вийшов від'ємним, то можна зробити висновок, що дана модель непридатна для цих даних.