In [20]:
import pandas as pd
import numpy as np

## Loading data

In [21]:
# Load the height measurements; the file uses ";" as its field separator.
df = pd.read_csv("child_height.csv", delimiter=";")

# Print every row of the frame as plain text.
print(df.to_string())

    mother  father  child
0      172     184     79
1      168     190     80
2      170     171     75
3      170     185     78
4      173     185     81
5      162     172     73
6      159     170     72
7      164     178     75
8      169     175     76
9      167     177     75
10     177     179     79
11     172     174     76
12     159     172     72
13     160     173     72
14     168     189     79
15     166     187     77
16     172     174     77
17     164     173     75
18     171     179     76
19     161     175     75
20     172     186     80
21     166     182     76
22     156     191     76
23     172     180     78
24     170     183     78
25     157     182     75
26     159     177     73
27     163     176     74
28     164     169     73
29     171     186     79


## Linear regression

### Ready-made solution (scikit-learn)

In [22]:
from sklearn.linear_model import LinearRegression

In [23]:
# Predictors are the two parent columns (father first, then mother);
# the target is the child column.
X_data = df.loc[:, ["father", "mother"]]
y_data = df.loc[:, "child"]

In [24]:
# Sanity check: (number of rows, number of predictor columns) of the feature table.
print(X_data.shape)

(30, 2)


In [25]:
# Build the scikit-learn estimator, then fit it on the parent heights.
# fit() hands back the estimator itself, so re-binding keeps `reg` pointing
# at the fitted model without producing any cell output.
reg = LinearRegression()
reg = reg.fit(X_data, y_data)

In [26]:
# Score the fitted model on the same data it was trained on (in-sample fit quality;
# for LinearRegression scikit-learn reports the coefficient of determination R^2).
reg.score(X_data, y_data)

0.9305852352706083

In [27]:
# Fitted slopes, one per predictor, in the column order of X_data (father, mother).
reg.coef_

array([0.25145831, 0.28234502])

In [28]:
# Fitted constant (bias) term of the linear model.
reg.intercept_

-15.91226563393134

### Self-made solution

In [29]:
from polynomial_reg import PolynomialRegression

#### Auxiliary data

The data below is constructed so that it is fit exactly by a regression model of the form:
$$y=1+2*x_1+0*x_2\,.$$

In [30]:
input = np.array([[1,1], [3,1], [11,4], [0,0], [2,4], [20,4] ])
output = np.array([3, 7, 23, 1, 5, 41 ])

In [31]:
regr = PolynomialRegression(input, output)
regr.train(input, output)
regr.coefficients

array([1.00000000e+00, 2.00000000e+00, 1.99840144e-15])

#### Self-made model on height data

In [32]:
# Convert the pandas objects to plain NumPy arrays for the self-made model.
# np.asarray is used instead of .to_numpy() so the cell is idempotent:
# re-running it on arrays that were already converted is a no-op instead of
# raising AttributeError (ndarray has no .to_numpy()).
X_data = np.asarray(X_data)
y_data = np.asarray(y_data)

In [33]:
# Fit the self-made model on the height data.
# NOTE(review): the same arrays are passed to the constructor and to train();
# whether both are required depends on polynomial_reg - confirm.
self_made_reg = PolynomialRegression(X_data, y_data)
self_made_reg.train(X_data, y_data)
# Display the fitted coefficients; judging by the values, the order appears to be
# [intercept, father, mother] (compare with reg.intercept_ and reg.coef_).
self_made_reg.coefficients

array([-15.91226563,   0.25145831,   0.28234502])

In [34]:
# Goodness-of-fit value stored on the self-made model; presumably R^2, to be
# compared with reg.score(X_data, y_data) from scikit-learn.
self_made_reg.r_squared

0.9305852352706078

The coefficients and $R^2$ of the self-made model match those from scikit-learn (up to floating-point precision), so the implementation works correctly.

## Polynomial regression of higher degree

In [35]:
second_degree_reg = PolynomialRegression(X_data, y_data)
self_made_reg.setPolDegree(2)
self_made_reg.train(X_data, y_data)
self_made_reg.coefficients

array([ 1.91212664e+02, -8.75615185e-01, -1.00058948e+00,  3.12262486e-03,
        3.89370801e-03])

In [36]:
self_made_reg.r_squared

0.934011675621128