In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random, math, time
import pickle
from sklearn import model_selection, datasets, linear_model, metrics, svm
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')

X = np.arange(6).reshape(2, 3)
poly = PolynomialFeatures(2)
print(X, '\n\n',poly.fit_transform(X) )

[[0 1 2]
 [3 4 5]] 

 [[ 1.  0.  1.  2.  0.  0.  0.  1.  2.  4.]
 [ 1.  3.  4.  5.  9. 12. 15. 16. 20. 25.]]


### Сверху видно, как работает PolynomialFeatures
### Загрузим файл и разобъем на train/test:

In [31]:
data = pd.read_csv('data/' + 'calibration_dots3.csv')

full_train_data = data.iloc[0:36, :]
full_test_data = data.iloc[36:, :]

print('Shapes of train:{}   test:{}   and all:{}'.format(full_train_data.shape, full_test_data.shape, data.shape))

Shapes of train:(36, 9)   test:(3, 9)   and all:(39, 9)


## Точки калибровки
### Представлены в формате (координаты с 1cam), (координаты с 2cam), (реальные координаты)

In [32]:
data.head(3)

Unnamed: 0,x,y,r,x2,y2,r2,x_real,y_real,z_real
0,234,352,10,382,311,10,-15,-15,0
1,325,355,10,394,328,12,0,-15,0
2,422,357,10,404,355,15,15,-15,0


### Первая модель находит координату z_real по y1, x2

In [104]:
train_labels = full_train_data[['z_real']]
test_labels = full_test_data[['z_real']]
train_data = full_train_data[['y', 'x2']]
test_data = full_test_data[['y', 'x2']]
# test_labels, test_data = train_labels, train_data
# print(train_labels)

poly1 = PolynomialFeatures(2)
scaler1 = StandardScaler()
regressor1 = MultiOutputRegressor(linear_model.LinearRegression())

pipeline1 = Pipeline(steps = [('polynomial', poly1), ('scaling', scaler1), ('regression', regressor1)])
pipeline1.fit(train_data, train_labels)

# pickle.dump(pipeline1, open('data/calibration_model_1_test.sav', 'wb')) # save model

pred1 = pipeline1.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred1, multioutput='raw_values')
print('MAE = ',  *mae)
mse = metrics.mean_squared_error(test_labels, pred1, multioutput='raw_values')
print('MSE/standart error = ',  *mse, math.sqrt(*mse))
print('Coefficients: ', regressor1.estimators_[0].coef_)

MAE =  0.31933886506035886
MSE/standart error =  0.11636497404498786 0.34112310687637076
Coefficients:  [ 0.         -1.59965462  3.9308887  -2.40994563 -9.3820763   2.0147859 ]


#### Применение на реальных данных:

In [105]:
arr = [[394,385,15,244,340,13,7,7,0], [283,221,13,341,199,11,-7,-7,20], [333,136,14,298,130,12,0,0,30]] # Shuld be 0, 20, 30
for ab in arr:
    kek = [ab[1], ab[3]]
    aza = np.array(kek, dtype=np.float64).reshape(1, -1)
    pred = pipeline1.predict(aza)
    print(list(pred))

[array([0.34486829])]
[array([20.45180735])]
[array([29.83865904])]


### Вторая модель как Первая, но на основании других полей. Находит координату z_real по x1, y2

In [106]:
train_labels = full_train_data[['z_real']]
test_labels = full_test_data[['z_real']]
train_data = full_train_data[['x', 'y2']]
test_data = full_test_data[['x', 'y2']]
# test_labels, test_data = train_labels, train_data
# print(train_labels)

poly2 = PolynomialFeatures(2)
scaler2 = StandardScaler()
regressor2 = MultiOutputRegressor(linear_model.LinearRegression())

pipeline2 = Pipeline(steps = [('polynomial', poly2), ('scaling', scaler2), ('regression', regressor2)])
pipeline2.fit(train_data, train_labels)

# pickle.dump(pipeline2, open('data/calibration_model_1_2_test.sav', 'wb')) # save model

pred2 = pipeline2.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred2, multioutput='raw_values')
print('MAE = ',  *mae)
mse = metrics.mean_squared_error(test_labels, pred2, multioutput='raw_values')
print('MSE/standart error = ',  *mse, math.sqrt(*mse))
print('Coefficients: ', regressor2.estimators_[0].coef_)

MAE =  0.4303866491587443
MSE/standart error =  0.20424964309726532 0.4519398666827981
Coefficients:  [  0.          -4.49916939 -13.54758999   0.16032307   8.35585825
  -3.74861091]


In [107]:
arr = [[394,385,15,244,340,13,7,7,0], [283,221,13,341,199,11,-7,-7,20], [333,136,14,298,130,12,0,0,30]] # Shuld be 0, 20, 30

pred1 = pipeline1.predict(full_test_data[['y', 'x2']])
pred2 = pipeline2.predict(full_test_data[['x', 'y2']])
avg = []
for i in range(len(pred1)):
    avg.append(float(pred1[i]+pred2[i])/2)
print(avg)
mae = metrics.mean_absolute_error(test_labels, avg, multioutput='raw_values')
print('MAE = ',  *mae)
mse = metrics.mean_squared_error(test_labels, avg, multioutput='raw_values')
print('MSE/standart error = ',  *mse, math.sqrt(*mse))

[0.39906273189801844, 20.519244091195404, 29.79371855176477]
MAE =  0.374862757109551
MSE/standart error =  0.15713917537242492 0.3964078396959688


### Ensemble of first/ second models

### Ещё один вариант: Находит координату z_real по x1, y1, x2, y2

In [108]:
train_labels = full_train_data[['z_real']]
test_labels = full_test_data[['z_real']]
train_data = full_train_data[['x', 'y', 'x2', 'y2']]
test_data = full_test_data[['x', 'y', 'x2', 'y2']]
# test_labels, test_data = train_labels, train_data
# print(train_labels)

poly3 = PolynomialFeatures(2)
scaler3 = StandardScaler()
regressor3 = MultiOutputRegressor(linear_model.LinearRegression())

pipeline3 = Pipeline(steps = [('polynomial', poly3), ('scaling', scaler3), ('regression', regressor3)])
pipeline3.fit(train_data, train_labels)

# pickle.dump(pipeline3, open('data/calibration_model_1_3_test.sav', 'wb')) # save model

pred3 = pipeline3.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred3, multioutput='raw_values')
print('MAE = ',  *mae)
mse = metrics.mean_squared_error(test_labels, pred3, multioutput='raw_values')
print('MSE/standart error = ',  *mse, math.sqrt(*mse))
print('Coefficients: ', regressor3.estimators_[0].coef_)

MAE =  0.2095983937560959
MSE/standart error =  0.1038230773574919 0.3222158862587192
Coefficients:  [ 0.00000000e+00 -3.68531111e+00  5.82896116e+00  1.47098272e-01
 -1.62956080e+01 -5.31302425e-03 -5.33751632e+00  4.29000005e-01
  1.16816389e+01 -7.57253273e-01 -3.69243193e+00 -3.47668937e+00
  8.40230613e-01  1.49177567e+00  7.69558854e-01]


### Вторая модель находит координаты x_real, y_real по x, y, z_real
#### Обе модели аппроксимируются Полиномом 2й степени

In [109]:
train_labels = full_train_data[['x_real', 'y_real']]
test_labels = full_test_data[['x_real', 'y_real']]
train_data = full_train_data[['x', 'x2', 'z_real']]
test_data = full_test_data[['x', 'x2', 'z_real']]

poly = PolynomialFeatures(2)
scaler = StandardScaler()
regressor = MultiOutputRegressor(linear_model.LinearRegression())

pipeline = Pipeline(steps = [('polynomial', poly), ('scaling', scaler), ('regression', regressor)])
pipeline.fit(train_data, train_labels)

pickle.dump(pipeline, open('data/calibration_model_2.sav', 'wb')) # save model

pred = pipeline.predict(train_data)
mae = metrics.mean_absolute_error(train_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)

pred = pipeline.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)

print('\nCoefficients of x_real part: ', regressor.estimators_[0].coef_, '\n')
print('Coefficients of y_real part: ', regressor.estimators_[1].coef_, '\n')

MAE =  0.3834737599889404 0.40796364094655135
MAE =  0.19551530139657705 0.1506113135961822

Coefficients of x_real part:  [ 0.          2.56862393 -7.96972716 -1.07833142  1.26938974 11.18486378
  1.12948224  0.25593143 -0.3219025   0.40289879] 

Coefficients of y_real part:  [  0.          -7.95969178 -22.02553762   1.53436652   0.17625752
   9.4188145   -0.79749844   2.41683537  -0.38577829  -0.36742528] 



In [97]:
arr = [[394,385,15,244,340,13,7,7,0], [283,221,13,341,199,11,-7,-7,20], [333,136,14,298,130,12,0,0,30]]
for ab in arr:
    kek = [ab[0], ab[3], ab[8]]
    aza = np.array(kek, dtype=np.float64).reshape(1, -1)
    pred = pipeline.predict(aza)
    print('x_real, y_real', list(pred))

x_real, y_real [array([6.55705218, 8.16449924])]
x_real, y_real [array([-6.65816914, -5.57450783])]
x_real, y_real [array([0.04359505, 1.12678321])]


In [110]:
train_labels = full_train_data[['x_real', 'y_real']]
test_labels = full_test_data[['x_real', 'y_real']]
train_data = full_train_data[['x', 'y', 'x2', 'y2', 'z_real']]
test_data = full_test_data[['x', 'y', 'x2', 'y2', 'z_real']]

poly = PolynomialFeatures(2)
scaler = StandardScaler()
regressor = MultiOutputRegressor(linear_model.LinearRegression())

pipeline = Pipeline(steps = [('polynomial', poly), ('scaling', scaler), ('regression', regressor)])
pipeline.fit(train_data, train_labels)

pickle.dump(pipeline, open('data/calibration_model_2.sav', 'wb')) # save model

pred = pipeline.predict(train_data)
mae = metrics.mean_absolute_error(train_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)

pred = pipeline.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)

print('\nCoefficients of x_real part: ', regressor.estimators_[0].coef_, '\n')
print('Coefficients of y_real part: ', regressor.estimators_[1].coef_, '\n')
print('Coefficients of z_real part: ', regressor.estimators_[2].coef_, '\n')

MAE =  0.2784109423916143 0.30472685930851146
MAE =  0.7763920311209672 0.27722347538311887

Coefficients of x_real part:  [  0.           1.54210839  -3.10166474  -4.54171088 136.89911262
 105.28255861   2.47712581  12.7167464    9.80765709 -16.81735524
  10.06899872   0.9624321  -11.31891835 -16.12834434  -3.63855876
   0.24040311   5.26454838   5.15378048 -69.57794383 -46.54697522
 -34.51175688] 

Coefficients of y_real part:  [  0.           0.98268914  69.71518094 -20.99681642 -77.67740624
  -1.40597847  -0.51227326  -1.34905561   9.83170629  -7.45888166
 -10.24628671 -44.66550787   6.98386162  -3.15024471 -28.55493493
   4.01905237  -9.05852608  -5.93417395  60.40232984  36.94805798
   4.25836282] 



IndexError: list index out of range

In [112]:
arr = [[394,385,15,244,340,13,7,7,0], [283,221,13,341,199,11,-7,-7,20], [333,136,14,298,130,12,0,0,30]]
for ab in arr:
    kek = [ab[0], ab[1], ab[3], ab[4], ab[8]]
    aza = np.array(kek, dtype=np.float64).reshape(1, -1)
    pred = pipeline.predict(aza)
    print('x_real, y_real', list(pred))

x_real, y_real [array([7.7231442 , 7.48328612])]
x_real, y_real [array([-7.77136955, -7.01943178])]
x_real, y_real [array([ 0.83466234, -0.32895253])]
