In [369]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random, math, time
import pickle
from sklearn import model_selection, datasets, linear_model, metrics, svm
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
import warnings
warnings.filterwarnings('ignore')

# Demo of PolynomialFeatures on a tiny 2x3 integer matrix: each row is expanded
# into a constant term, the original columns, and every degree-2 product.
X = np.arange(6).reshape((2, 3))
poly = PolynomialFeatures(degree=2)
X_expanded = poly.fit_transform(X)
print(X, '\n\n', X_expanded)

[[0 1 2]
 [3 4 5]] 

 [[ 1.  0.  1.  2.  0.  0.  0.  1.  2.  4.]
 [ 1.  3.  4.  5.  9. 12. 15. 16. 20. 25.]]


### Сверху видно, как работает PolynomialFeatures
### Загрузим файл и разобьём его на train/test:

In [370]:
# Number of leading rows used for fitting; every remaining row is held out.
# Keeping the boundary in one constant guarantees the two slices below stay
# contiguous and non-overlapping if the split is ever changed.
N_TRAIN_ROWS = 36

# Calibration points: coordinates from camera 1, coordinates from camera 2,
# and the corresponding real coordinates.
data = pd.read_csv('data/' + 'calibration_dots.csv')

# Positional (not random) split: first N_TRAIN_ROWS rows train, the rest test.
full_train_data = data.iloc[:N_TRAIN_ROWS, :]
full_test_data = data.iloc[N_TRAIN_ROWS:, :]

# Fail early with a clear message instead of a confusing error further down
# when the file is too short to leave anything on one side of the split.
assert len(full_train_data) > 0 and len(full_test_data) > 0, \
    'calibration_dots.csv has too few rows for a {}-row train split'.format(N_TRAIN_ROWS)

print('Shapes of train:{}   test:{}   and all:{}'.format(full_train_data.shape, full_test_data.shape, data.shape))

Shapes of train:(36, 9)   test:(3, 9)   and all:(39, 9)


## Точки калибровки
### Представлены в формате (координаты с 1cam), (координаты с 2cam), (реальные координаты)

In [315]:
# Preview the first three calibration rows.
# NOTE(review): the rendered table includes an 'Unnamed: 0' column — presumably a
# row index that was written into the CSV; confirm whether it should be dropped.
data.head(3)

Unnamed: 0,x,y,r,x2,y2,r2,x_real,y_real,z_real
0,212,344,15,347,326,7,-15,-15,0
1,209,237,14,357,339,8,0,-15,0
2,214,130,14,361,359,9,15,-15,0


### Первая модель находит координату z_real по y и y2

In [373]:
# Model 1: predict z_real from the y coordinate reported by each camera (y, y2).
train_labels = full_train_data[['z_real']]
test_labels = full_test_data[['z_real']]
train_data = full_train_data[['y', 'y2']]
test_data = full_test_data[['y', 'y2']]

# Degree-2 polynomial expansion -> standardisation -> linear regression.
poly = PolynomialFeatures(2)
scaler = StandardScaler()
regressor = MultiOutputRegressor(linear_model.LinearRegression())

pipeline = Pipeline(steps = [('polynomial', poly), ('scaling', scaler), ('regression', regressor)])
pipeline.fit(train_data, train_labels)

# Persist the fitted pipeline. The context manager makes sure the file is
# flushed and closed even if pickling raises (a bare open() leaked the handle).
with open('data/calibration_model_1.sav', 'wb') as model_file:
    pickle.dump(pipeline, model_file)

# Mean absolute error on the held-out rows; 'raw_values' yields one value per
# output column, which is unpacked into the print call.
pred = pipeline.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)
# `regressor` is the same object that the pipeline fitted, so its per-output
# estimators are available here; index 0 is the only output (z_real).
print('Coefficients: ', regressor.estimators_[0].coef_)

MAE =  0.1704939376019965
Coefficients:  [ 0.          5.94432221 -6.801733    0.10311616 -7.25001225 -1.42582614]


#### Применение на реальных данных:

In [374]:
# Spot-check the z_real model on three hand-entered points.
# Each row presumably follows the CSV column order without the index column
# (x, y, r, x2, y2, r2, x_real, y_real, z_real); predictions should be 0, 20, 30.
arr = [
    [375, 184, 14, 241, 353, 10, 7, 7, 0],
    [259, 308, 22, 308, 242, 8, -7, -7, 20],
    [332, 243, 27, 274, 191, 8, 0, 0, 30],
]
for point in arr:
    # Positions 1 and 4 are fed to the model as its two input features.
    features = np.array([point[1], point[4]], dtype=np.float64)[np.newaxis, :]
    pred = pipeline.predict(features)
    print(list(pred))

[array([-0.19907564])]
[array([19.78946828])]
[array([30.10187445])]


### Вторая модель находит координаты x_real, y_real по x, y, z_real
#### Обе модели аппроксимируются полиномом 2-й степени

In [376]:
# Model 2: predict (x_real, y_real) from the first camera's x, y and z_real.
train_labels = full_train_data[['x_real', 'y_real']]
train_data = full_train_data[['x', 'y', 'z_real']]
test_labels = full_test_data[['x_real', 'y_real']]
test_data = full_test_data[['x', 'y', 'z_real']]

# Degree-2 polynomial expansion -> standardisation -> one linear regression
# per output column.
poly = PolynomialFeatures(2)
scaler = StandardScaler()
regressor = MultiOutputRegressor(linear_model.LinearRegression())

pipeline = Pipeline(steps = [('polynomial', poly), ('scaling', scaler), ('regression', regressor)])
pipeline.fit(train_data, train_labels)

# Persist the fitted pipeline. The context manager makes sure the file is
# flushed and closed even if pickling raises (a bare open() leaked the handle).
with open('data/calibration_model_2.sav', 'wb') as model_file:
    pickle.dump(pipeline, model_file)

# First 'MAE' line: error on the training rows, one value per output
# (x_real, then y_real).
pred = pipeline.predict(train_data)
mae = metrics.mean_absolute_error(train_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)

# Second 'MAE' line: error on the held-out rows, same output order.
pred = pipeline.predict(test_data)
mae = metrics.mean_absolute_error(test_labels, pred, multioutput='raw_values')
# Per-output mean squared error on the held-out rows; computed but not printed
# (np.sqrt(mse) gives the RMSE if it is needed).
mse = metrics.mean_squared_error(test_labels, pred, multioutput='raw_values')
print('MAE = ',  *mae)

# `regressor` is the object the pipeline fitted; estimators_[0] models x_real
# and estimators_[1] models y_real, matching the label column order.
print('\nCoefficients of x_real part: ', regressor.estimators_[0].coef_, '\n')
print('Coefficients of y_real part: ', regressor.estimators_[1].coef_, '\n')

MAE =  0.5257324679385922 0.3072334964868086
MAE =  1.161157094870487 0.8076862680221281

Coefficients of x_real part:  [  0.           0.82877735 -16.88524543  -5.10791451  -0.71564207
   0.23529801   0.25939657  -0.45671391   8.52477122  -0.15754303] 

Coefficients of y_real part:  [ 0.         17.0653837  -0.58839762  6.8448768  -1.22933379  0.23042171
 -7.72881782  0.70502384 -0.33415667 -0.34562874] 



In [377]:
# Spot-check the (x_real, y_real) model on three hand-entered points.
# Each row presumably follows the CSV column order without the index column
# (x, y, r, x2, y2, r2, x_real, y_real, z_real), so positions 6 and 7 would be
# the values the predictions should approach — verify against the data source.
arr = [
    [375, 184, 14, 241, 353, 10, 7, 7, 0],
    [259, 308, 22, 308, 242, 8, -7, -7, 20],
    [332, 243, 27, 274, 191, 8, 0, 0, 30],
]
for point in arr:
    # The model takes three inputs: positions 0 and 1, plus position 8.
    features = np.array([*point[0:2], point[8]], dtype=np.float64).reshape(1, -1)
    pred = pipeline.predict(features)
    print('x_real, y_real', list(pred))

x_real, y_real [array([7.77096412, 7.26864499])]
x_real, y_real [array([-5.79770879, -5.87962914])]
x_real, y_real [array([1.51021596, 1.03404295])]


In [366]:
# Scratch check: round float coordinates to the nearest integer and cast to int.
arr = np.asarray([11.2, 19.8], dtype=float)
arr.round().astype(int)

array([11, 20])