In [1]:
import numpy as np
import pandas as pd


# Load both competition splits; the `id` column is used as the row index.
full_train = pd.read_csv("train.csv", index_col="id")
full_test = pd.read_csv("test.csv", index_col="id")

# Transition to the Oxy plane
## x_transformed, y_transformed - coordinates in the plane
## x_pred, y_pred, (z_pred = 0) - prediction in 2D
## x_pred_transformed, y_pred_transformed, z_pred_transformed - prediction in 3D

In [2]:
# coefs for plane
# Fit one plane A*x + B*y + C*z + D = 0 per satellite.
# For every satellite, random point triples are drawn from its rows; each triple
# gives a unit normal (via a cross product) and an offset D, and the
# per-component median over all triples is stored as that satellite's coefficients.
test_sz = full_train.sat_id.nunique()  # number of satellites in the *train* split (name kept as-is)

from random import shuffle
# DataFrame.sample() is called without random_state below, so it draws from
# NumPy's global RNG; seeding it here makes the fit reproducible.
np.random.seed(31415)

plane_rows = []
# sort=False yields satellites in order of first appearance (the same order as
# full_train.sat_id.unique()), so the RNG stream is consumed in a stable order.
for sat_id, sat_df in full_train.groupby('sat_id', sort=False):
    xyz = sat_df[['x', 'y', 'z']]

    # Three independent shuffles of the same points: row k of (p1, p2, p3)
    # is one random triple of points of this satellite.
    p1 = xyz.sample(frac=1).values
    p2 = xyz.sample(frac=1).values
    p3 = xyz.sample(frac=1).values

    # the cross product is a vector normal to the plane
    normals = np.cross(p3 - p1, p2 - p1)
    # A normal and its negation describe the same plane; flip every normal so
    # that its first component is >= 0, otherwise the medians would mix both signs.
    normals = normals * np.where(normals[:, 0] >= 0, 1.0, -1.0).reshape(-1, 1)

    lengths = np.sqrt(normals[:, 0] ** 2 + normals[:, 1] ** 2 + normals[:, 2] ** 2)
    # A triple with coincident points has a zero normal -> 0/0 = NaN.
    # Those rows are expected and are skipped by nanmedian, so silence the warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        unit = normals / lengths.reshape(-1, 1)

    a, b, c = unit[:, 0], unit[:, 1], unit[:, 2]
    # plane offset from the requirement that p1 lies on the plane
    d = -(a * p1[:, 0] + b * p1[:, 1] + c * p1[:, 2])

    plane_rows.append({'sat_id': sat_id,
                       'A': np.nanmedian(a), 'B': np.nanmedian(b),
                       'C': np.nanmedian(c), 'D': np.nanmedian(d)})

train_coefs = pd.DataFrame(plane_rows, columns=['sat_id', 'A', 'B', 'C', 'D'])
assert len(train_coefs) == test_sz, "expected exactly one coefficient row per satellite"



In [37]:
def fit_transform(df, train_coefs):
    """Rotate every satellite's points into its fitted plane and normalise them in 2D.

    For each ``sat_id`` found in ``df``:

    1. 3D step: the points are shifted by ``-D/C`` along z and multiplied by a
       rotation matrix whose third row is the normalised plane normal
       ``(A, B, C)``, so the normal direction becomes the new z axis.
    2. Shift: the bounding-box centre ``(xc, yc)`` of the new in-plane
       coordinates is subtracted.
    3. Rotate: the in-plane coordinates are rotated so that the point farthest
       from that centre lies on the positive x axis.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``sat_id``, ``x``, ``y``, ``z``. Modified in
        place: ``x_transformed``, ``y_transformed``, ``z_transformed`` are added.
    train_coefs : pandas.DataFrame
        One row per satellite with the columns ``sat_id``, ``A``, ``B``, ``C``,
        ``D``. Modified in place: ``xc``, ``yc``, ``small_polyos``,
        ``big_polyos`` (smallest / largest distance of a point from the centre),
        ``cos_teta`` and ``sin_teta`` are written for the satellites in ``df``.

    Returns
    -------
    (df, train_coefs) : the same two objects that were passed in.

    Notes
    -----
    * The formulas divide by ``C`` and by ``sqrt(B**2 + C**2)``; both are
      assumed to be non-zero.
    * NOTE(review): for a point on the plane ``A*x + B*y + C*z + D = 0`` the
      shift ``-D/C`` gives ``z_transformed = -2*D/|n|`` rather than 0, which
      suggests the sign should be ``+D/C``. It is left unchanged here because
      ``inv_transform`` applies the exact mirror image of this shift; the two
      must be changed together.
    """
    df['x_transformed'] = df['x']; df['y_transformed'] = df['y']; df['z_transformed'] = df['z']
    train_coefs['xc'] = 0.0; train_coefs['yc'] = 0.0
    train_coefs['small_polyos'] = 0.0; train_coefs['big_polyos'] = 0.0

    for sat_id in df.sat_id.unique():
        rows = df.sat_id == sat_id
        coef_rows = train_coefs.sat_id == sat_id

        # ---- TRANSFORM in 3d (new_coord): My(-beta) * Mx(alpha)
        A, B, C, D = train_coefs.loc[coef_rows, ['A', 'B', 'C', 'D']].values[0]
        d = np.sqrt(B ** 2 + C ** 2)
        n_len = np.sqrt(A ** 2 + d ** 2)  # length of the normal (A, B, C)
        M = [[d / n_len, -B * A / d / n_len, -C * A / d / n_len],
             [0, C / d, -B / d],
             [A / n_len, B / n_len, C / n_len]]

        pts = df.loc[rows, ['x_transformed', 'y_transformed', 'z_transformed']].values
        pts = np.dot(M, (pts + np.array([0.0, 0.0, -D / C])).T).T

        # ---- TRANSFORM in 2d: shift to the bounding-box centre
        x_new = pts[:, 0]
        y_new = pts[:, 1]
        # nanmax/nanmin skip NaN like the pandas Series.max()/min() reductions do
        xc = (np.nanmax(x_new) + np.nanmin(x_new)) / 2
        yc = (np.nanmax(y_new) + np.nanmin(y_new)) / 2
        x_new = x_new - xc
        y_new = y_new - yc

        # ---- TRANSFORM in 2d: rotate the farthest point onto the +x axis
        distances = np.sqrt(x_new ** 2 + y_new ** 2)
        id_min = np.argmin(distances); id_max = np.argmax(distances)

        # id_max is a *position* inside this satellite's rows, so the arrays are
        # indexed positionally. Adding it to an index label is only valid when
        # the labels are consecutive integers and fails for any other index.
        far_x, far_y = x_new[id_max], y_new[id_max]  # vector where big polyos
        far_len = np.sqrt(far_x ** 2 + far_y ** 2)
        cos_teta = far_x / far_len
        sin_teta = far_y / far_len

        R = [[cos_teta, sin_teta],
             [-sin_teta, cos_teta]]
        planar = np.dot(R, np.array([x_new, y_new])).T

        df.loc[rows, 'x_transformed'] = planar[:, 0]
        df.loc[rows, 'y_transformed'] = planar[:, 1]
        df.loc[rows, 'z_transformed'] = pts[:, 2]

        train_coefs.loc[coef_rows, 'xc'] = xc
        train_coefs.loc[coef_rows, 'yc'] = yc
        train_coefs.loc[coef_rows, 'small_polyos'] = distances[id_min]
        train_coefs.loc[coef_rows, 'big_polyos'] = distances[id_max]
        train_coefs.loc[coef_rows, 'cos_teta'] = cos_teta
        train_coefs.loc[coef_rows, 'sin_teta'] = sin_teta

    return df, train_coefs

In [38]:
# fit_transform writes into its arguments, so hand it copies and keep
# `full_train` / `train_coefs` untouched.
full_train_new, train_coefs_new = fit_transform(
    df=full_train.copy(),
    train_coefs=train_coefs.copy(),
)
full_train_new.head()

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,x_transformed,y_transformed,z_transformed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.22169,-20741.615306,-0.907527,-3.80493,-2.024133,35794.106665,-1.818989e-12,-23.30178
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-10555.500066,1649.289367,-24473.089556,-0.303704,-4.269816,-0.616468,33572.847092,-12002.66,15.548148
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,-24271.169776,0.27488,-4.046788,0.718768,27489.568837,-22116.18,51.475995
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-9149.620794,-20618.200201,-20765.019094,0.712437,-3.375202,1.718306,18953.838217,-29271.83,80.140214
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,-14992.399986,0.989382,-2.522618,2.342237,9377.516145,-33289.17,99.956861


In [46]:
def inv_transform(df, train_coefs):
    """Map predictions from the normalised in-plane frame back to the original 3D frame.

    Reads ``x_pred``, ``y_pred``, ``z_pred`` and ``sat_id`` from ``df`` and, per
    satellite, undoes the steps of ``fit_transform`` in reverse order using the
    values stored in ``train_coefs`` (``cos_teta``, ``sin_teta``, ``xc``, ``yc``,
    ``A``, ``B``, ``C``, ``D``): in-plane rotation back, shift by the centre,
    3D rotation back, then a ``D/C`` shift along z.

    ``df`` is modified in place (columns ``x_pred_transformed``,
    ``y_pred_transformed``, ``z_pred_transformed`` are written) and returned.
    """
    out_cols = ['x_pred_transformed', 'y_pred_transformed', 'z_pred_transformed']
    for target, source in zip(out_cols, ['x_pred', 'y_pred', 'z_pred']):
        df[target] = df[source]

    for sat_id in df.sat_id.unique():
        rows = df.sat_id == sat_id
        params = train_coefs[train_coefs.sat_id == sat_id]
        pred = df.loc[rows, out_cols].values

        # INVERSE TRANSFORM in 2d: rotate back by teta ...
        cos_teta = params['cos_teta'].values[0]
        sin_teta = params['sin_teta'].values[0]
        back_rotation = [[cos_teta, -sin_teta],
                         [sin_teta, cos_teta]]
        planar = np.dot(back_rotation, pred[:, :2].T).T

        # ... then move the centre back to (xc, yc)
        x_plane = planar[:, 0] + params.xc.values
        y_plane = planar[:, 1] + params.yc.values

        # INVERSE TRANSFORM in 3d: Mx(-alpha) * My(beta)
        A, B, C, D = params[['A', 'B', 'C', 'D']].values[0]
        d = np.sqrt(B ** 2 + C ** 2)
        n_len = np.sqrt(A ** 2 + d ** 2)
        M = [[d / n_len, 0, A / n_len],
             [-B * A / d / n_len, C / d, B / n_len],
             [-C * A / d / n_len, -B / d, C / n_len]]

        in_plane = np.array([x_plane, y_plane, pred[:, 2]])
        restored = (np.dot(M, in_plane) + np.array([[0.0], [0.0], [D / C]])).T

        for k, column in enumerate(out_cols):
            df.loc[rows, column] = restored[:, k]

    return df

In [47]:
# Round-trip input: feed the forward-transformed coordinates back in as "predictions".
full_train_inv = full_train_new.copy()
for axis in 'xyz':
    full_train_inv[f'{axis}_pred'] = full_train_inv[f'{axis}_transformed']

# inv_transform writes into the frame it is given and returns that same frame.
full_train_inv = inv_transform(full_train_inv, train_coefs_new)
full_train_inv.head()

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,...,Vz_sim,x_transformed,y_transformed,z_transformed,x_pred,y_pred,z_pred,x_pred_transformed,y_pred_transformed,z_pred_transformed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.22169,...,-2.024133,35794.106665,-1.818989e-12,-23.30178,35794.106665,-1.818989e-12,-23.30178,-8855.823863,13117.780146,-20728.353233
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-10555.500066,1649.289367,...,-0.616468,33572.847092,-12002.66,15.548148,33572.847092,-12002.66,15.548148,-10567.672384,1619.746066,-24451.813271
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,...,0.718768,27489.568837,-22116.18,51.475995,27489.568837,-22116.18,51.475995,-10578.684043,-10180.46746,-24238.280949
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-9149.620794,-20618.200201,...,1.718306,18953.838217,-29271.83,80.140214,18953.838217,-29271.83,80.140214,-9148.251857,-20651.43746,-20720.381279
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,...,2.342237,9377.516145,-33289.17,99.956861,9377.516145,-33289.17,99.956861,-6719.092336,-28929.061629,-14938.907967


In [48]:
# check: summed absolute difference between the recovered and the original
# coordinates, over all rows and all three axes
axis_pairs = [('x_pred_transformed', 'x'),
              ('y_pred_transformed', 'y'),
              ('z_pred_transformed', 'z')]
abs_err = sum(
    np.abs(full_train_inv[recovered].values - full_train_new[original].values)
    for recovered, original in axis_pairs
)
print(np.sum(abs_err))

5.718807405386515e-06


# If we take z = 0 (drop the out-of-plane component):

In [49]:
def inv_transform1(df, train_coefs):
    """Map 2D predictions back to the original 3D frame, taking the in-plane z as 0.

    Reads ``x_pred``, ``y_pred`` and ``sat_id`` from ``df``; the out-of-plane
    coordinate is set to ``0.0`` instead of being read from ``z_pred``. Per
    satellite the values stored in ``train_coefs`` (``cos_teta``, ``sin_teta``,
    ``xc``, ``yc``, ``A``, ``B``, ``C``, ``D``) are used to rotate back in the
    plane, shift by the centre, rotate back in 3D and shift by ``D/C`` along z.

    ``df`` is modified in place (columns ``x_pred_transformed``,
    ``y_pred_transformed``, ``z_pred_transformed`` are written) and returned.
    """
    out_cols = ['x_pred_transformed', 'y_pred_transformed', 'z_pred_transformed']
    df[out_cols[0]] = df['x_pred']
    df[out_cols[1]] = df['y_pred']
    df[out_cols[2]] = 0.0

    for sat_id in df.sat_id.unique():
        rows = df.sat_id == sat_id
        params = train_coefs[train_coefs.sat_id == sat_id]
        pred = df.loc[rows, out_cols].values

        # INVERSE TRANSFORM in 2d: rotate back by teta ...
        cos_teta = params['cos_teta'].values[0]
        sin_teta = params['sin_teta'].values[0]
        back_rotation = [[cos_teta, -sin_teta],
                         [sin_teta, cos_teta]]
        planar = np.dot(back_rotation, pred[:, :2].T).T

        # ... then move the centre back to (xc, yc)
        x_plane = planar[:, 0] + params.xc.values
        y_plane = planar[:, 1] + params.yc.values

        # INVERSE TRANSFORM in 3d: Mx(-alpha) * My(beta)
        A, B, C, D = params[['A', 'B', 'C', 'D']].values[0]
        d = np.sqrt(B ** 2 + C ** 2)
        n_len = np.sqrt(A ** 2 + d ** 2)
        M = [[d / n_len, 0, A / n_len],
             [-B * A / d / n_len, C / d, B / n_len],
             [-C * A / d / n_len, -B / d, C / n_len]]

        in_plane = np.array([x_plane, y_plane, pred[:, 2]])
        restored = (np.dot(M, in_plane) + np.array([[0.0], [0.0], [D / C]])).T

        for k, column in enumerate(out_cols):
            df.loc[rows, column] = restored[:, k]

    return df

In [51]:
# Same round-trip input as before, this time sent through the z = 0 variant.
full_train_inv = full_train_new.copy()
for axis in 'xyz':
    full_train_inv[f'{axis}_pred'] = full_train_inv[f'{axis}_transformed']

# inv_transform1 writes into the frame it is given and returns that same frame.
full_train_inv = inv_transform1(full_train_inv, train_coefs_new)
full_train_inv.head()

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,...,Vz_sim,x_transformed,y_transformed,z_transformed,x_pred,y_pred,z_pred,x_pred_transformed,y_pred_transformed,z_pred_transformed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.22169,...,-2.024133,35794.106665,-1.818989e-12,-23.30178,35794.106665,-1.818989e-12,-23.30178,-8834.447768,13117.521471,-20737.62516
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-10555.500066,1649.289367,...,-0.616468,33572.847092,-12002.66,15.548148,33572.847092,-12002.66,15.548148,-10581.935615,1619.918668,-24445.626564
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,...,0.718768,27489.568837,-22116.18,51.475995,27489.568837,-22116.18,51.475995,-10625.906003,-10179.89602,-24217.798323
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-9149.620794,-20618.200201,...,1.718306,18953.838217,-29271.83,80.140214,18953.838217,-29271.83,80.140214,-9221.769193,-20650.547816,-20688.492976
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,...,2.342237,9377.516145,-33289.17,99.956861,9377.516145,-33289.17,99.956861,-6810.788648,-28927.951998,-14899.134494


In [52]:
# check: summed absolute difference between the coordinates recovered with
# z = 0 and the original coordinates, over all rows and all three axes
axis_pairs = [('x_pred_transformed', 'x'),
              ('y_pred_transformed', 'y'),
              ('z_pred_transformed', 'z')]
abs_err = sum(
    np.abs(full_train_inv[recovered].values - full_train_new[original].values)
    for recovered, original in axis_pairs
)
print(np.sum(abs_err))

285643341.98941416
