In [None]:
import numpy as np
import pandas as pd


# Load the train/test tables; the `id` column of each CSV becomes the row index.
full_train = pd.read_csv("train.csv", index_col="id")
full_test = pd.read_csv("test.csv", index_col="id")

# Transformation into the Oxy plane
## x_transformed, y_transformed - coordinates within the plane
## x_pred, y_pred, (z_pred = 0) - prediction in 2D
## x_pred_transformed, y_pred_transformed, z_pred_transformed - prediction in 3D

In [3]:
# NOTE(review): `shuffle` is not referenced in the visible cells — confirm it is still needed.
from random import shuffle
# Seed NumPy's global RNG. coefs_for_plane shuffles rows with DataFrame.sample()
# and passes no random_state, so its draws come from this global generator;
# fixing the seed keeps the fitted plane coefficients reproducible.
np.random.seed(31415)

In [5]:
def coefs_for_plane(df, col1, col2, col3):
    """Estimate, per satellite, a plane ``A*x + B*y + C*z + D = 0`` through its points.

    For every ``sat_id`` the satellite's rows are shuffled three times
    independently.  Each aligned triple of points gives one candidate normal
    (cross product of two edge vectors, scaled to unit length and oriented so
    that its first component is non-negative) and one candidate offset ``D``.
    The component-wise NaN-ignoring median over all candidates is stored.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``sat_id`` column and the three coordinate columns.
    col1, col2, col3 : str
        Names of the coordinate columns, e.g. ``'x', 'y', 'z'``.

    Returns
    -------
    pandas.DataFrame
        One row per satellite with columns ``sat_id, A, B, C, D``.

    Notes
    -----
    The shuffles call ``DataFrame.sample`` without ``random_state``, so the
    result depends on the state of NumPy's global random generator.
    """
    sz = df.sat_id.nunique()
    train_coefs = pd.DataFrame({'sat_id': df.sat_id.unique(), 'A': np.zeros(sz),
                                'B': np.zeros(sz), 'C': np.zeros(sz), 'D': np.zeros(sz)})
    coord_cols = [col1, col2, col3]

    for sat_id in train_coefs.sat_id:
        points = df.loc[df.sat_id == sat_id, coord_cols]

        # Three independent permutations of the same rows; row k of (p1, p2, p3)
        # is one random triple.  Cast to float so that the in-place divisions
        # below also work when the coordinate columns hold integers.
        p1 = points.sample(frac=1).values.astype(float)
        p2 = points.sample(frac=1).values.astype(float)
        p3 = points.sample(frac=1).values.astype(float)

        # the cross product is a vector normal to the plane
        cp = np.cross(p3 - p1, p2 - p1)
        # Flip normals so that all of them point into the same half-space (x >= 0);
        # otherwise opposite orientations would cancel out in the median.
        sgn_ = 2 * (cp[:, 0] >= 0) - 1
        cp *= sgn_.reshape(-1, 1)
        a, b, c = cp[:, 0], cp[:, 1], cp[:, 2]

        norm = np.sqrt(a ** 2 + b ** 2 + c ** 2)

        # Degenerate triples (repeated or collinear points) have a zero normal;
        # 0/0 yields NaN, which nanmedian skips, so the warning is just noise.
        with np.errstate(divide='ignore', invalid='ignore'):
            a /= norm
            b /= norm
            c /= norm

        d = -(a * p1[:, 0] + b * p1[:, 1] + c * p1[:, 2])

        sat_row = train_coefs.sat_id == sat_id
        for coef_name, candidates in zip(('A', 'B', 'C', 'D'), (a, b, c, d)):
            train_coefs.loc[sat_row, coef_name] = np.nanmedian(candidates)
    return train_coefs

In [7]:
# Fit one orbital plane per satellite from the training positions.
train_coefs = coefs_for_plane(full_train, col1='x', col2='y', col3='z')



In [15]:
def _plane_alignment_matrix(A, B, C):
    """Return the 3x3 rotation ``My(-beta) * Mx(alpha)`` for the plane normal ``(A, B, C)``.

    Applied to ``(A, B, C)`` it gives ``(0, 0, sqrt(A**2 + B**2 + C**2))``, i.e. the
    plane becomes parallel to Oxy.  Not defined (division by zero) when ``B == C == 0``.
    """
    d = np.sqrt(B ** 2 + C ** 2)
    r = np.sqrt(A ** 2 + d ** 2)
    # cos_alpha = C / d, sin_alpha = B / d, cos_beta = d / r, sin_beta = A / r
    return [[d / r, -B * A / d / r, -C * A / d / r],
            [0, C / d, -B / d],
            [A / r, B / r, C / r]]


def _major_axis_rotation(xy):
    """From centred 2-D points return ``(min_dist, max_dist, cos_teta, sin_teta)``.

    ``cos_teta`` / ``sin_teta`` describe the direction of the point that lies
    farthest from the origin (first one on ties).
    """
    distances = np.sqrt(xy[:, 0] ** 2 + xy[:, 1] ** 2)
    id_min = np.argmin(distances); id_max = np.argmax(distances)
    far = xy[id_max]  # vector where big polyos
    length = np.sqrt(far[0] ** 2 + far[1] ** 2)
    return distances[id_min], distances[id_max], far[0] / length, far[1] / length


def fit_transform(df, train_coefs, train=True, train_coefs_V=None, train_V=True):
    """Map every satellite's positions and velocities into its own plane frame.

    Three steps are applied per ``sat_id``:

    1. 3-D: translate along z by ``-D/C`` and rotate so the fitted plane becomes Oxy.
    2. Shift: subtract the centre ``(xc, yc)`` (mid-point of min and max) in the plane.
    3. Rotate in the plane so that the farthest point lies on the positive x axis.

    Results are written to the columns ``x_transformed .. Vz_transformed``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``sat_id, x, y, z, Vx, Vy, Vz``.  It is modified in place and returned.
    train_coefs : pandas.DataFrame
        Plane coefficients ``sat_id, A, B, C, D`` for positions.  With ``train=True``
        the columns ``xc, yc, small_polyos, big_polyos, cos_teta, sin_teta`` are
        (re)computed and stored in it; with ``train=False`` ``xc, yc, cos_teta,
        sin_teta`` must already be present and are only read.  Modified in place.
    train : bool
        Whether to fit the shift/rotation parameters for positions from ``df``.
    train_coefs_V : pandas.DataFrame or None
        Optional separate coefficients for velocities, same layout as
        ``train_coefs``.  If ``None``, velocities reuse the position parameters.
        It is updated in place but NOT returned.
    train_V : bool
        Like ``train`` but for ``train_coefs_V``; ignored when that is ``None``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``df`` and ``train_coefs``.

    Notes
    -----
    NOTE(review): velocities receive the same z translation (``-D/C``) and, when
    ``train_coefs_V`` is ``None``, the same ``(xc, yc)`` shift as positions.  This
    is kept as-is because ``inv_transform`` undoes exactly these operations.
    """
    # `!= None` on a DataFrame compares element-wise and cannot be used in `if`.
    has_V = train_coefs_V is not None

    pos_cols = ['x_transformed', 'y_transformed', 'z_transformed']
    vel_cols = ['Vx_transformed', 'Vy_transformed', 'Vz_transformed']
    for src, dst in zip(['x', 'y', 'z', 'Vx', 'Vy', 'Vz'], pos_cols + vel_cols):
        df[dst] = df[src]

    sat_ids = df.sat_id.unique()
    plane_cols = ['A', 'B', 'C', 'D']

    # TRANSFORM in 3d (new_coord)
    for sat_id in sat_ids:
        rows = df.sat_id == sat_id

        A, B, C, D = train_coefs.loc[train_coefs.sat_id == sat_id, plane_cols].values[0]
        M = _plane_alignment_matrix(A, B, C)
        offset = np.array([0.0, 0.0, -D / C])  # moves the plane through the origin

        coord = np.dot(M, (df.loc[rows, pos_cols].values + offset).T).T
        for k, col in enumerate(pos_cols):
            df.loc[rows, col] = coord[:, k]

        if has_V:
            A, B, C, D = train_coefs_V.loc[train_coefs_V.sat_id == sat_id, plane_cols].values[0]
            M = _plane_alignment_matrix(A, B, C)
            offset = np.array([0.0, 0.0, -D / C])

        # Without train_coefs_V the position matrix and offset are reused here.
        velocity = np.dot(M, (df.loc[rows, vel_cols].values + offset).T).T
        for k, col in enumerate(vel_cols):
            df.loc[rows, col] = velocity[:, k]

    # TRANSFORM in 2d (shift_to_center_and_rotate)
    if train:
        train_coefs['xc'] = 0.0; train_coefs['yc'] = 0.0

    if has_V and train_V:
        train_coefs_V['xc'] = 0.0; train_coefs_V['yc'] = 0.0

    # shift
    for sat_id in sat_ids:
        rows = df.sat_id == sat_id
        sat = train_coefs.sat_id == sat_id

        if train:
            x_t = df.loc[rows, 'x_transformed']
            y_t = df.loc[rows, 'y_transformed']
            train_coefs.loc[sat, 'xc'] = (x_t.max() + x_t.min()) / 2
            train_coefs.loc[sat, 'yc'] = (y_t.max() + y_t.min()) / 2

        xc = train_coefs.loc[sat, 'xc'].values
        yc = train_coefs.loc[sat, 'yc'].values
        df.loc[rows, 'x_transformed'] = df.loc[rows, 'x_transformed'].values - xc
        df.loc[rows, 'y_transformed'] = df.loc[rows, 'y_transformed'].values - yc

        if has_V:
            sat_V = train_coefs_V.sat_id == sat_id
            if train_V:
                vx_t = df.loc[rows, 'Vx_transformed']
                vy_t = df.loc[rows, 'Vy_transformed']
                train_coefs_V.loc[sat_V, 'xc'] = (vx_t.max() + vx_t.min()) / 2
                train_coefs_V.loc[sat_V, 'yc'] = (vy_t.max() + vy_t.min()) / 2
            xc = train_coefs_V.loc[sat_V, 'xc'].values
            yc = train_coefs_V.loc[sat_V, 'yc'].values

        # Without train_coefs_V, xc/yc are still the position centre here.
        df.loc[rows, 'Vx_transformed'] = df.loc[rows, 'Vx_transformed'].values - xc
        df.loc[rows, 'Vy_transformed'] = df.loc[rows, 'Vy_transformed'].values - yc
    print("ROTATE")
    # rotate
    if train:
        train_coefs['small_polyos'] = 0.0; train_coefs['big_polyos'] = 0.0

    if has_V and train_V:
        train_coefs_V['small_polyos'] = 0.0; train_coefs_V['big_polyos'] = 0.0

    for sat_id in sat_ids:
        rows = df.sat_id == sat_id
        sat = train_coefs.sat_id == sat_id

        xy = df.loc[rows, ['x_transformed', 'y_transformed']].values
        if train:
            # Positional lookup of the farthest point: does not rely on the
            # DataFrame index being a contiguous integer range per satellite.
            small, big, cos_teta, sin_teta = _major_axis_rotation(xy)
            train_coefs.loc[sat, 'small_polyos'] = small
            train_coefs.loc[sat, 'big_polyos'] = big
            train_coefs.loc[sat, 'cos_teta'] = cos_teta
            train_coefs.loc[sat, 'sin_teta'] = sin_teta
        else:
            cos_teta = train_coefs.loc[sat, 'cos_teta'].values[0]
            sin_teta = train_coefs.loc[sat, 'sin_teta'].values[0]

        M = [[cos_teta, sin_teta],
             [-sin_teta, cos_teta]]
        coord = np.dot(M, xy.T).T
        df.loc[rows, 'x_transformed'] = coord[:, 0]
        df.loc[rows, 'y_transformed'] = coord[:, 1]

        v_xy = df.loc[rows, ['Vx_transformed', 'Vy_transformed']].values
        if has_V:
            sat_V = train_coefs_V.sat_id == sat_id
            if train_V:
                small, big, cos_teta, sin_teta = _major_axis_rotation(v_xy)
                train_coefs_V.loc[sat_V, 'small_polyos'] = small
                train_coefs_V.loc[sat_V, 'big_polyos'] = big
                train_coefs_V.loc[sat_V, 'cos_teta'] = cos_teta
                train_coefs_V.loc[sat_V, 'sin_teta'] = sin_teta
            else:
                cos_teta = train_coefs_V.loc[sat_V, 'cos_teta'].values[0]
                sin_teta = train_coefs_V.loc[sat_V, 'sin_teta'].values[0]

            M = [[cos_teta, sin_teta],
                 [-sin_teta, cos_teta]]

        # Without train_coefs_V the position rotation M is reused for velocities.
        velocity = np.dot(M, v_xy.T).T
        df.loc[rows, 'Vx_transformed'] = velocity[:, 0]
        df.loc[rows, 'Vy_transformed'] = velocity[:, 1]

    return df, train_coefs

In [16]:
# Fit on copies so that `full_train` and `train_coefs` keep their original contents.
full_train_new, train_coefs_new = fit_transform(
    full_train.copy(),
    train_coefs.copy(),
    train=True,
    train_coefs_V=None,
)
full_train_new.head()

ROTATE


Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,x_transformed,y_transformed,z_transformed,Vx_transformed,Vy_transformed,Vz_transformed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.22169,-20741.615306,-0.907527,-3.80493,-2.024133,35794.192615,1.818989e-12,-22.161497,9714.251354,-50.751224,0.367941
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-10555.500066,1649.289367,-24473.089556,-0.303704,-4.269816,-0.616468,33572.925393,-12002.66,16.20764,9712.691445,-50.383628,0.368054
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,-24271.169776,0.27488,-4.046788,0.718768,27489.640804,-22116.17,51.871159,9711.544248,-49.458535,0.366145
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-9149.620794,-20618.200201,-20765.019094,0.712437,-3.375202,1.718306,18953.905812,-29271.83,80.489815,9710.938369,-48.328595,0.363199
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,-14992.399986,0.989382,-2.522618,2.342237,9377.581413,-33289.15,100.44081,9710.777148,-47.248013,0.360066


In [18]:
def _plane_alignment_inverse(A, B, C):
    """Return the 3x3 rotation ``Mx(-alpha) * My(beta)`` for the plane normal ``(A, B, C)``.

    This is the transpose of the matrix ``My(-beta) * Mx(alpha)`` used by
    ``fit_transform``.  Not defined (division by zero) when ``B == C == 0``.
    """
    d = np.sqrt(B ** 2 + C ** 2)
    r = np.sqrt(A ** 2 + d ** 2)
    return [[d / r, 0, A / r],
            [-B * A / d / r, C / d, B / r],
            [-C * A / d / r, -B / d, C / r]]


def inv_transform(df, train_coefs, train_coefs_V=None):
    """Map in-plane predictions back to the original 3-D frame.

    Applies, per ``sat_id``, the inverse steps in reverse order: undo the
    in-plane rotation, add back the centre ``(xc, yc)``, then rotate out of the
    Oxy plane and add ``D/C`` to the z component.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``sat_id`` and ``x_pred, y_pred, z_pred, Vx_pred, Vy_pred, Vz_pred``.
        The results are written to ``*_pred_transformed`` columns; ``df`` is
        modified in place.
    train_coefs : pandas.DataFrame
        Per-satellite ``A, B, C, D, xc, yc, cos_teta, sin_teta`` for positions.
    train_coefs_V : pandas.DataFrame or None
        Optional separate parameters for velocities (same columns).  If ``None``
        the position parameters are applied to velocities as well.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object.
    """
    # `!= None` on a DataFrame compares element-wise and cannot be used in `if`.
    has_V = train_coefs_V is not None

    pos_cols = ['x_pred_transformed', 'y_pred_transformed', 'z_pred_transformed']
    vel_cols = ['Vx_pred_transformed', 'Vy_pred_transformed', 'Vz_pred_transformed']
    for name in ('x', 'y', 'z', 'Vx', 'Vy', 'Vz'):
        df[name + '_pred_transformed'] = df[name + '_pred']

    sat_ids = df.sat_id.unique()
    plane_cols = ['A', 'B', 'C', 'D']

    # INVERSE TRANSFORM in 2d (inv_shift_to_center_and_rotate)
    # rotate
    for sat_id in sat_ids:
        rows = df.sat_id == sat_id
        sat = train_coefs.sat_id == sat_id

        cos_teta = train_coefs.loc[sat, 'cos_teta'].values[0]
        sin_teta = train_coefs.loc[sat, 'sin_teta'].values[0]
        M = [[cos_teta, -sin_teta],
             [sin_teta, cos_teta]]
        coord = np.dot(M, df.loc[rows, pos_cols[:2]].values.T).T
        df.loc[rows, 'x_pred_transformed'] = coord[:, 0]
        df.loc[rows, 'y_pred_transformed'] = coord[:, 1]

        if has_V:
            sat_V = train_coefs_V.sat_id == sat_id
            cos_teta = train_coefs_V.loc[sat_V, 'cos_teta'].values[0]
            sin_teta = train_coefs_V.loc[sat_V, 'sin_teta'].values[0]
            M = [[cos_teta, -sin_teta],
                 [sin_teta, cos_teta]]

        # Without train_coefs_V the position rotation M is reused for velocities.
        velocity = np.dot(M, df.loc[rows, vel_cols[:2]].values.T).T
        df.loc[rows, 'Vx_pred_transformed'] = velocity[:, 0]
        df.loc[rows, 'Vy_pred_transformed'] = velocity[:, 1]

    # shift
    for sat_id in sat_ids:
        rows = df.sat_id == sat_id
        sat = train_coefs.sat_id == sat_id

        xc = train_coefs.loc[sat, 'xc'].values
        yc = train_coefs.loc[sat, 'yc'].values
        df.loc[rows, 'x_pred_transformed'] = df.loc[rows, 'x_pred_transformed'].values + xc
        df.loc[rows, 'y_pred_transformed'] = df.loc[rows, 'y_pred_transformed'].values + yc

        if has_V:
            sat_V = train_coefs_V.sat_id == sat_id
            xc = train_coefs_V.loc[sat_V, 'xc'].values
            yc = train_coefs_V.loc[sat_V, 'yc'].values

        # Without train_coefs_V, xc/yc are still the position centre here.
        df.loc[rows, 'Vx_pred_transformed'] = df.loc[rows, 'Vx_pred_transformed'].values + xc
        df.loc[rows, 'Vy_pred_transformed'] = df.loc[rows, 'Vy_pred_transformed'].values + yc

    # INVERSE TRANSFORM in 3d (inv_new_coord)
    for sat_id in sat_ids:
        rows = df.sat_id == sat_id

        A, B, C, D = train_coefs.loc[train_coefs.sat_id == sat_id, plane_cols].values[0]
        M = _plane_alignment_inverse(A, B, C)
        offset = np.array([0.0, 0.0, D / C])  # restores the plane's z offset

        coord = np.dot(M, df.loc[rows, pos_cols].values.T).T + offset
        for k, col in enumerate(pos_cols):
            df.loc[rows, col] = coord[:, k]

        if has_V:
            A, B, C, D = train_coefs_V.loc[train_coefs_V.sat_id == sat_id, plane_cols].values[0]
            M = _plane_alignment_inverse(A, B, C)
            offset = np.array([0.0, 0.0, D / C])

        # Without train_coefs_V the position matrix and offset are reused here.
        velocity = np.dot(M, df.loc[rows, vel_cols].values.T).T + offset
        for k, col in enumerate(vel_cols):
            df.loc[rows, col] = velocity[:, k]

    return df

In [19]:
# Round-trip setup: feed the transformed values back in as "predictions".
full_train_inv = full_train_new.copy()
for axis_name in ('x', 'y', 'z', 'Vx', 'Vy', 'Vz'):
    full_train_inv[axis_name + '_pred'] = full_train_inv[axis_name + '_transformed']

# inv_transform writes the *_pred_transformed columns into full_train_inv in place.
inv_transform(full_train_inv, train_coefs_new)
full_train_inv.head()

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,...,z_pred,Vx_pred,Vy_pred,Vz_pred,x_pred_transformed,y_pred_transformed,z_pred_transformed,Vx_pred_transformed,Vy_pred_transformed,Vz_pred_transformed
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.22169,...,-22.161497,9714.251354,-50.751224,0.367941,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-10555.500066,1649.289367,...,16.20764,9712.691445,-50.383628,0.368054,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,...,51.871159,9711.544248,-49.458535,0.366145,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-9149.620794,-20618.200201,...,80.489815,9710.938369,-48.328595,0.363199,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,...,100.44081,9710.777148,-47.248013,0.360066,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703


In [20]:
# check: total absolute error of the position round trip (should be ~0)
abs_err_x = np.abs(full_train_inv['x_pred_transformed'].values - full_train_new['x'].values)
abs_err_y = np.abs(full_train_inv['y_pred_transformed'].values - full_train_new['y'].values)
abs_err_z = np.abs(full_train_inv['z_pred_transformed'].values - full_train_new['z'].values)
print(np.sum(abs_err_x + abs_err_y + abs_err_z))

5.87595575096949e-06
