In [285]:
from tqdm import tqdm
import numpy as np
import pandas as pd
from scipy.interpolate import splrep, splev
import datetime as dt
import matplotlib.pyplot as plt
from fbprophet import Prophet
from scipy.interpolate import InterpolatedUnivariateSpline
%matplotlib inline
plt.rcParams.update({'font.size': 18})

In [286]:
def smape(satellite_predicted_values, satellite_true_values):
    """
    Symmetric mean absolute percentage error of a prediction.

    All operations are element-wise; the result is the mean of
    |predicted - true| / (|predicted| + |true|).
    """
    abs_error = np.abs(satellite_predicted_values - satellite_true_values)
    scale = np.abs(satellite_predicted_values) + np.abs(satellite_true_values)
    return np.mean(abs_error / scale)


def drop_close(t, x, eps=10**9):
    '''
    Thin out samples that are followed too closely by the next one.

    t: time array (or series); x: data array (or series);
    eps: minimal gap to the next sample, in nanoseconds.

    A sample is kept only when the following time stamp is more than eps
    later; the last sample is always kept. Returns the filtered (t, x) as
    numpy arrays.
    '''
    # Work on plain arrays: with pandas Series the neighbour difference below
    # would be aligned on the index and the shapes would not match.
    times = np.array(t)
    values = np.array(x)
    gap_to_next = times[1:] - times[:-1]
    keep = np.concatenate((gap_to_next > eps, [True]))
    return times[keep], values[keep]


def resample(t, x, step=10 * 10**9, t_new=None, return_t=False):
    '''
    Resample a time series on a new time grid using a spline fit.

    t: time array (or series);
    x: data array (or series);
    step: spacing of the generated grid, 10 seconds (in nanoseconds) by default;
    t_new: time points to evaluate the spline at; when None,
        np.arange(t[0], t[-1], step) is built from the times that remain
        after drop_close removed too-close samples;
    return_t: when True return (t_new, x_new), otherwise only x_new.

    Raises ValueError (chained to the original error) when the spline cannot
    be fitted or evaluated.
    '''
    t, x = drop_close(t, x)
    if t_new is None:
        t_new = np.arange(t[0], t[-1], step)
    try:
        spl = splrep(t, x)
        x_new = splev(t_new, spl)
    except Exception as err:
        # `Exception` instead of a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into a ValueError; the cause is chained.
        # The message is built on one line: the former backslash continuation
        # put the indentation spaces of the source into the text.
        raise ValueError(
            f'interpolation error, x length = {len(x)}, t_new length = {len(t_new)}'
        ) from err

    return (t_new, x_new) if return_t else x_new


def get_peaks(array):
    '''
    Indices of the strict local maxima of `array`.

    An interior element is reported only when it is strictly greater than both
    of its neighbours, so a "smooth" peak such as 1 2 3 9 9 3 2 1 yields no
    index. The first and the last element are never reported.
    '''
    middle = array[1:-1]
    above_left = middle > array[:-2]
    above_right = middle > array[2:]
    interior_hits = np.nonzero(above_right & above_left)[0]
    # shift back from positions in `middle` to positions in `array`
    return interior_hits + 1


def get_satellite_data(data, sat_id):
    '''
    Select the rows of `data` whose 'sat_id' column equals `sat_id`.
    '''
    belongs_to_satellite = data['sat_id'] == sat_id
    return data.loc[belongs_to_satellite]


def remove_time_jumps(satellite_data, features_list=('x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim'),
                      suffix='_jumps_removed',time_threshold = 0.00003):
    '''
    Removes time jumps in the simulation for a single satellite.

    The rows are assumed to have been simulated on a regular time grid
    (t_sim = row label * most frequent time step) while the recorded time
    stamps ('epoch') may deviate from it. Wherever the recorded time differs
    from the grid time by more than time_threshold, the feature value is
    replaced by a linear interpolation between two neighbouring grid points,
    evaluated at the recorded time.

    satellite_data: data frame of one satellite with an 'epoch' column and the
        columns in features_list. Rows are accessed by the labels 0..n-1, so
        the frame is expected to carry such an index.
        Side effect: the columns 't' (days since the first epoch), 'dt'
        (difference of consecutive 't') and 't_sim' are written into it.
    features_list: names of the columns to correct.
    suffix: appended to each feature name to form the output column name.
    time_threshold: tolerated deviation between 't' and 't_sim', in days.
        Author's note: 0.00003 is sufficient for the train and test data,
        0.00002 will throw errors.

    Returns a new data frame with one column per feature (name + suffix) and
    one row per input row, indexed 0..n-1.
    '''
    # elapsed time in days relative to the row labelled 0
    satellite_data['t'] = ((pd.to_datetime(satellite_data['epoch']) - pd.to_datetime(satellite_data['epoch'])[0]) /
                           np.timedelta64(1, 'D')).astype(float)
    satellite_data['dt'] = satellite_data['t'].diff()
    # the most frequent time interval
    t_standard = satellite_data['dt'].value_counts().index[0]

    # time steps used for simulations
    satellite_data['t_sim'] = satellite_data.index.values * t_standard

    n = satellite_data.shape[0]
    corrected_features = []
    for feature_name in features_list:
        corrected_feature = [0] * n
        # the first value is taken over unchanged
        corrected_feature[0] = satellite_data[feature_name][0]

        feature = satellite_data[feature_name]

        # interior rows only; the last row is handled after the loop
        for j in range(1, n - 1):
            if satellite_data.t[j] < satellite_data.t_sim[j] - time_threshold:
                # approximate by the left side

                # look for the interval: walk back until the grid time is no
                # longer later than the recorded time (within the threshold)
                step = 0
                while satellite_data.t[j] < satellite_data.t_sim[j - step] - time_threshold:
                    step += 1
                # linear interpolation between grid points j-step-1 and j-step,
                # evaluated at the recorded time t[j]
                corrected_feature[j] = feature[j - step] - (satellite_data.t_sim[j - step] - satellite_data.t[j]) / (
                            satellite_data.t_sim[j - step] - satellite_data.t_sim[j - step - 1]) * (
                                              feature[j - step] - feature[j - step - 1])
            elif satellite_data.t[j] > satellite_data.t_sim[j] + time_threshold:
                # approximate by the right side

                # look for the interval: walk forward until the grid time is no
                # longer earlier than the recorded time (within the threshold)
                step = 0
                while satellite_data.t[j] > satellite_data.t_sim[j + step] + time_threshold:
                    step += 1

                # linear interpolation between grid points j+step and j+step+1,
                # evaluated at the recorded time t[j]
                corrected_feature[j] = feature[j + step + 1] - (satellite_data.t_sim[j + step + 1] - satellite_data.t[j]) / (
                            satellite_data.t_sim[j + step + 1] - satellite_data.t_sim[j + step]) * (
                                              feature[j + step + 1] - feature[j + step])
            else:
                # recorded and grid time agree within the threshold
                corrected_feature[j] = feature[j]

        # the last row receives the same offset as the row before it
        corrected_feature[n - 1] = feature[n - 1] + corrected_feature[n - 2] - feature[n - 2]
        corrected_features.append(corrected_feature)
    # one list per feature -> rows; transpose so that features become columns
    return pd.DataFrame(corrected_features, index=[f + suffix for f in features_list]).T

In [287]:
def insert_previous_and_shift(df,col_name,ind):
    '''
    Duplicate the previous value of a column at a position and shift the
    rest of the column down by one row.

    df: data frame; it is modified in place and also returned;
    col_name: label of the column to modify;
    ind: integer position (not label) at which the previous value is inserted.

    After the call df[col_name] holds, position-wise,
    old[:ind], old[ind-1], old[ind:-1]; the last value is dropped, so the
    length of the column does not change.
    This is used for remove_time_jumps_fast.
    '''
    col_pos = df.columns.get_loc(col_name)
    # snapshot of the shifted column, taken before anything is overwritten
    shifted_values = df[col_name].shift(1).values
    # Write through df.iloc in one indexing step. The chained form
    # `df[col_name].iloc[...] = ...` may assign to a temporary object
    # (SettingWithCopyWarning) and has no effect with pandas Copy-on-Write.
    df.iloc[ind, col_pos] = df.iloc[ind - 1, col_pos]
    if ind + 1 < len(df):
        df.iloc[ind + 1:, col_pos] = shifted_values[ind + 1:]
    return df

In [288]:
def remove_time_jumps_fast(data, features_list=
                           ('x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim'),
                           threshold = 0.000001):
    '''
    Removes time jumps in the simulation for a single satellite.

    A row whose time stamp follows the previous one by less than `threshold`
    is treated as a jump: for every feature the previous value is repeated at
    that row and the remaining values are shifted down by one row (see
    insert_previous_and_shift).

    data: data frame of one satellite with an 'epoch' column and the columns
        in features_list. It is modified in place: the columns 't' (days
        since the first row's epoch) and 'dt' (difference of consecutive 't')
        are added and the features are replaced by the correction.
    features_list: names of the columns to correct.
    threshold: minimal time step in days; note that it is not the same
        quantity as time_threshold in remove_time_jumps.

    Returns the modified data frame.
    Raises KeyError when `data` has no rows.
    '''
    if len(data) == 0:
        # same exception type as the former label lookup of the first epoch,
        # which callers may already be catching
        raise KeyError('remove_time_jumps_fast: data has no rows')

    epoch = pd.to_datetime(data['epoch'])
    # first row by position, so the frame does not need a label 0
    data['t'] = ((epoch - epoch.iloc[0]) / np.timedelta64(1, 'D')).astype(float)
    data['dt'] = data['t'].diff(1)

    # Row positions, not labels: insert_previous_and_shift indexes by
    # position. NaN in 'dt' (first row) compares False and is never selected.
    positions_for_correction = np.flatnonzero((data['dt'] < threshold).values)
    for feature in features_list:
        for position in positions_for_correction:
            data = insert_previous_and_shift(data, feature, position)
    return data

## Transforming all the data

In [289]:
import utils
from LinearAlignment import LinearAlignment

In [290]:
features_list=['x', 'y', 'z', 'Vx', 'Vy', 'Vz']

In [291]:
# Training set: the original time stamp text is kept in 'time', while 'epoch'
# becomes the float value of its datetime64 representation.
data = pd.read_csv('data/train.csv', index_col = 'id')
data = data.assign(time=data['epoch'],
                   epoch=pd.to_datetime(data['epoch']).values.astype(float))
data.head()

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,1.388534e+18,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.22169,-20741.615306,-0.907527,-3.80493,-2.024133,2014-01-01T00:00:00.000
1,1.388537e+18,0,-10567.672384,1619.746066,-24451.813271,-0.30259,-4.272617,-0.612796,-10555.500066,1649.289367,-24473.089556,-0.303704,-4.269816,-0.616468,2014-01-01T00:46:43.000
2,1.38854e+18,0,-10578.684043,-10180.46746,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,-24271.169776,0.27488,-4.046788,0.718768,2014-01-01T01:33:26.001
3,1.388543e+18,0,-9148.251857,-20651.43746,-20720.381279,0.7156,-3.373762,1.722115,-9149.620794,-20618.200201,-20765.019094,0.712437,-3.375202,1.718306,2014-01-01T02:20:09.001
4,1.388546e+18,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,-14992.399986,0.989382,-2.522618,2.342237,2014-01-01T03:06:52.002


In [292]:
# Test set, prepared in the same way as the training set: the original time
# stamp text goes to 'time', 'epoch' becomes a float.
test_data = pd.read_csv('data/test.csv', index_col = 'id')
test_data = test_data.assign(time=test_data['epoch'],
                             epoch=pd.to_datetime(test_data['epoch']).values.astype(float))

In [293]:
test_data.head()

Unnamed: 0_level_0,sat_id,epoch,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,time
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3927,1,1.391213e+18,-13366.891347,-14236.753503,6386.774555,4.333815,-0.692764,0.810774,2014-02-01T00:01:45.162
3928,1,1.391214e+18,-7370.434039,-14498.77152,7130.411325,5.077413,0.360609,0.313402,2014-02-01T00:22:57.007
3929,1,1.391215e+18,-572.068654,-13065.289498,7033.794876,5.519106,2.01283,-0.539412,2014-02-01T00:44:08.852
3930,1,1.391217e+18,6208.945257,-9076.852425,5548.2969,4.849212,4.338955,-1.8696,2014-02-01T01:05:20.697
3931,1,1.391218e+18,10768.200284,-2199.706707,2272.014862,1.940505,6.192887,-3.167724,2014-02-01T01:26:32.542


In [294]:
result_df = []
alignment_model = LinearAlignment()

satellites_list = test_data['sat_id'].unique()

# Aligned frames of the individual satellites; they are concatenated once
# after the loop. DataFrame.append inside a loop copies the whole frame on
# every iteration and no longer exists in pandas >= 2.0.
transf_parts = []
for sat_id in tqdm(satellites_list):

    try:
        sat_data = get_satellite_data(data, sat_id)
        # rows [0, n_first) come from the training set and have ground truth
        n_first = sat_data.shape[0]

        test_sat_data = get_satellite_data(test_data, sat_id)

        sat_data = pd.concat([sat_data, test_sat_data], axis = 0, sort = False)

        # keep the original 'id' labels for the output frame
        index = sat_data.index
        pred = pd.DataFrame(index = index)

        # From here on sat_data carries a 0..n-1 index; all values are copied
        # into `pred` by position via .values. (The former
        # `sat_data.set_index(index)` discarded its result and was a no-op.)
        sat_data = remove_time_jumps_fast(sat_data.reset_index(drop=True))
    except KeyError as e:
        print(f'jump removal failed for satellite {sat_id}:\t{type(e).__name__} {e}')
        continue

    # NOTE: 40% of the train and test rows together, not of the train rows alone
    n_train = 4*len(sat_data) // 10

    pred['epoch'] = sat_data['epoch'].values
    pred['t'] = sat_data['t'].values
    pred['sat_id'] = sat_id

    try:
        t_all = sat_data['epoch'].values

        for feature_name in features_list:
            sim_all = sat_data[f'{feature_name}_sim'].values
            gt_all = sat_data[feature_name].values
            # ground truth of the validation rows [n_train, n_first)
            holdout_truth = gt_all[n_train:n_first]

            # option 1: model fitted on the sign-flipped signals
            # NOTE(review): fit receives -x / -gt while predict receives the
            # unflipped x; whether that is intended depends on
            # LinearAlignment -- confirm.
            alignment_model.fit(t=t_all[:n_train],
                                x=-sim_all[:n_train],
                                gt=-gt_all[:n_train])
            option1 = alignment_model.predict(t=t_all, x=sim_all)

            # option 2: model fitted on the signals as they are
            alignment_model.fit(t=t_all[:n_train],
                                x=sim_all[:n_train],
                                gt=gt_all[:n_train])
            option2 = alignment_model.predict(t=t_all, x=sim_all)

            score1 = utils.smape(option1[n_train:n_first], holdout_truth)
            score2 = utils.smape(option2[n_train:n_first], holdout_truth)

            # refit the better variant on all rows that have ground truth
            if score1 < score2:
                fit_x, fit_gt = -sim_all[:n_first], -gt_all[:n_first]
            else:
                fit_x, fit_gt = sim_all[:n_first], gt_all[:n_first]
            alignment_model.fit(t=t_all[:n_first], x=fit_x, gt=fit_gt)

            pred[f'{feature_name}_sim'] = alignment_model.predict(t=t_all, x=sim_all)
            pred[feature_name] = gt_all

    except Exception as e:
        print(f'linear alignment failed for satellite {sat_id}:\t{type(e).__name__} {e}')
        continue

    transf_parts.append(pred)

transf_df = pd.concat(transf_parts, sort = False) if transf_parts else pd.DataFrame([])


  0%|                                                                                          | 0/300 [00:00<?, ?it/s]
  0%|▎                                                                                 | 1/300 [00:00<04:36,  1.08it/s]
  1%|▌                                                                                 | 2/300 [00:01<04:06,  1.21it/s]
  1%|▊                                                                                 | 3/300 [00:02<03:39,  1.35it/s]
  1%|█                                                                                 | 4/300 [00:02<03:21,  1.47it/s]
  2%|█▎                                                                                | 5/300 [00:03<03:05,  1.59it/s]
  2%|█▋                                                                                | 6/300 [00:03<02:55,  1.68it/s]
  2%|█▉                                                                                | 7/300 [00:04<03:33,  1.37it/s]
  3%|██▏                               

 23%|██████████████████▎                                                              | 68/300 [00:49<02:14,  1.73it/s]
 23%|██████████████████▋                                                              | 69/300 [00:49<02:10,  1.76it/s]
 23%|██████████████████▉                                                              | 70/300 [00:50<02:08,  1.79it/s]
 24%|███████████████████▏                                                             | 71/300 [00:51<02:07,  1.79it/s]
 24%|███████████████████▍                                                             | 72/300 [00:51<02:10,  1.74it/s]
 24%|███████████████████▋                                                             | 73/300 [00:52<02:10,  1.74it/s]
 25%|███████████████████▉                                                             | 74/300 [00:53<02:46,  1.36it/s]
 25%|████████████████████▎                                                            | 75/300 [00:54<02:43,  1.38it/s]
 25%|████████████████████▌              

 45%|████████████████████████████████████▎                                           | 136/300 [01:29<01:35,  1.71it/s]
 46%|████████████████████████████████████▌                                           | 137/300 [01:30<01:33,  1.75it/s]
 46%|████████████████████████████████████▊                                           | 138/300 [01:30<01:34,  1.71it/s]
 46%|█████████████████████████████████████                                           | 139/300 [01:31<01:32,  1.73it/s]
 47%|█████████████████████████████████████▎                                          | 140/300 [01:31<01:31,  1.75it/s]
 47%|█████████████████████████████████████▌                                          | 141/300 [01:32<01:36,  1.64it/s]
 47%|█████████████████████████████████████▊                                          | 142/300 [01:33<01:33,  1.68it/s]
 48%|██████████████████████████████████████▏                                         | 143/300 [01:33<01:30,  1.73it/s]
 48%|███████████████████████████████████

 68%|██████████████████████████████████████████████████████▍                         | 204/300 [02:14<01:04,  1.48it/s]
 68%|██████████████████████████████████████████████████████▋                         | 205/300 [02:14<01:03,  1.49it/s]
 69%|██████████████████████████████████████████████████████▉                         | 206/300 [02:15<01:00,  1.55it/s]
 69%|███████████████████████████████████████████████████████▏                        | 207/300 [02:16<00:58,  1.58it/s]
 69%|███████████████████████████████████████████████████████▍                        | 208/300 [02:16<00:57,  1.61it/s]
 70%|███████████████████████████████████████████████████████▋                        | 209/300 [02:17<00:57,  1.59it/s]
 70%|████████████████████████████████████████████████████████                        | 210/300 [02:18<00:57,  1.56it/s]
 70%|████████████████████████████████████████████████████████▎                       | 211/300 [02:18<00:58,  1.53it/s]
 71%|███████████████████████████████████

 91%|████████████████████████████████████████████████████████████████████████▌       | 272/300 [03:00<00:21,  1.31it/s]
 91%|████████████████████████████████████████████████████████████████████████▊       | 273/300 [03:01<00:21,  1.26it/s]
 91%|█████████████████████████████████████████████████████████████████████████       | 274/300 [03:02<00:19,  1.34it/s]
 92%|█████████████████████████████████████████████████████████████████████████▎      | 275/300 [03:02<00:18,  1.36it/s]
 92%|█████████████████████████████████████████████████████████████████████████▌      | 276/300 [03:03<00:18,  1.33it/s]
 92%|█████████████████████████████████████████████████████████████████████████▊      | 277/300 [03:04<00:16,  1.40it/s]
 93%|██████████████████████████████████████████████████████████████████████████▏     | 278/300 [03:04<00:15,  1.45it/s]
 93%|██████████████████████████████████████████████████████████████████████████▍     | 279/300 [03:05<00:14,  1.49it/s]
 93%|███████████████████████████████████

In [295]:
transf_df.head()

Unnamed: 0_level_0,epoch,t,sat_id,x_sim,x,y_sim,y,z_sim,z,Vx_sim,Vx,Vy_sim,Vy,Vz_sim,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1819,1.388534e+18,0.0,1,10405.813755,10390.313089,-2771.18076,-2796.458271,3166.926302,3179.562085,2.508879,2.520477,6.152996,6.14993,-2.826227,-2.827599
1820,1.388536e+18,0.01472,1,11199.459274,11195.606833,5105.200658,5078.653968,-848.597754,-839.076593,-1.132222,-1.126667,5.822436,5.826412,-3.250054,-3.255872
1821,1.388537e+18,0.029441,1,8237.279843,8235.556436,11469.206013,11445.904263,-4684.829256,-4680.514023,-3.228498,-3.230227,4.124297,4.126433,-2.700287,-2.704441
1822,1.388538e+18,0.044161,1,3565.004307,3560.149776,15655.733441,15634.195146,-7655.277835,-7654.177182,-3.960607,-3.964696,2.521337,2.520867,-1.97677,-1.978151
1823,1.388539e+18,0.058882,1,-1571.839073,-1580.476891,18044.683022,18023.318335,-9755.479443,-9755.287599,-4.046854,-4.050865,1.298053,1.296388,-1.347061,-1.346512


In [296]:
transf_df.shape

(599254, 15)

In [None]:
# # for now, simply append non-transformed values for the satellites 252 and 301
# transf_df = transf_df.append(remove_time_jumps_fast(get_satellite_data(data, 252))[transf_df.columns])
# transf_df = transf_df.append(remove_time_jumps_fast(get_satellite_data(data, 301))[transf_df.columns])
# transf_df.shape

In [306]:
import numpy as np
np.random.seed(50)


def _prophet_residual_forecast(history, future, period):
    '''
    Fit a Prophet model to `history` (columns 'ds' and 'y') and return its
    'yhat' forecast for the dates in `future` as an array.

    `period` is used as the period of the custom seasonality 'grid' and,
    divided by 20, as changepoint_prior_scale.
    '''
    model = Prophet(changepoint_prior_scale = period/20,
                    weekly_seasonality = False,
                    yearly_seasonality = False).add_seasonality(name='grid', period = period, fourier_order=50)
    model.fit(history)
    return model.predict(future).loc[:, 'yhat'].values


fail_list = []
satellites_list = transf_df['sat_id'].unique()
res = pd.DataFrame([])
# Predictions of the individual satellites; joined in the `finally` clause so
# that the finished satellites stay available in `res` when the long-running
# loop is interrupted. (DataFrame.append no longer exists in pandas >= 2.0.)
res_parts = []
try:
    # NOTE(review): only the first 12 satellites are processed here -- this
    # looks like a leftover from debugging; confirm before a full run.
    for sat_id in tqdm(satellites_list[:12]):

        sat_data = transf_df[transf_df.sat_id == sat_id]

        n_train = 4* len(sat_data) // 10
        # number of rows with ground truth; the rows after them are predicted
        n_first = len(sat_data[~pd.isna(sat_data.x)])

        pred = pd.DataFrame(index = sat_data.iloc[n_first:].index)
        pred['epoch'] = sat_data['epoch'].iloc[n_first:].values
        pred['t'] = sat_data['t'].iloc[n_first:].values
        pred['sat_id'] = sat_id

        if sat_id in [26]:
            # satellite 26: the simulated values are used without correction
            for feature_name in features_list:
                pred[feature_name] = sat_data[f'{feature_name}_sim'].values[n_first:]
        else:
            try:

                for feature_name in features_list:
                    sim_all = sat_data[f'{feature_name}_sim'].values
                    gt_all = sat_data[feature_name].values
                    # ground truth of the validation rows [n_train, n_first)
                    holdout_truth = gt_all[n_train:n_first]

                    # Distance between the first and the third zero crossing
                    # of the simulated feature, used as seasonality period.
                    # Fewer than three roots raise IndexError, handled below.
                    q = InterpolatedUnivariateSpline(sat_data['t'].values[:n_train], sim_all[:n_train]).roots()
                    T = (q[2]-q[0])

                    # create a DataFrame for prophet model():
                    # y is the residual between ground truth and simulation
                    ts = pd.DataFrame([])
                    ts['ds'] = pd.to_datetime(sat_data['epoch']).values
                    ts['y'] = gt_all - sim_all

                    # first forecast: fit on [0, n_train), predict [n_train, n_first)
                    holdout_yhat = _prophet_residual_forecast(ts[:n_train],
                                                              ts[n_train:n_first][['ds']],
                                                              T)
                    new_values = sim_all[n_train:n_first] + holdout_yhat

                    smape_plain = utils.smape(sim_all[n_train:n_first], holdout_truth)
                    smape_corrected = utils.smape(new_values, holdout_truth)

                    print('{}[{}]  no correction: {}, correction:{}'.format(sat_id,
                                                                        feature_name,
                                                                        smape_plain,
                                                                        smape_corrected))

                    if smape_plain < smape_corrected:
                        # the correction made the validation score worse
                        pred[feature_name] = sim_all[n_first:]
                        print(sat_id, feature_name, 'does not work')
                        fail_list.append((sat_id, feature_name))
                    else:
                        # fit a new model on the whole training range and
                        # forecast the rows without ground truth
                        final_yhat = _prophet_residual_forecast(ts[:n_first],
                                                                ts[n_first:][['ds']],
                                                                T)
                        pred[feature_name] = sim_all[n_first:] + final_yhat

            except Exception as e:
                # the satellite is skipped as a whole and is missing in `res`
                print(f'phophet failed for satellite {sat_id}:\t{type(e).__name__} {e}')
                continue

        res_parts.append(pred)
finally:
    if res_parts:
        res = pd.concat(res_parts, sort=False)




  0%|                                                                                           | 0/12 [00:00<?, ?it/s]

1[x]  no correction: 0.19119642955303978, correction:0.038746070579538786
1[y]  no correction: 0.18267242666654607, correction:0.0015251064080587693
1[z]  no correction: 0.17772439658633013, correction:0.10822315641831583
1[Vx]  no correction: 0.1498960586239248, correction:0.13625637376844116
1[Vy]  no correction: 0.1736203022100542, correction:0.0035356489053515474


KeyboardInterrupt: 

In [304]:
res.head()

Unnamed: 0_level_0,epoch,t,sat_id,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3927,1.391213e+18,31.001217,1,-24616.419005,-10911.913083,6820.955452,3.084691,-1.304178,1.158415
3928,1.391214e+18,31.015938,1,-20565.052445,-12384.918166,7997.732676,3.779731,-0.994383,0.974036
3929,1.391215e+18,31.030658,1,-15631.594879,-13379.047335,8708.483922,4.495015,-0.538857,0.681731
3930,1.391217e+18,31.045378,1,-9832.409606,-13654.651366,8630.504434,5.064576,0.158446,0.208317
3931,1.391218e+18,31.060099,1,-3350.292988,-12793.569884,7249.772139,4.952802,1.293845,-0.592116


In [305]:
res.to_csv('sub10_12_299.csv')

In [583]:
res1 = pd.read_csv('sub10_12_299.csv', index_col = 'id')
res1.head()

Unnamed: 0_level_0,epoch,t,sat_id,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3927,1.391213e+18,31.001217,1,-24616.419005,-10911.913083,6820.955452,3.084691,-1.304178,1.158415
3928,1.391214e+18,31.015938,1,-20565.052445,-12384.918166,7997.732676,3.779731,-0.994383,0.974036
3929,1.391215e+18,31.030658,1,-15631.594879,-13379.047335,8708.483922,4.495015,-0.538857,0.681731
3930,1.391217e+18,31.045378,1,-9832.409606,-13654.651366,8630.504434,5.064576,0.158446,0.208317
3931,1.391218e+18,31.060099,1,-3350.292988,-12793.569884,7249.772139,4.952802,1.293845,-0.592116


In [584]:
res1.shape

(283869, 9)

In [585]:
test_data.shape

(284071, 9)

In [586]:
set(test_data.sat_id.values).difference(set(res1.sat_id.values))

{26, 301}

In [587]:
# Rows of satellite 301 that have no ground-truth x value
sat_301_data = transf_df.loc[transf_df['sat_id'] == 301]
sat_301_data = sat_301_data.loc[sat_301_data['x'].isna()]

In [588]:
# Drop the (empty) ground-truth columns and let the *_sim columns take their
# names, so the frame has the same layout as res1.
sat_301_data = (
    sat_301_data
    .drop(columns=['x', 'y', 'z', 'Vx', 'Vy', 'Vz'])
    .rename(columns={f'{axis}_sim': axis for axis in ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']})
)
sat_301_data.head()

Unnamed: 0_level_0,epoch,t,sat_id,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
589202,1.391235e+18,31.257923,301,-52592.999298,-60074.10289,-139573.635865,-1.323422,0.071619,0.418158
589203,1.391262e+18,31.573659,301,-86473.537938,-54655.422454,-158762.920979,-1.185425,0.208074,0.443904
589204,1.39129e+18,31.889396,301,-116576.927142,-47126.408813,-171085.163335,-1.049094,0.282414,0.469019
589205,1.391317e+18,32.205133,301,-143117.229722,-38203.789612,-178168.278315,-0.92178,0.33074,0.493551
589206,1.391344e+18,32.520869,301,-166305.94623,-28399.763252,-181059.157342,-0.801008,0.361295,0.517539


In [589]:
# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
# NOTE: running this cell twice adds the rows of satellite 301 twice.
res1 = pd.concat([res1, sat_301_data])
res1.shape

(283957, 9)

In [590]:
# Rows of satellite 26 that have no ground-truth x value; the (empty)
# ground-truth columns are dropped and the *_sim columns take their names.
sat_26_data = transf_df.loc[transf_df['sat_id'] == 26]
sat_26_data = (
    sat_26_data
    .loc[sat_26_data['x'].isna()]
    .drop(columns=['x', 'y', 'z', 'Vx', 'Vy', 'Vz'])
    .rename(columns={f'{axis}_sim': axis for axis in ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']})
)
sat_26_data.head()

Unnamed: 0_level_0,epoch,t,sat_id,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
50725,1.391232e+18,31.221753,26,148699.986678,-15261.348146,-170190.755733,0.375843,0.72151,-0.262729
50726,1.391253e+18,31.465673,26,151614.711956,594.218047,-171985.237039,0.263672,0.690406,-0.13189
50727,1.391274e+18,31.709593,26,152288.113678,16428.611911,-171184.891073,0.156594,0.648208,-0.008761
50728,1.391295e+18,31.953512,26,150744.241316,32005.87651,-167885.541702,0.052642,0.59344,0.109114
50729,1.391316e+18,32.197432,26,146971.313778,47111.141442,-162141.599215,-0.049894,0.523472,0.223752


In [591]:
# pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
# NOTE: running this cell twice adds the rows of satellite 26 twice.
res1 = pd.concat([res1, sat_26_data])
res1.shape

(284071, 9)

In [592]:
# Correction by hand (need to do something about it)

# Satellite 252: the last values of some features are set to zero ...
sat_id = 252
is_sat = res1.sat_id == sat_id
for column, n_last in [('x', 65), ('Vx', 4), ('Vy', 4), ('Vz', 84)]:
    val = res1.loc[is_sat, column].values
    val[-n_last:] = 0
    res1.loc[is_sat, column] = val

# ... and the last y value repeats the one before it.
val = res1.loc[is_sat, 'y'].values
val[-1] = val[-2]
res1.loc[is_sat, 'y'] = val

# For these satellites the last 50 values of every feature are set to zero.
for sat_id in [372,523,587,473,514]:
    is_sat = res1.sat_id == sat_id
    for f in ['x','y','z','Vx','Vy','Vz']:
        val = res1.loc[is_sat, f].values
        val[-50:] = 0
        res1.loc[is_sat, f] = val







In [593]:
sub = pd.read_csv('data/submission.csv', index_col = 'id')

In [594]:
sub.head()

Unnamed: 0_level_0,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3927,-13366.891347,-14236.753503,6386.774555,4.333815,-0.692764,0.810774
3928,-7370.434039,-14498.77152,7130.411325,5.077413,0.360609,0.313402
3929,-572.068654,-13065.289498,7033.794876,5.519106,2.01283,-0.539412
3930,6208.945257,-9076.852425,5548.2969,4.849212,4.338955,-1.8696
3931,10768.200284,-2199.706707,2272.014862,1.940505,6.192887,-3.167724


In [595]:
# Bring the rows of res1 into the order of the ids in the sample submission.
# NOTE(review): 'sumission_index' is a misspelling of 'submission_index';
# the name is kept unchanged here.
sumission_index = sub.index
res1 = res1.loc[sumission_index,:]


In [598]:
res1 = res1.drop(columns = ['epoch','t','sat_id'])
res1.head()

Unnamed: 0_level_0,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3927,-24616.419005,-10911.913083,6820.955452,3.084691,-1.304178,1.158415
3928,-20565.052445,-12384.918166,7997.732676,3.779731,-0.994383,0.974036
3929,-15631.594879,-13379.047335,8708.483922,4.495015,-0.538857,0.681731
3930,-9832.409606,-13654.651366,8630.504434,5.064576,0.158446,0.208317
3931,-3350.292988,-12793.569884,7249.772139,4.952802,1.293845,-0.592116


In [599]:
# Write the prepared submission frame. The cell used to write `df`, a name
# that is never defined at notebook level (NameError on a fresh kernel).
res1.to_csv('submission_9.csv')