In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
import time
from tqdm import tqdm_notebook as tqdm


from AutoRegression import AutoRegression

In [2]:
# Directory with the input CSV files, relative to the notebook location.
PATH_TO_DATA = os.path.join('../data/')

# Build both file paths first, then read; each table uses its 'id' column as index.
train_csv_path = os.path.join(PATH_TO_DATA, 'train.csv')
test_csv_path = os.path.join(PATH_TO_DATA, 'test.csv')
full_train = pd.read_csv(train_csv_path, index_col='id')
full_test = pd.read_csv(test_csv_path, index_col='id')

In [3]:
def smape(satellite_predicted_values, satellite_true_values):
    """
        Returns the mean of |predicted - true| / (|predicted| + |true|),
        where the division, addition and subtraction are pointwise.

        Points where both the predicted and the true value are zero
        contribute 0 to the mean instead of producing NaN (0 / 0), so a
        single exact-zero pair no longer turns the whole score into NaN.

        satellite_predicted_values, satellite_true_values: numeric arrays
        of the same shape (or broadcastable to a common shape).
    """
    abs_error = np.asarray(
        np.abs(satellite_predicted_values - satellite_true_values), dtype=float)
    scale = np.asarray(
        np.abs(satellite_predicted_values) + np.abs(satellite_true_values), dtype=float)
    # Divide only where the denominator is non-zero; every other entry keeps
    # the 0.0 that the output buffer was initialised with.
    ratios = np.divide(abs_error, scale, out=np.zeros_like(abs_error), where=scale != 0)
    return np.mean(ratios)

In [2]:
WIDTH = 50  ## window width used for prediction; passed as AutoRegression(width=WIDTH) for every fitted model

In [4]:
def delete_duplicates(df, eps=10):
    """
        Returns df without "duplicates" - objects within each sat_id,
        which were recorded at almost the same time.

        Two consecutive rows of one sat_id count as duplicates when the
        epoch of the second one is less than `eps` seconds after the epoch
        of the first one; the earlier row of such a pair is dropped.

        df:  DataFrame with a 'sat_id' column and an 'epoch' column holding
             strings in the '%Y-%m-%dT%H:%M:%S.%f' format.
        eps: threshold in seconds.

        The input frame is left untouched; a new frame is returned.
    """
    labels_to_drop = []
    for _, sat_rows in df.groupby('sat_id', sort=False):
        # Parse as naive timestamps: this keeps the fractional seconds and does
        # not depend on the local timezone / DST rules of the machine.
        epochs = pd.to_datetime(sat_rows['epoch'], format='%Y-%m-%dT%H:%M:%S.%f')
        # gaps[i] = epoch[i + 1] - epoch[i] in seconds (first diff entry is NaT).
        gaps = epochs.diff().dt.total_seconds().values[1:]
        # Map positions to real index labels, so the index does not have to be
        # a contiguous integer range (e.g. when the frame was filtered before).
        labels_to_drop.extend(sat_rows.index[np.flatnonzero(gaps < eps)])
    return df.drop(index=labels_to_drop)

In [5]:
# The raw test table stays available as full_test; only a copy is de-duplicated.
full_test_wout_dup = delete_duplicates(full_test.copy())
# full_train is rebound here, so re-running this cell passes the already
# de-duplicated frame back into delete_duplicates.
full_train = delete_duplicates(full_train)

## Predict for the test dataset without duplicates

In [6]:
target_columns = ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']

# For every satellite present in the de-duplicated test table, fit one
# AutoRegression model per target column on that satellite's training rows
# and write its forecast into the satellite's test rows.
for sat_id in tqdm(full_test_wout_dup.sat_id.unique()):
    train_history = full_train[full_train.sat_id == sat_id]
    # Labels of this satellite's test rows and how many steps to forecast;
    # neither changes while the columns are being filled.
    test_row_labels = full_test_wout_dup[full_test_wout_dup.sat_id == sat_id].index
    horizon = len(test_row_labels)
    for col in target_columns:
        model = AutoRegression(width=WIDTH)
        model.fit(train_history[col].values)
        full_test_wout_dup.loc[test_row_labels, col] = model.predict(horizon)

HBox(children=(IntProgress(value=0, max=300), HTML(value='')))




## After this, we still need predictions for the objects (duplicates) that were deleted from the test set, so we fill them with the predicted values of the nearest object in time.

In [7]:
# Re-attach the predictions to the complete test table. Rows that were removed
# as duplicates have no prediction yet and appear as NaN in target_columns.
final_df = pd.concat([full_test, full_test_wout_dup[target_columns]], axis=1)

# delete_duplicates drops the *earlier* row of every near-simultaneous pair, so
# the nearest prediction in time sits on the following row of the same
# satellite: back-fill inside each sat_id. A plain forward fill over the whole
# table would copy the previous (one full step older) row instead and could
# leak values from one satellite into the next.
final_df[target_columns] = final_df.groupby('sat_id')[target_columns].bfill()
# Fallback for a gap with no later row in its satellite: use the previous one.
final_df[target_columns] = final_df.groupby('sat_id')[target_columns].ffill()

## Saving predictions

In [8]:
# Only the predicted columns are written; the index is stored under the 'id' header.
submission = final_df[target_columns]
submission.to_csv('submission.csv', index_label='id')

## Some checks to prevent submission errors

In [9]:
# Number of missing values per predicted column; every count should be 0.
final_df.loc[:, target_columns].isna().sum(axis=0)

x     0
y     0
z     0
Vx    0
Vy    0
Vz    0
dtype: int64

In [16]:
# Satellite and coordinate to inspect.
sat_id = 261
real_col_name = 'x'
sim_col_name = real_col_name + '_sim'

# Filter each table once instead of once per trace argument.
sat_test = final_df[final_df.sat_id == sat_id]
sat_train = full_train[full_train.sat_id == sat_id]

# (rows, column, legend prefix) for every trace. The test-set simulation is
# listed once; it used to be added twice with identical data.
trace_specs = [
    (sat_test, real_col_name, "Estimated "),
    (sat_test, sim_col_name, "Sim_test "),
    (sat_train, real_col_name, "Real "),
    (sat_train, sim_col_name, "Simulation "),
]

# No matplotlib figure is created here: the plot is drawn with plotly only.
fig = go.Figure()
for sat_rows, column, legend_prefix in trace_specs:
    fig.add_trace(go.Scatter(x=sat_rows.epoch,
                             y=sat_rows[column],
                             name=legend_prefix + real_col_name,
                             ))

fig.update_layout(title_text='Time Series for ' + real_col_name,
                  xaxis_rangeslider_visible=True,
                  yaxis_title=real_col_name)
fig.show()

<Figure size 720x360 with 0 Axes>