In [1]:
# Input CSVs and the destinations for their period-corrected counterparts.
PATH_TRAIN = "../competition_results/data/train.csv"
PATH_TEST = "../competition_results/data/test.csv"
PATH_SAVE_TRAIN = "../competition_results/data/train_fixed_period.csv"
# Must differ from PATH_SAVE_TRAIN: both frames are written at the end of the
# notebook, and a shared path would make the second write clobber the first.
PATH_SAVE_TEST = "../competition_results/data/test_fixed_period.csv"

In [2]:
from utils.coosys import cartesian_to_kepler as ctk, cartesian_to_quaternion as ctq
from utils.coosys import kepler_to_cartesian as ktc, quaternion_to_cartesian as qtc
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.linear_model import LinearRegression as LR
from sklearn.preprocessing import PolynomialFeatures as PF
from sklearn.model_selection import train_test_split as tts
from scipy.interpolate import UnivariateSpline as US
from sklearn.pipeline import Pipeline as PL
from sklearn.preprocessing import StandardScaler as SS
from scipy import signal
import spectrum
from tqdm.auto import tqdm
tqdm.pandas()

from matplotlib import pyplot as plt
%matplotlib inline

  from pandas import Panel


In [3]:
# Read both CSVs, derive a numeric "timestamp" column from the "epoch" column
# (via pd.Timestamp.timestamp) and order the rows by it.
train_data = (
    pd.read_csv(PATH_TRAIN, encoding="utf8")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["epoch"]).apply(pd.Timestamp.timestamp))
    .sort_values(by="timestamp")
)

test_data = (
    pd.read_csv(PATH_TEST, encoding="utf8")
    .assign(timestamp=lambda frame: pd.to_datetime(frame["epoch"]).apply(pd.Timestamp.timestamp))
    .sort_values(by="timestamp")
)

In [4]:
from scipy.signal import argrelmax, argrelmin

def evaluate_T(t, x):
    """Estimate the period of the 1-D signal ``x`` sampled at times ``t``.

    The estimate is the average of two numbers: the mean spacing (in ``t``)
    between consecutive relative minima of ``x`` and the mean spacing between
    consecutive relative maxima of ``x``.

    Parameters
    ----------
    t : 1-D array of sample times, same length as ``x``.
    x : 1-D array of signal values.

    Returns
    -------
    The averaged spacing as a numpy float. When fewer than two minima or
    fewer than two maxima are found, the corresponding mean is taken over an
    empty array and the result is NaN.
    """
    trough_times = t[argrelmin(x)]
    trough_spacing = np.mean(np.diff(trough_times))

    crest_times = t[argrelmax(x)]
    crest_spacing = np.mean(np.diff(crest_times))

    return np.mean([trough_spacing, crest_spacing])

def evaluate_T_all(t, x):
    """Average the ``evaluate_T`` period estimate over every column of ``x``.

    Parameters
    ----------
    t : 1-D array of sample times.
    x : 2-D array; each column is treated as a separate signal sampled at ``t``.

    Returns
    -------
    The mean of the per-column period estimates.
    """
    column_periods = []
    for column in range(x.shape[1]):
        column_periods.append(evaluate_T(t, x[:, column]))
    return np.mean(column_periods)

def prolong_sim(epoch, sim, T_sim, T_true, eps=0.1):
    """Build a linear interpolating spline of ``sim`` over ``epoch``, extended past the data when needed.

    The extension factor is ``T_sim / T_true + eps``. When it is below 1 the
    spline is fitted on the given samples only. Otherwise extra samples are
    appended after the data: the samples are split into 24 interleaved
    subsequences (every 24th point), each subsequence gets a linear regression
    of ``sim`` on ``epoch``, and that regression is evaluated at future times
    spaced ``T_sim`` apart after the subsequence's last epoch.

    Parameters
    ----------
    epoch : 1-D array of sample times.
    sim : 1-D array of simulated values at ``epoch``.
    T_sim : period used as the spacing of the extrapolated samples.
    T_true : period the extension factor is measured against.
    eps : margin added to the extension factor.

    Returns
    -------
    A ``UnivariateSpline`` with ``k=1, s=0`` over the (possibly extended) samples.
    """
    extension_factor = T_sim / T_true + eps

    # Nothing to extrapolate: interpolate the available samples as they are.
    if extension_factor < 1.:
        return US(epoch, sim, k=1, s=0.)

    # NOTE(review): 24 looks like the number of samples per period in this
    # dataset -- confirm before reusing with other data.
    stride = 24

    n_future = int((epoch[-1] - epoch[0]) / T_sim * (extension_factor - 1.)) + 1
    padding = n_future * stride

    extended_epoch = np.zeros(epoch.size + padding)
    extended_epoch[:epoch.size] = epoch

    extended_sim = np.zeros(sim.size + padding)
    extended_sim[:sim.size] = sim

    # Offsets of the extrapolated samples relative to a subsequence's last epoch.
    future_offsets = (np.arange(n_future) + 1) * T_sim

    for phase in range(stride):
        phase_epoch = epoch[phase::stride]
        phase_sim = sim[phase::stride]

        future_epoch = future_offsets + phase_epoch[-1]

        trend = LR().fit(phase_epoch.reshape(-1, 1), phase_sim)
        future_sim = trend.predict(future_epoch.reshape(-1, 1))

        # The strided slice is a view, so writing into its tail fills the
        # padded region of the extended arrays for this phase.
        extended_epoch[phase::stride][-n_future:] = future_epoch
        extended_sim[phase::stride][-n_future:] = future_sim

    return US(extended_epoch, extended_sim, k=1, s=0.)
    
    
def prolong_sim_all(epoch, sim, T_sim, T_true, eps=0.1):
    """Apply ``prolong_sim`` to every column of ``sim``.

    Parameters
    ----------
    epoch : 1-D array of sample times shared by all columns.
    sim : 2-D array; each column is passed to ``prolong_sim`` separately.
    T_sim, T_true : periods forwarded unchanged to ``prolong_sim``.
    eps : margin forwarded to ``prolong_sim``.

    Returns
    -------
    A list with one ``prolong_sim`` result per column, in column order.
    """
    # Forward the caller's eps; previously the literal 0.1 was always passed,
    # so a non-default eps given to this function was silently ignored.
    return [prolong_sim(epoch, sim[:, i], T_sim, T_true, eps=eps) for i in range(sim.shape[1])]

In [5]:
def fix_sat(train, test, sat_id):
    """Re-time the simulated trajectory of one satellite that has both train and test rows.

    The period of the simulated columns (estimated on train + test rows) and
    the period of the true columns (estimated on train rows) are computed with
    ``evaluate_T_all``. Each simulated column is turned into a spline by
    ``prolong_sim_all`` and re-sampled at ``epoch / (T_true / T_sim)``.

    Parameters
    ----------
    train : DataFrame with columns ``sat_id``, ``id``, ``epoch``, ``timestamp``,
        the true columns ``x, y, z, Vx, Vy, Vz`` and the ``*_sim`` columns.
    test : DataFrame with columns ``sat_id``, ``id``, ``epoch``, ``timestamp``
        and the ``*_sim`` columns.
    sat_id : value of ``sat_id`` selecting the satellite's rows.

    Returns
    -------
    (new_train, new_test) : the satellite's train and test rows with the
        ``*_sim`` columns replaced by the re-timed values.
    """
    sim_cols = ["x_sim", "y_sim", "z_sim", "Vx_sim", "Vy_sim", "Vz_sim"]
    true_cols = ["x", "y", "z", "Vx", "Vy", "Vz"]

    train_sat = train[train.sat_id == sat_id]
    test_sat = test[test.sat_id == sat_id]

    train_sim = train_sat[sim_cols].to_numpy()
    test_sim = test_sat[sim_cols].to_numpy()
    train_true = train_sat[true_cols].to_numpy()

    train_stamps = train_sat["timestamp"].to_numpy()
    test_stamps = test_sat["timestamp"].to_numpy()

    # Shift times so the first train sample is at 0. Build new arrays instead
    # of using `-=`: to_numpy() may hand back a view of the frame's data, which
    # is read-only when pandas Copy-on-Write is active.
    begin = np.min(train_stamps)
    train_epoch = train_stamps - begin
    test_epoch = test_stamps - begin

    all_sim = np.concatenate([train_sim, test_sim], axis=0)
    all_epoch = np.concatenate([train_epoch, test_epoch], axis=0)

    T_sim = evaluate_T_all(all_epoch, all_sim)
    T_true = evaluate_T_all(train_epoch, train_true)

    koef = T_true / T_sim

    train_splines = prolong_sim_all(all_epoch, all_sim, T_sim, T_true)

    # Evaluate each spline once on the whole rescaled time axis rather than
    # point by point in a Python double loop.
    rescaled_epoch = all_epoch / koef
    fixed_sim = np.column_stack([spline(rescaled_epoch) for spline in train_splines])

    n_train = train_true.shape[0]

    # "id" is assigned first so the new frame picks up the rows' index before
    # the scalar sat_id is broadcast into it.
    new_train = pd.DataFrame(columns=["id", "epoch", "sat_id"] + true_cols + sim_cols)
    new_train["id"] = train_sat["id"]
    new_train["sat_id"] = sat_id
    new_train[true_cols] = train_sat[true_cols]
    new_train["epoch"] = train_sat["epoch"]
    new_train[sim_cols] = fixed_sim[:n_train]

    new_test = pd.DataFrame(columns=["id", "epoch", "sat_id"] + sim_cols)
    new_test["id"] = test_sat["id"]
    new_test["sat_id"] = sat_id
    new_test["epoch"] = test_sat["epoch"]
    new_test[sim_cols] = fixed_sim[n_train:]

    return new_train, new_test


def fix_sat_train(train, sat_id):
    """Re-time the simulated trajectory of one satellite using its train rows only.

    The periods of the simulated and of the true columns are both estimated on
    the train rows with ``evaluate_T_all``. Each simulated column is turned
    into a spline by ``prolong_sim_all`` and re-sampled at
    ``epoch / (T_true / T_sim)``.

    Parameters
    ----------
    train : DataFrame with columns ``sat_id``, ``id``, ``epoch``, ``timestamp``,
        the true columns ``x, y, z, Vx, Vy, Vz`` and the ``*_sim`` columns.
    sat_id : value of ``sat_id`` selecting the satellite's rows.

    Returns
    -------
    new_train : the satellite's train rows with the ``*_sim`` columns replaced
        by the re-timed values.
    """
    sim_cols = ["x_sim", "y_sim", "z_sim", "Vx_sim", "Vy_sim", "Vz_sim"]
    true_cols = ["x", "y", "z", "Vx", "Vy", "Vz"]

    train_sat = train[train.sat_id == sat_id]

    train_sim = train_sat[sim_cols].to_numpy()
    train_true = train_sat[true_cols].to_numpy()
    train_stamps = train_sat["timestamp"].to_numpy()

    # Shift times so the first sample is at 0. Build a new array instead of
    # using `-=`: to_numpy() may hand back a view of the frame's data, which is
    # read-only when pandas Copy-on-Write is active.
    train_epoch = train_stamps - np.min(train_stamps)

    T_sim = evaluate_T_all(train_epoch, train_sim)
    T_true = evaluate_T_all(train_epoch, train_true)

    koef = T_true / T_sim

    train_splines = prolong_sim_all(train_epoch, train_sim, T_sim, T_true)

    # Evaluate each spline once on the whole rescaled time axis rather than
    # point by point in a Python double loop.
    rescaled_epoch = train_epoch / koef
    fixed_sim = np.column_stack([spline(rescaled_epoch) for spline in train_splines])

    # "id" is assigned first so the new frame picks up the rows' index before
    # the scalar sat_id is broadcast into it.
    new_train = pd.DataFrame(columns=["sat_id", "epoch", "id"] + true_cols + sim_cols)
    new_train["id"] = train_sat["id"]
    new_train["sat_id"] = sat_id
    new_train[true_cols] = train_sat[true_cols]
    new_train["epoch"] = train_sat["epoch"]
    new_train[sim_cols] = fixed_sim

    return new_train

In [6]:
# Satellites present in the test set, and the train-only remainder.
test_sat_ids = test_data["sat_id"].unique()
sats_to_predict = set(test_sat_ids)
sats_others = set(train_data["sat_id"]).difference(sats_to_predict)

In [7]:
# Per-satellite results are gathered in lists and concatenated once at the end.
train_parts = []
test_parts = []

# Satellites with test rows: fix train and test together.
for sat_id in tqdm(sats_to_predict):
    fixed_train, fixed_test = fix_sat(train_data, test_data, sat_id)
    train_parts.append(fixed_train)
    test_parts.append(fixed_test)

# Satellites that only appear in train: fix from train rows alone.
for sat_id in tqdm(sats_others):
    train_parts.append(fix_sat_train(train_data, sat_id))

new_train = pd.concat(train_parts)
new_test = pd.concat(test_parts)

HBox(children=(FloatProgress(value=0.0, max=600.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [8]:
# Write the corrected frames to disk without the index column.
for frame, destination in ((new_train, PATH_SAVE_TRAIN), (new_test, PATH_SAVE_TEST)):
    frame.to_csv(destination, index=False)