Данное решение было оценено в 79.0. В нем использовалась линейная модель для предсказания ошибки приближенной модели в кватернионных координатах для всех координат, кроме аномалии (с учетом того, что эксцентриситет находится в промежутке от 0 до 1). Ошибка по аномалии раскладывалась на зависимость амплитуды колебаний от аномалии по приближенной модели и нормированные осцилляции. В результате использовались линейная модель для предсказания поведения амплитуды колебаний (с признаком — временем) и
линейная модель для предсказания осцилляций (с признаками — синусом и косинусом приближенной аномалии).

In [1]:
from coosys import cartesian_to_kepler as ctk, cartesian_to_quaternion as ctq
from coosys import kepler_to_cartesian as ktc, quaternion_to_cartesian as qtc
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression as LR
from sklearn.preprocessing import PolynomialFeatures as PF
from scipy.interpolate import UnivariateSpline as US
from scipy import signal
import spectrum
from tqdm.auto import tqdm
tqdm.pandas()

from matplotlib import pyplot as plt
%matplotlib inline

  from pandas import Panel


In [2]:
# Constant (seconds) subtracted from every POSIX timestamp below.
# NOTE(review): presumably chosen to keep the "epoch" regressor small — confirm.
base = 1.3885 * 10**9

# Training set: parse "epoch" as datetimes, convert each to a POSIX timestamp,
# then shift by `base`.
train_data = pd.read_csv("IDAO 2020/train.csv", encoding="utf8")
train_data["epoch"] = (
    pd.to_datetime(train_data["epoch"])
    .apply(pd.Timestamp.timestamp)
    .sub(base)
)

# Test set: identical "epoch" treatment so both frames share one time axis.
test_data = pd.read_csv("IDAO 2020/Track 1/test.csv", encoding="utf8")
test_data["epoch"] = (
    pd.to_datetime(test_data["epoch"])
    .apply(pd.Timestamp.timestamp)
    .sub(base)
)

In [3]:
# Second argument passed to the coosys conversion routines (ctk / ctq / qtc).
# NOTE(review): 398603 looks like Earth's gravitational parameter GM in
# km^3/s^2 (~3.986e5) — confirm the expected units against coosys.
gamma_km = 398603

def transform_row(row, func):
    """Apply *func* to the raw value array of a single row.

    Args:
        row: pandas Series holding one row of the columns being converted.
        func: Callable taking the row's value array and returning the
            converted coordinates.

    Returns:
        Whatever *func* returns for ``row.values``.
    """
    return func(row.values)

def transform_dataset(data, func=lambda data: ctk(data, gamma_km), 
                      columns_from=("x", "y", "z", "Vx", "Vy", "Vz"), 
                      columns_to=("a", "e", "inclination", "longitude", "argument", "anomaly")):
    """Replace a group of coordinate columns by their row-wise transform.

    Each row of ``data[columns_from]`` is passed (as a value array) to *func*;
    the returned sequences are expanded into new columns named *columns_to*.
    The source columns are dropped and the new ones joined on the index.
    *data* itself is not modified.

    Args:
        data: DataFrame containing at least the columns in *columns_from*.
        func: Row converter; by default ``ctk(values, gamma_km)``.
        columns_from: Names of the input columns, in the order *func* expects.
        columns_to: Names for the values returned by *func*; the length must
            match the length of *func*'s result.

    Returns:
        A new DataFrame: *data* without *columns_from*, plus *columns_to*.
    """
    source = data.loc[:, list(columns_from)]
    # ``progress_apply`` only exists after ``tqdm.pandas()`` has patched
    # pandas.  Fall back to the plain ``apply`` (same semantics, no progress
    # bar) so the function does not depend on that global side effect.
    row_apply = getattr(source, "progress_apply", source.apply)
    new_data = row_apply(transform_row, axis=1, result_type="expand", args=(func, ))
    new_data.columns = list(columns_to)
    return data.drop(list(columns_from), axis=1).join(new_data)

In [4]:
# Convert the training set's Cartesian state vectors to
# (a, e, anomaly, q1..q4): first the unsuffixed columns, then their "_sim"
# counterparts.  The two passes differ only in the column-name suffix.
train_quaternion = train_data
for suffix in ("", "_sim"):
    train_quaternion = transform_dataset(
        train_quaternion,
        func=lambda state: ctq(state, gamma_km),
        columns_from=tuple(
            name + suffix for name in ("x", "y", "z", "Vx", "Vy", "Vz")
        ),
        columns_to=tuple(
            name + suffix
            for name in ("a", "e", "anomaly", "q1", "q2", "q3", "q4")
        ),
    )

HBox(children=(FloatProgress(value=0.0, max=649912.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=649912.0), HTML(value='')))




In [5]:
def phase_regression(phases):
    """Unwrap a sequence of phases into a non-wrapping one.

    Every time a sample is smaller than its predecessor, a wrap-around is
    assumed and all samples from that point on are shifted by one more
    ``2 * pi``.

    Args:
        phases: 1-D array-like of phase samples in sequence order.  Integer
            input is accepted and promoted to float so that the ``2 * pi``
            offsets are not truncated.

    Returns:
        A new float ndarray of the same length; the input is not modified.
    """
    phases = np.asarray(phases, dtype=float)
    if phases.size < 2:
        # Nothing to compare against: no wrap can have happened.
        return phases.copy()
    # wraps[i] = number of decreases seen among samples 1..i (0 for i == 0).
    wraps = np.concatenate(([0], np.cumsum(phases[1:] < phases[:-1])))
    return phases + 2 * np.pi * wraps

def phase_degression(phases):
    """Fold unwrapped phases back by removing whole turns of ``2 * pi``.

    The number of turns is truncated toward zero, so a negative input keeps
    its sign (the result lies in the open interval ``(-2*pi, 2*pi)``).

    Args:
        phases: ndarray of phase samples.

    Returns:
        A new ndarray with the same shape and dtype as *phases*.
    """
    whole_turns = np.trunc(phases / 2 / np.pi)
    folded = phases - 2 * np.pi * whole_turns
    # Keep the input dtype, as an array allocated with zeros_like would.
    return folded.astype(phases.dtype)

In [6]:
# Distinct sat_id values that occur in the test set.
sats_to_predict = set(test_data["sat_id"].unique())

In [7]:
# Convert the test set's simulated ("_sim") Cartesian state vectors to
# (a, e, anomaly, q1..q4), keeping the "_sim" suffix on the new columns.
test_quaternion = transform_dataset(
    test_data,
    func=lambda state: ctq(state, gamma_km),
    columns_from=tuple(
        name + "_sim" for name in ("x", "y", "z", "Vx", "Vy", "Vz")
    ),
    columns_to=tuple(
        name + "_sim"
        for name in ("a", "e", "anomaly", "q1", "q2", "q3", "q4")
    ),
)

HBox(children=(FloatProgress(value=0.0, max=284071.0), HTML(value='')))




In [8]:
def predict_one(train_quaternion, test_quaternion, sat_id):
    """Predict corrected orbital elements for every test row of one satellite.

    For ``a``, ``q1``..``q4`` and ``e`` a linear regression on ``epoch`` is
    fitted to the train-set difference between the column and its ``_sim``
    counterpart; the predicted difference is added to the test ``_sim`` value.
    If any predicted ``e`` is below 0 or not below 1, the whole ``e`` column
    falls back to the unmodified ``e_sim``.

    For ``anomaly`` the train difference (unwrapped ``anomaly`` minus unwrapped
    ``anomaly_sim``) is split into an envelope and a normalised oscillation:
    its local maxima and minima are each regressed linearly on ``epoch``, and
    the difference, centred and scaled by the interpolated envelopes, is
    regressed (without intercept) on the cosine and sine of the unwrapped
    simulated anomaly.  The test prediction recombines both parts and folds
    the phase back with ``phase_degression``.

    Args:
        train_quaternion: DataFrame with columns ``sat_id``, ``epoch`` and
            both the plain and ``_sim`` versions of a, e, anomaly, q1..q4.
        test_quaternion: DataFrame with columns ``sat_id``, ``id``, ``epoch``
            and the ``_sim`` versions of a, e, anomaly, q1..q4.
        sat_id: Value of ``sat_id`` selecting the satellite to process.

    Returns:
        DataFrame with columns id, a, e, anomaly, q1, q2, q3, q4, one row per
        test row of the satellite.
    """
    # Restrict both frames to the requested satellite.
    train_sat = train_quaternion[train_quaternion.sat_id == sat_id]
    test_sat = test_quaternion[test_quaternion.sat_id == sat_id]
    result = pd.DataFrame(columns=["id", "a", "e", "anomaly", "q1", "q2", "q3", "q4"])
    # Assigning the Series also gives `result` the index of the test rows.
    result["id"] = test_sat["id"]
    # Time is the only regressor.  PolynomialFeatures of degree 1 without the
    # bias column leaves the single epoch column as the only feature.
    train_t = train_sat["epoch"].to_numpy().reshape(-1, 1)
    train_features = PF(1, include_bias=False).fit_transform(train_t)
    test_t = test_sat["epoch"].to_numpy().reshape(-1, 1)
    test_features = PF(1, include_bias=False).fit_transform(test_t)

    # Linear-in-time correction of the simulation error, one model per column.
    for coordinate in ["a", "q1", "q2", "q3", "q4"]:
        train_diff = train_sat[coordinate] - train_sat[coordinate + "_sim"]
        model = LR().fit(train_features, train_diff)
        result[coordinate] = test_sat[coordinate + "_sim"] + model.predict(test_features)

    # Same correction for e, but rejected as a whole if any prediction leaves
    # the interval [0, 1).
    train_diff = train_sat["e"] - train_sat["e_sim"]
    model = LR().fit(train_features, train_diff)
    result["e"] = test_sat["e_sim"] + model.predict(test_features)
    if np.any(result["e"] < 0.) or np.any(result["e"] >= 1.):
        result["e"] = test_sat["e_sim"]

    # Unwrap the anomalies.  The simulated anomaly is unwrapped over train and
    # test concatenated, so the test part continues the period count reached
    # at the end of the train part.
    # NOTE(review): this assumes the rows are in time order and that the test
    # rows follow the train rows — confirm against the data.
    fixed_anomaly_train = phase_regression(train_sat["anomaly"].to_numpy())
    fixed_anomaly_sim = phase_regression(np.concatenate([train_sat["anomaly_sim"].to_numpy(),
                                                         test_sat["anomaly_sim"].to_numpy()]))
    fixed_anomaly_train_sim = fixed_anomaly_sim[:train_sat["anomaly_sim"].to_numpy().size]
    fixed_anomaly_test_sim = fixed_anomaly_sim[train_sat["anomaly_sim"].to_numpy().size:]
    train_diff = fixed_anomaly_train - fixed_anomaly_train_sim
    # Indices of the local maxima / minima of the anomaly error.
    train_maxs = signal.argrelmax(train_diff)[0]
    train_mins = signal.argrelmin(train_diff)[0]
    # Degree-1 splines with s=0 through the extrema: upper and lower envelope
    # of the error as functions of the unwrapped simulated anomaly.
    # NOTE(review): presumably fails when fewer than two maxima or minima are
    # found — verify for short training series.
    maxs_spline = US(fixed_anomaly_train_sim[train_maxs], train_diff[train_maxs], s=0, k=1)
    mins_spline = US(fixed_anomaly_train_sim[train_mins], train_diff[train_mins], s=0, k=1)
    maxs_spline_all = maxs_spline(fixed_anomaly_train_sim)
    mins_spline_all = mins_spline(fixed_anomaly_train_sim)
    train_amplitude = maxs_spline_all - mins_spline_all
    train_middle = (maxs_spline_all + mins_spline_all) / 2
    # Centre and scale by the envelope so the oscillation nominally spans
    # [-0.5, 0.5]; the clip enforces that range.
    # NOTE(review): a zero train_amplitude would divide by zero here.
    train_rectified_oscillate = np.clip((train_diff - train_middle) / train_amplitude, -0.5, 0.5)
    oscillate_features_train = np.concatenate([
        np.cos(fixed_anomaly_train_sim).reshape(-1, 1),
        np.sin(fixed_anomaly_train_sim).reshape(-1, 1),
    ], axis=1)
    # The envelopes are extrapolated in time by linear models fitted on the
    # extrema only; the normalised oscillation is fitted on cos/sin of the
    # simulated anomaly without an intercept.
    maxs_model = LR().fit(train_features[train_maxs], train_diff[train_maxs])
    mins_model = LR().fit(train_features[train_mins], train_diff[train_mins])
    oscillate_model = LR(fit_intercept=False).fit(oscillate_features_train, train_rectified_oscillate)
    oscillate_features_test = np.concatenate([
        np.cos(fixed_anomaly_test_sim).reshape(-1, 1),
        np.sin(fixed_anomaly_test_sim).reshape(-1, 1),
    ], axis=1)
    test_maxs = maxs_model.predict(test_features)
    test_mins = mins_model.predict(test_features)
    test_middle = (test_maxs + test_mins) / 2
    test_amplitude = test_maxs - test_mins
    test_oscillate = oscillate_model.predict(oscillate_features_test)
    # Recombine: simulated anomaly + envelope centre + scaled oscillation,
    # then remove the whole turns again.
    result["anomaly"] = phase_degression(fixed_anomaly_test_sim + test_middle + test_amplitude * test_oscillate)

    return result

In [9]:
# Fit and predict each satellite independently (with a progress bar), then
# stack the per-satellite frames into one.
results = [
    predict_one(train_quaternion, test_quaternion, sat_id)
    for sat_id in tqdm(sats_to_predict)
]

result_quaternion = pd.concat(results)

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))




In [10]:
# Convert the predicted (a, e, anomaly, q1..q4) rows back to Cartesian
# position and velocity columns.
result_cartesian = transform_dataset(
    result_quaternion,
    func=lambda elements: qtc(elements, gamma_km),
    columns_from=("a", "e", "anomaly", "q1", "q2", "q3", "q4"),
    columns_to=("x", "y", "z", "Vx", "Vy", "Vz"),
)

HBox(children=(FloatProgress(value=0.0, max=284071.0), HTML(value='')))




In [11]:
# Order the rows by "id"; the per-satellite frames were concatenated in the
# iteration order of a set.
result_cartesian = result_cartesian.sort_values("id")

In [12]:
# Write the predictions to disk without the DataFrame index column.
result_cartesian.to_csv("submission.csv", index=False)