In [1]:
from coosys import cartesian_to_kepler as ctk, cartesian_to_quaternion as ctq
from coosys import kepler_to_cartesian as ktc, quaternion_to_cartesian as qtc
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression as LR
from sklearn.preprocessing import PolynomialFeatures as PF
from tqdm.auto import tqdm
tqdm.pandas()

from matplotlib import pyplot as plt
%matplotlib inline

  from pandas import Panel


In [2]:
# Offset (in seconds) subtracted from the Unix timestamps so the "epoch"
# column holds small relative values instead of numbers around 1.4e9.
base = 1.3885 * 10**9


def _read_with_relative_epoch(csv_path):
    """Read a CSV and rewrite its "epoch" column as seconds relative to ``base``."""
    frame = pd.read_csv(csv_path, encoding="utf8")
    unix_seconds = pd.to_datetime(frame["epoch"]).apply(pd.Timestamp.timestamp)
    frame["epoch"] = unix_seconds - base
    return frame


train_data = _read_with_relative_epoch("IDAO 2020/train.csv")
test_data = _read_with_relative_epoch("IDAO 2020/Track 1/test.csv")

In [3]:
# Display the loaded training frame; the rich repr elides the middle rows.
train_data

Unnamed: 0,id,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
0,0,34400.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.221690,-20741.615306,-0.907527,-3.804930,-2.024133
1,1,37203.000,0,-10567.672384,1619.746066,-24451.813271,-0.302590,-4.272617,-0.612796,-10555.500066,1649.289367,-24473.089556,-0.303704,-4.269816,-0.616468
2,2,40006.001,0,-10578.684043,-10180.467460,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,-24271.169776,0.274880,-4.046788,0.718768
3,3,42809.001,0,-9148.251857,-20651.437460,-20720.381279,0.715600,-3.373762,1.722115,-9149.620794,-20618.200201,-20765.019094,0.712437,-3.375202,1.718306
4,4,45612.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,-14992.399986,0.989382,-2.522618,2.342237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
649907,1234089,2705622.602,599,-21721.485878,-14048.557595,5277.807430,-1.351754,3.373418,0.004995,-20717.958996,-16245.240500,5250.939232,-1.653931,3.157321,0.079069
649908,1234090,2707113.240,599,-23176.890569,-8712.016936,5153.371350,-0.575955,3.764450,-0.175109,-22673.444496,-11192.339393,5243.608790,-0.945328,3.603371,-0.092202
649909,1234091,2708603.878,599,-23363.044794,-2906.071320,4747.247386,0.351381,3.992943,-0.372198,-23461.830699,-5570.167175,4966.813869,-0.087089,3.912550,-0.281989
649910,1234092,2710094.515,599,-22058.020262,3074.894039,4038.853542,1.421085,3.984793,-0.578849,-22858.679929,373.249102,4396.055679,0.920162,4.021955,-0.485364


In [4]:
# Gravitational parameter passed to every coosys conversion call in this notebook.
# NOTE(review): presumably Earth's GM in km^3/s^2 (~398600.44) — confirm value and units.
gamma_km = 398603

def transform_row(row, func):
    """Apply ``func`` to the raw value array of one row and return its result.

    Parameters
    ----------
    row : object exposing ``.values`` (e.g. a pandas Series)
        The row to convert.
    func : callable
        Called with ``row.values`` as its only argument.
    """
    raw_values = row.values
    converted = func(raw_values)
    return converted

def transform_dataset(data, func=lambda data: ctk(data, gamma_km),
                      columns_from=("x", "y", "z", "Vx", "Vy", "Vz"),
                      columns_to=("a", "e", "inclination", "longitude", "argument", "anomaly")):
    """Replace a group of columns by the row-wise output of ``func``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing every column named in ``columns_from``.
    func : callable
        Receives the values of the ``columns_from`` cells of one row (as an
        array) and returns the converted values for that row.
    columns_from : sequence of str
        Columns fed to ``func``; they are removed from the returned frame.
    columns_to : sequence of str
        Names given to the columns produced by ``func``.

    Returns
    -------
    pandas.DataFrame
        ``data`` without ``columns_from``, joined (on the index) with the
        converted columns. ``data`` itself is not modified.
    """
    source_columns = list(columns_from)
    # Row-by-row conversion with a tqdm progress bar; "expand" spreads each
    # returned sequence over separate columns.
    converted = data[source_columns].progress_apply(
        transform_row, axis=1, result_type="expand", args=(func,)
    )
    converted.columns = columns_to
    remaining = data.drop(columns=source_columns)
    return remaining.join(converted)

In [5]:
def _train_state_to_quaternion(state):
    """Convert one Cartesian state row with ``ctq`` and ``gamma_km``."""
    return ctq(state, gamma_km)


_train_cartesian_names = ("x", "y", "z", "Vx", "Vy", "Vz")
_train_element_names = ("a", "e", "anomaly", "q1", "q2", "q3", "q4")

# First pass converts the plain columns ...
_train_plain_converted = transform_dataset(
    train_data,
    func=_train_state_to_quaternion,
    columns_from=_train_cartesian_names,
    columns_to=_train_element_names,
)
# ... second pass converts their "_sim" counterparts in the same way.
train_quaternion = transform_dataset(
    _train_plain_converted,
    func=_train_state_to_quaternion,
    columns_from=tuple(name + "_sim" for name in _train_cartesian_names),
    columns_to=tuple(name + "_sim" for name in _train_element_names),
)

HBox(children=(FloatProgress(value=0.0, max=649912.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=649912.0), HTML(value='')))




In [6]:
def phase_regression(phases):
    """Unwrap a sequence of periodic phases into a non-wrapping series.

    Every time a value is strictly smaller than its predecessor a wrap-around
    is assumed, and ``2 * pi`` is added to that value and all later ones.
    The input must therefore be ordered the way the phase evolves.

    Parameters
    ----------
    phases : array-like, 1-D
        Wrapped phases in radians. Integer input is converted to float
        instead of being truncated.

    Returns
    -------
    numpy.ndarray
        New float array of the same length; the input is not modified.
    """
    phases = np.asarray(phases, dtype=float)
    if phases.size < 2:
        return phases.copy()
    # wrapped[k] is True when element k + 1 dropped below element k.
    wrapped = phases[1:] < phases[:-1]
    # Number of completed periods before (and including) each element.
    periods = np.concatenate(([0], np.cumsum(wrapped)))
    return phases + 2 * np.pi * periods

def phase_degression(phases):
    """Wrap phases back into a single period ``[0, 2 * pi)``.

    A floor-based modulo is used so that negative phases are mapped into the
    same range as positive ones (truncation would leave them negative).
    Values are equivalent to the input modulo ``2 * pi``; up to floating-point
    rounding the result lies in ``[0, 2 * pi)``.

    Parameters
    ----------
    phases : array-like
        Phases in radians. Integer input is converted to float.

    Returns
    -------
    numpy.ndarray
        New float array with the wrapped phases.
    """
    phases = np.asarray(phases, dtype=float)
    return np.mod(phases, 2 * np.pi)

In [7]:
# Distinct satellite ids that occur in the test frame.
_test_sat_ids = test_data["sat_id"].unique()
sats_to_predict = set(_test_sat_ids)

In [8]:
# The test frame is converted from its "_sim" Cartesian columns only.
_test_sim_cartesian = tuple(
    name + "_sim" for name in ("x", "y", "z", "Vx", "Vy", "Vz")
)
_test_sim_elements = tuple(
    name + "_sim" for name in ("a", "e", "anomaly", "q1", "q2", "q3", "q4")
)
test_quaternion = transform_dataset(
    test_data,
    func=lambda state: ctq(state, gamma_km),
    columns_from=_test_sim_cartesian,
    columns_to=_test_sim_elements,
)

HBox(children=(FloatProgress(value=0.0, max=284071.0), HTML(value='')))




In [9]:
def predict_one(train_quaternion, test_quaternion, sat_id, degree=1):
    """Predict corrected orbital elements for the test rows of one satellite.

    For each element the difference between the column and its "_sim"
    counterpart is regressed (linear regression on polynomial features of
    ``epoch``) over the training rows of ``sat_id``. The fitted correction is
    added to the "_sim" values of the test rows.

    Parameters
    ----------
    train_quaternion : pandas.DataFrame
        Needs ``sat_id``, ``epoch`` and, for each of a, e, anomaly, q1..q4,
        both the plain and the "_sim" column. Rows of one satellite must be in
        the order in which the anomaly evolves, because the anomaly is
        unwrapped by counting consecutive decreases.
    test_quaternion : pandas.DataFrame
        Needs ``id``, ``sat_id``, ``epoch`` and the "_sim" columns.
    sat_id : scalar
        Satellite to predict.
    degree : int, default 1
        Degree of the polynomial in ``epoch``.

    Returns
    -------
    pandas.DataFrame
        Columns ``id, a, e, anomaly, q1, q2, q3, q4`` indexed like the test
        rows of ``sat_id``. When the satellite has no training rows (or no
        test rows) the "_sim" values are returned unchanged instead of
        raising from the regression.
    """
    columns = ["id", "a", "e", "anomaly", "q1", "q2", "q3", "q4"]
    train_sat = train_quaternion[train_quaternion.sat_id == sat_id]
    test_sat = test_quaternion[test_quaternion.sat_id == sat_id]

    # Fixing the index up front keeps every later column assignment aligned
    # with the test rows.
    result = pd.DataFrame(index=test_sat.index, columns=columns)
    result["id"] = test_sat["id"]

    if train_sat.empty or test_sat.empty:
        # Nothing to fit (or nothing to predict): use a zero correction.
        for column in columns[1:]:
            result[column] = test_sat[column + "_sim"]
        return result

    # Fit the feature expansion once on train and reuse it for test so both
    # matrices are guaranteed to share the same layout.
    poly = PF(degree, include_bias=False)
    train_features = poly.fit_transform(train_sat["epoch"].to_numpy().reshape(-1, 1))
    test_features = poly.transform(test_sat["epoch"].to_numpy().reshape(-1, 1))

    for coordinate in ["a", "e", "q1", "q2", "q3", "q4"]:
        train_diff = train_sat[coordinate] - train_sat[coordinate + "_sim"]
        model = LR().fit(train_features, train_diff)
        result[coordinate] = test_sat[coordinate + "_sim"] + model.predict(test_features)

    # The anomaly is periodic: regress on the unwrapped difference, then wrap
    # the corrected prediction back into a single period.
    train_diff = (phase_regression(train_sat["anomaly"].to_numpy())
                  - phase_regression(train_sat["anomaly_sim"].to_numpy()))
    model = LR().fit(train_features, train_diff)
    result["anomaly"] = phase_degression(
        test_sat["anomaly_sim"].to_numpy() + model.predict(test_features)
    )

    # If any corrected eccentricity falls outside [0, 1), discard the whole
    # correction for this satellite and keep the "_sim" eccentricity.
    predicted_e = result["e"].to_numpy()
    if np.any(predicted_e >= 1.) or np.any(predicted_e < 0.):
        result["e"] = test_sat["e_sim"]

    return result

In [10]:
# One prediction frame per satellite, stacked into a single frame afterwards.
results = [
    predict_one(train_quaternion, test_quaternion, sat_id)
    for sat_id in tqdm(sats_to_predict)
]

result_quaternion = pd.concat(results)

HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))




In [11]:
# Convert the predicted element columns back to Cartesian columns with qtc.
_predicted_element_names = ("a", "e", "anomaly", "q1", "q2", "q3", "q4")
_predicted_cartesian_names = ("x", "y", "z", "Vx", "Vy", "Vz")
result_cartesian = transform_dataset(
    result_quaternion,
    func=lambda elements: qtc(elements, gamma_km),
    columns_from=_predicted_element_names,
    columns_to=_predicted_cartesian_names,
)

HBox(children=(FloatProgress(value=0.0, max=284071.0), HTML(value='')))




In [12]:
# Write the predictions without the DataFrame index column.
result_cartesian.to_csv(path_or_buf="submission.csv", index=False)