# Dataset: IDAO (full)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Data

In [2]:
# Read the five CSV tables in one pass; every table is keyed by its `id` column.
df_train, df_test, df_train_fixed, df_test_fixed, df_test_ans = (
    pd.read_csv(csv_path, index_col="id")
    for csv_path in (
        "data/train.csv",
        "data/test.csv",
        "data/train_fixed_period.csv",
        "data/test_fixed_period.csv",
        "data/ans.csv",
    )
)

# Attach the satellite id to the answers (pandas aligns the values on the `id` index).
df_test_ans["sat_id"] = df_test["sat_id"]

# Number of distinct satellites present in the training set.
n_sat = len(df_train["sat_id"].unique())

# Updated SGP4 prediction

Updated SGP4-model predictions where the last known true position point is used as the reference point.

In [3]:
from utils import sgp4_ephemeris

gzip was not found on your system! You should solve this issue for astroquery.eso to be at its best!
On POSIX system: make sure gzip is installed and in your path!On Windows: same for 7-zip (http://www.7-zip.org)!


In [4]:
# Start from an all-NaN frame with df_test's index and columns, then copy over
# the two columns that identify each row.
df_test_upd = pd.DataFrame().reindex_like(df_test)
df_test_upd[["sat_id", "epoch"]] = df_test[["sat_id", "epoch"]]

sim_columns = ["x_sim", "y_sim", "z_sim", "Vx_sim", "Vy_sim", "Vz_sim"]

for sat_id in pd.unique(df_train["sat_id"]):
    # Rows of the test frame that belong to the current satellite.
    sat_rows = df_test_upd["sat_id"] == sat_id

    # Reference point: the last training row of this satellite.
    # NOTE(review): assumes df_train is in chronological order per satellite - confirm.
    ref_coords = df_train.loc[df_train["sat_id"] == sat_id].iloc[-1]
    prediction_dates_list = df_test_upd.loc[sat_rows, "epoch"]

    # Propagate from the reference point to every test epoch and re-key the
    # result with the ids of the matching test rows.
    sgp4_upd = sgp4_ephemeris(ref_coords, prediction_dates_list)
    sgp4_upd = sgp4_upd.set_index(prediction_dates_list.index)

    # Positional assignment: everything except `epoch` goes into the *_sim columns.
    df_test_upd.loc[sat_rows, sim_columns] = sgp4_upd.drop(columns=["epoch"]).values

df_test_upd.head()

Unnamed: 0_level_0,sat_id,epoch,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
215,0,2014-02-01T00:05:07.344,-33030.685465,-26390.098519,22652.543225,-1.799607,2.229662,-2.069609
216,0,2014-02-01T03:32:46.448,-42237.996877,6578.952708,-7358.428702,0.524301,2.786823,-2.492041
217,0,2014-02-01T07:00:25.552,-23404.847446,36018.67019,-33224.72416,2.176022,1.828754,-1.572678
218,0,2014-02-01T10:28:04.656,5931.140257,52581.720176,-47129.784835,2.406201,0.897595,-0.726228
219,0,2014-02-01T13:55:43.760,34843.570271,59866.537701,-52691.797835,2.210414,0.318932,-0.211893


In [5]:
# Optional: uncomment to save the updated SGP4 predictions to disk.
# df_test_upd.to_csv('data/test_updated_sgp4.csv')

# LR

In [6]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures
from tqdm import tqdm

In [7]:
# Regression targets: the six state-vector columns the models must predict.
target_cols = [
    "x", "y", "z",
    "Vx", "Vy", "Vz",
]

# Model inputs: the `*_sim` counterpart of each target, listed in the same order.
feature_cols = [
    "x_sim", "y_sim", "z_sim",
    "Vx_sim", "Vy_sim", "Vz_sim",
]

### test dataset – SGP4 predictions

In [8]:
# Per-satellite linear regression: SGP4 state vector (feature_cols) -> true state
# vector (target_cols). One independent model is fitted for every satellite.
# NOTE(review): `poly` is created but never applied to the features, so the
# models below are plain (first-order) linear regressions.
poly = PolynomialFeatures(2)

pred_lr = pd.DataFrame(columns=target_cols, index=df_test.index)
pred_lr["sat_id"] = df_test["sat_id"]

# Iterate over the satellite ids actually present in the training data rather
# than assuming they are exactly 0 .. n_sat-1.
for sat_id in tqdm(pd.unique(df_train["sat_id"])):

    train_idxs = df_train["sat_id"] == sat_id
    test_idxs = df_test["sat_id"] == sat_id
    if not test_idxs.any():
        # Nothing to predict for this satellite; predict() would fail on 0 rows.
        continue

    X_train = df_train.loc[train_idxs, feature_cols]
    y_train = df_train.loc[train_idxs, target_cols]
    X_test = df_test.loc[test_idxs, feature_cols]

    # `normalize=True` was removed in scikit-learn 1.2. Ordinary least squares
    # predictions do not depend on feature scaling, so it is simply dropped.
    model = LinearRegression().fit(X_train.values, y_train.values)
    pred_lr.loc[test_idxs, target_cols] = model.predict(X_test.values)

100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [01:13<00:00,  8.21it/s]


### test dataset – updated SGP4 predictions

In [9]:
# Per-satellite linear regression evaluated on the updated SGP4 predictions.
# The models are fitted on df_train (original SGP4 features) and applied to the
# feature columns of df_test_upd.
# NOTE(review): `poly` is created but never applied to the features, so the
# models below are plain (first-order) linear regressions.
poly = PolynomialFeatures(2)

pred_lr_upd = pd.DataFrame(columns=target_cols, index=df_test.index)
pred_lr_upd["sat_id"] = df_test["sat_id"]

# Iterate over the satellite ids actually present in the training data rather
# than assuming they are exactly 0 .. n_sat-1.
for sat_id in tqdm(pd.unique(df_train["sat_id"])):

    train_idxs = df_train["sat_id"] == sat_id
    test_idxs = df_test["sat_id"] == sat_id
    if not test_idxs.any():
        # Nothing to predict for this satellite; predict() would fail on 0 rows.
        continue

    X_train = df_train.loc[train_idxs, feature_cols]
    y_train = df_train.loc[train_idxs, target_cols]
    X_test = df_test_upd.loc[test_idxs, feature_cols]

    # `normalize=True` was removed in scikit-learn 1.2. Ordinary least squares
    # predictions do not depend on feature scaling, so it is simply dropped.
    model = LinearRegression().fit(X_train.values, y_train.values)
    pred_lr_upd.loc[test_idxs, target_cols] = model.predict(X_test.values)

100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [01:11<00:00,  8.44it/s]


### Fixed periods LR

In [10]:
# Per-satellite linear regression on the fixed-period datasets
# (df_train_fixed -> df_test_fixed).
# NOTE(review): `poly` is created but never applied to the features, so the
# models below are plain (first-order) linear regressions.
poly = PolynomialFeatures(2)

# Index the output by df_test_fixed, the same frame the row masks and features
# come from, so boolean `.loc` assignment below is guaranteed to align.
pred_lr_fixed = pd.DataFrame(columns=target_cols, index=df_test_fixed.index)
pred_lr_fixed["sat_id"] = df_test_fixed["sat_id"]

# Iterate over the satellite ids actually present in the training data rather
# than assuming they are exactly 0 .. n_sat-1.
for sat_id in tqdm(pd.unique(df_train_fixed["sat_id"])):

    train_idxs = df_train_fixed["sat_id"] == sat_id
    test_idxs = df_test_fixed["sat_id"] == sat_id
    if not test_idxs.any():
        # Nothing to predict for this satellite; predict() would fail on 0 rows.
        continue

    X_train = df_train_fixed.loc[train_idxs, feature_cols]
    y_train = df_train_fixed.loc[train_idxs, target_cols]
    X_test = df_test_fixed.loc[test_idxs, feature_cols]

    # `normalize=True` was removed in scikit-learn 1.2. Ordinary least squares
    # predictions do not depend on feature scaling, so it is simply dropped.
    model = LinearRegression().fit(X_train.values, y_train.values)
    pred_lr_fixed.loc[test_idxs, target_cols] = model.predict(X_test.values)

100%|████████████████████████████████████████████████████████████████████████████████| 600/600 [01:12<00:00,  8.32it/s]


# IDAO

IDAO submissions (SGP4, updated SGP4, and fixed-period)

In [11]:
# Teams whose IDAO submissions are evaluated below.
participants = ["data_o_plomo", "david_sergey", "alsetboost", "veni_vidi_vici"]

# Folder containing the submission CSVs; the trailing slash matters because
# file paths are built by plain string concatenation.
sub_dir = "submissions/"

In [12]:
# Load each participant's three submissions, keyed by participant name:
#   pred_idao        <- sub_<name>.csv
#   pred_idao_upd    <- sub_<name>_updated_sgp4.csv
#   pred_idao_fixed  <- sub_<name>_fixed_period.csv
pred_idao = {}
pred_idao_upd = {}
pred_idao_fixed = {}
for p in participants:
    print(p)
    for store, suffix in (
        (pred_idao, ''),
        (pred_idao_upd, '_updated_sgp4'),
        (pred_idao_fixed, '_fixed_period'),
    ):
        submission = pd.read_csv(sub_dir + 'sub_' + p + suffix + '.csv', index_col='id')
        # Take sat_id straight from df_test (aligned on the `id` index) instead of
        # from pred_lr, so loading submissions does not require the LR cell to
        # have been run first; pred_lr['sat_id'] is itself a copy of this column.
        submission['sat_id'] = df_test['sat_id']
        store[p] = submission

data_o_plomo
david_sergey
alsetboost
veni_vidi_vici


# SGP4-predictions

In [13]:
# Treat the raw simulation columns as predictions: rename every `*_sim` feature
# column to its target name so the three frames can be scored like model output.
df_pred_sgp4, df_pred_sgp4_upd, df_pred_fixed = (
    frame.rename(columns=dict(zip(feature_cols, target_cols)))
    for frame in (df_test, df_test_upd, df_test_fixed)
)

# Metrics

In [14]:
from metrics import smape_idao, idao_score, smape_new_vector_norm, mahalanobis_distance

In [15]:
def print_metrics(model_name, pred):
    """Print the evaluation metrics of `pred` against the answers in `df_test_ans`.

    Parameters
    ----------
    model_name : str
        Label printed as the heading of the report.
    pred : pandas.DataFrame
        Predictions; must contain the `target_cols` columns. The whole frame
        (not just `target_cols`) is handed to `smape_new_vector_norm`.

    Returns
    -------
    None
        The report is written to stdout only.
    """
    print(f"\n{model_name}\n")

    # These two metrics are computed on the target columns only.
    for label, metric in (("IDAO score:  ", idao_score), ("SMAPE IDAO:  ", smape_idao)):
        print(f"{label}{metric(pred[target_cols], df_test_ans[target_cols])}")

    # This metric receives the complete frames.
    print(f"SMAPE new:   {smape_new_vector_norm(pred, df_test_ans)}")

    # Mahalanobis distance is currently disabled:
    # print(f"Mahalanobis: {mahalanobis_distance(pred, df_test_ans)}")

### SGP4

In [16]:
# Scoreboard for the original SGP4 features: raw SGP4, the LR correction,
# then every participant's submission.
print("TOTAL SCORE")
for label, frame in [("SGP4", df_pred_sgp4), ("LR", pred_lr), *pred_idao.items()]:
    print_metrics(label, frame)

TOTAL SCORE

SGP4

IDAO score:  85.35333249014583
SMAPE IDAO:  0.14646667509854172
SMAPE new:   0.22465428137020632

LR

IDAO score:  91.81678353872569
SMAPE IDAO:  0.08183216461274313
SMAPE new:   0.12727719610050833

data_o_plomo

IDAO score:  97.21670571788155
SMAPE IDAO:  0.02783294282118438
SMAPE new:   0.056036577434143274

david_sergey

IDAO score:  97.84329460084184
SMAPE IDAO:  0.021567053991581563
SMAPE new:   0.03087868117511732

alsetboost

IDAO score:  96.92031297743414
SMAPE IDAO:  0.030796870225658628
SMAPE new:   0.052050588061025874

veni_vidi_vici

IDAO score:  97.00698941270917
SMAPE IDAO:  0.029930105872908275
SMAPE new:   0.05707327384195287


### Updated SGP4

In [17]:
# Scoreboard for the updated SGP4 features: raw updated SGP4, the LR correction,
# then every participant's submission.
print("TOTAL SCORE")
for label, frame in [
    ("Updated SGP4", df_pred_sgp4_upd),
    ("LR (Updated SGP4)", pred_lr_upd),
    *((team + " (Updated SGP4)", sub) for team, sub in pred_idao_upd.items()),
]:
    print_metrics(label, frame)

TOTAL SCORE

Updated SGP4

IDAO score:  94.57101473641376
SMAPE IDAO:  0.054289852635862376
SMAPE new:   0.06542546046528024

LR (Updated SGP4)

IDAO score:  80.3290079110635
SMAPE IDAO:  0.19670992088936498
SMAPE new:   0.3592626653961014

data_o_plomo (Updated SGP4)

IDAO score:  97.21670571788155
SMAPE IDAO:  0.02783294282118438
SMAPE new:   0.056036577434143274

david_sergey (Updated SGP4)

IDAO score:  97.84329458679521
SMAPE IDAO:  0.02156705413204788
SMAPE new:   0.030878681275868913

alsetboost (Updated SGP4)

IDAO score:  96.92031297743414
SMAPE IDAO:  0.03079687022565863
SMAPE new:   0.052050588061025874

veni_vidi_vici (Updated SGP4)

IDAO score:  89.68988245881931
SMAPE IDAO:  0.10310117541180686
SMAPE new:   0.140020130441392


### Fixed periods

In [18]:
# Scoreboard for the fixed-period data: raw simulation, the LR correction,
# then every participant's submission.
print("TOTAL SCORE")
for label, frame in [
    ("Fixed period", df_pred_fixed),
    ("LR (Fixed period)", pred_lr_fixed),
    *((team + " (fixed period)", sub) for team, sub in pred_idao_fixed.items()),
]:
    print_metrics(label, frame)

TOTAL SCORE

Fixed period

IDAO score:  89.72641571606664
SMAPE IDAO:  0.10273584283933358
SMAPE new:   0.1361869931907276

LR (Fixed period)

IDAO score:  93.55510110973732
SMAPE IDAO:  0.06444898890262675
SMAPE new:   0.08471389182819992

data_o_plomo (fixed period)

IDAO score:  97.21670571855266
SMAPE IDAO:  0.027832942814473297
SMAPE new:   0.05603657745308817

david_sergey (fixed period)

IDAO score:  97.8432932378042
SMAPE IDAO:  0.02156706762195796
SMAPE new:   0.030878711273738226

alsetboost (fixed period)

IDAO score:  96.92031297743414
SMAPE IDAO:  0.03079687022565862
SMAPE new:   0.052050588061025874

veni_vidi_vici (fixed period)

IDAO score:  97.13407016473609
SMAPE IDAO:  0.02865929835263916
SMAPE new:   0.05211427293161519
