In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Data

In [2]:
# All tables are read with their "id" column as the row index.
df_train = pd.read_csv("data/train.csv", index_col="id")
df_test = pd.read_csv("data/test.csv", index_col="id")
df_pred_sgp4 = pd.read_csv("data/pred_sgp.csv", index_col="id")

# Answers for the test rows; sat_id is copied over from df_test
# (the assignment is aligned on the shared "id" index).
df_test_ans = pd.read_csv("data/ans.csv", index_col="id").assign(
    sat_id=df_test["sat_id"]
)

# Number of distinct sat_id values present in the training set.
n_sat = len(df_train["sat_id"].unique())

# Linear Regression (LR)

In [3]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures
from tqdm import tqdm

In [4]:
# Target variable columns.
target_cols = ["x", "y", "z", "Vx", "Vy", "Vz"]

# Feature columns: each target column name with a "_sim" suffix, in the same order.
feature_cols = [f"{name}_sim" for name in target_cols]

In [5]:
# Per-satellite linear regression: for every satellite in the test set, fit one
# multi-output model mapping feature_cols -> target_cols on that satellite's
# training rows, then predict that satellite's test rows.

# NOTE(review): `poly` is created but never applied in this cell -- the models
# below are fitted on the raw features. Kept in case a later cell uses it.
poly = PolynomialFeatures(2)

# dtype=float keeps the prediction columns numeric; without it the empty frame
# is created with object columns and the predictions are stored as objects.
pred_lr = pd.DataFrame(columns=target_cols, index=df_test.index, dtype=float)
pred_lr["sat_id"] = df_test["sat_id"]

# Iterate over the ids that actually occur in the test set instead of
# range(n_sat): this does not assume ids are exactly 0..n_sat-1 and never asks
# a model to predict on an empty selection.
for sat_id in tqdm(df_test["sat_id"].unique()):

    train_idxs = df_train["sat_id"] == sat_id
    test_idxs = df_test["sat_id"] == sat_id
    X_train = df_train.loc[train_idxs, feature_cols]
    y_train = df_train.loc[train_idxs, target_cols]
    X_test = df_test.loc[test_idxs, feature_cols]

    # `normalize=True` was deprecated in scikit-learn 1.0 and removed in 1.2,
    # so passing it raises TypeError on current versions. For a full-rank
    # ordinary least-squares fit the predictions do not depend on feature
    # scaling, so the argument is simply dropped.
    model = LinearRegression().fit(X_train.values, y_train.values)
    pred_lr.loc[test_idxs, target_cols] = model.predict(X_test.values)

100%|██████████| 600/600 [01:41<00:00,  4.54it/s]


# IDAO

In [6]:
# Predictions stored in data/sub_idao_example.csv, indexed by the "id" column.
pred_idao = pd.read_csv(
    "data/sub_idao_example.csv",
    index_col="id",
)

In [7]:
# Take the satellite ids straight from the test set (aligned on the "id" index)
# instead of going through pred_lr: the values are the same, but this cell no
# longer requires the LR fitting cell to have been run first.
pred_idao["sat_id"] = df_test["sat_id"]

# Metrics

In [8]:
from metrics import smape_idao, idao_score, smape_new_vector_norm, mahalanobis_distance

In [9]:
def print_metrics(model_name, pred):
    """Print a short metrics report for one set of predictions.

    The report is a header with ``model_name`` followed by one line per
    metric. Predictions are compared against the notebook-level
    ``df_test_ans`` frame; ``idao_score`` and ``smape_idao`` receive only the
    ``target_cols`` columns of both frames, while ``smape_new_vector_norm``
    receives both frames unrestricted.

    Parameters
    ----------
    model_name : str
        Title printed above the metric lines.
    pred : DataFrame-like
        Predictions; must support ``pred[target_cols]``.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    # Each entry is (label, thunk). The thunks are evaluated one at a time in
    # the loop below, so every metric is computed right before it is printed.
    report = (
        ("IDAO score:  ", lambda: idao_score(pred[target_cols], df_test_ans[target_cols])),
        ("SMAPE IDAO:  ", lambda: smape_idao(pred[target_cols], df_test_ans[target_cols])),
        ("SMAPE new:   ", lambda: smape_new_vector_norm(pred, df_test_ans)),
        # Disabled metric, kept for reference:
        # ("Mahalanobis: ", lambda: mahalanobis_distance(pred, df_test_ans)),
    )

    print(f"\n{model_name}\n")
    for label, compute in report:
        print(f"{label}{compute()}")

In [11]:
# Rename the feature_cols of the SGP4 predictions to the matching target_cols
# (paired by position) so they can be scored like the other predictions.
sim_to_target = {sim: target for sim, target in zip(feature_cols, target_cols)}
pred_sgp4 = df_pred_sgp4.rename(columns=sim_to_target)

# Report the same metrics for each set of predictions, in a fixed order.
for title, frame in (
    ("SGP4 – Total Score:", pred_sgp4),
    ("LR – Total Score:", pred_lr),
    ("IDAO – Total Score:", pred_idao),
):
    print_metrics(title, frame)


SGP4 – Total Score:

IDAO score:  85.76745399276147
SMAPE IDAO:  0.14232546007238525
SMAPE new:   0.2105207081354428

LR – Total Score:

IDAO score:  91.81678353872567
SMAPE IDAO:  0.08183216461274322
SMAPE new:   0.1272771961005083

IDAO – Total Score:

IDAO score:  96.92031297743414
SMAPE IDAO:  0.030796870225658628
SMAPE new:   0.052050588061025874
