In [1]:
import numpy as np
import pandas as pd
import lightgbm as lgbm
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from datetime import datetime
import time
from sklearn.linear_model import LinearRegression
from tqdm import tqdm_notebook as tqdm

In [2]:
def smape(satellite_predicted_values, satellite_true_values):
    """Return the mean of |predicted - true| / (|predicted| + |true|).

    All arithmetic is element-wise, so both arguments must be
    broadcast-compatible (for example NumPy arrays of the same shape).
    An element where both values are zero evaluates 0/0 and is not
    special-cased.
    """
    abs_error = np.abs(satellite_predicted_values - satellite_true_values)
    scale = np.abs(satellite_predicted_values) + np.abs(satellite_true_values)
    return np.mean(abs_error / scale)

In [3]:
# Load the data: both CSV files are read from PATH_TO_DATA and indexed by
# their 'id' column.
PATH_TO_DATA = os.path.join('../data')
full_train, full_test = (
    pd.read_csv(os.path.join(PATH_TO_DATA, csv_name), index_col='id')
    for csv_name in ('train.csv', 'test.csv')
)

In [4]:
# Display the training frame (rendered as the cell output).
full_train

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.221690,-20741.615306,-0.907527,-3.804930,-2.024133
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.302590,-4.272617,-0.612796,-10555.500066,1649.289367,-24473.089556,-0.303704,-4.269816,-0.616468
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.467460,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,-24271.169776,0.274880,-4.046788,0.718768
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.437460,-20720.381279,0.715600,-3.373762,1.722115,-9149.620794,-20618.200201,-20765.019094,0.712437,-3.375202,1.718306
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,-14992.399986,0.989382,-2.522618,2.342237
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1234089,2014-01-31T22:00:22.602,599,-21721.485878,-14048.557595,5277.807430,-1.351754,3.373418,0.004995,-20717.958996,-16245.240500,5250.939232,-1.653931,3.157321,0.079069
1234090,2014-01-31T22:25:13.240,599,-23176.890569,-8712.016936,5153.371350,-0.575955,3.764450,-0.175109,-22673.444496,-11192.339393,5243.608790,-0.945328,3.603371,-0.092202
1234091,2014-01-31T22:50:03.878,599,-23363.044794,-2906.071320,4747.247386,0.351381,3.992943,-0.372198,-23461.830699,-5570.167175,4966.813869,-0.087089,3.912550,-0.281989
1234092,2014-01-31T23:14:54.515,599,-22058.020262,3074.894039,4038.853542,1.421085,3.984793,-0.578849,-22858.679929,373.249102,4396.055679,0.920162,4.021955,-0.485364


In [5]:
# Display the test frame (rendered as the cell output).
full_test

Unnamed: 0_level_0,sat_id,epoch,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
3927,1,2014-02-01T00:01:45.162,-13366.891347,-14236.753503,6386.774555,4.333815,-0.692764,0.810774
3928,1,2014-02-01T00:22:57.007,-7370.434039,-14498.771520,7130.411325,5.077413,0.360609,0.313402
3929,1,2014-02-01T00:44:08.852,-572.068654,-13065.289498,7033.794876,5.519106,2.012830,-0.539412
3930,1,2014-02-01T01:05:20.697,6208.945257,-9076.852425,5548.296900,4.849212,4.338955,-1.869600
3931,1,2014-02-01T01:26:32.542,10768.200284,-2199.706707,2272.014862,1.940505,6.192887,-3.167724
...,...,...,...,...,...,...,...,...
1231060,597,2014-02-28T05:19:01.386,28595.031634,-85458.623976,5021.767767,-1.721131,-0.011611,0.122357
1231061,597,2014-02-28T07:21:46.454,15547.173728,-84233.509948,5840.616702,-1.815323,0.353445,0.098927
1231062,597,2014-02-28T09:24:31.522,1981.107111,-80123.860051,6458.394351,-1.858963,0.773846,0.067379
1231063,597,2014-02-28T11:27:16.590,-11644.801379,-72674.504171,6805.361999,-1.826030,1.262930,0.024782


In [6]:
# Descriptive statistics per satellite, transposed so that each satellite
# becomes a column.
full_train.groupby('sat_id').describe().T

Unnamed: 0,sat_id,0,1,2,3,4,5,6,7,8,9,...,590,591,592,593,594,595,596,597,598,599
x,count,958.000000,2108.000000,417.000000,354.000000,1210.000000,3531.000000,418.000000,235.000000,2578.000000,330.000000,...,584.000000,339.000000,1281.000000,1307.000000,1210.000000,499.000000,378.000000,366.000000,648.000000,1797.000000
x,mean,4900.339540,-16110.661352,-35645.536202,55850.315483,-6362.637373,-1332.440347,-3992.980574,-69315.624667,1821.125809,3974.751573,...,33969.821907,-46291.314162,-21555.015684,-8668.628135,-1425.664933,10580.871178,-46606.855632,31587.704549,-9840.325692,-1249.927953
x,std,9843.287489,13802.803673,40448.660925,42226.892159,20304.954487,10516.767043,40254.674362,54817.363085,12760.325326,43704.812113,...,32899.164658,45188.396733,19364.957099,20257.571491,20617.707887,12574.760713,41480.443911,44511.927815,30444.932286,15884.479675
x,min,-10752.229751,-33092.618399,-83475.871225,-28036.656341,-33119.544128,-15784.759280,-65975.489409,-130892.958010,-16973.914939,-59286.173946,...,-26260.593295,-98305.756377,-46535.764661,-34693.709309,-30089.826690,-11853.697706,-93502.440554,-42789.959911,-50041.240812,-23364.288658
x,25%,-4846.044039,-28724.877108,-72080.483505,27794.354266,-26239.094949,-11832.997586,-44257.957384,-119053.574244,-10903.902531,-38957.735073,...,3605.024327,-85473.827246,-39108.560422,-27242.692225,-23186.033247,-138.602966,-85035.962802,-10270.594276,-39882.598904,-16966.877633
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vz_sim,min,-3.582274,-3.267208,-2.548670,-0.070043,-3.853693,-0.573519,-1.695279,-0.856612,-4.264669,-0.960054,...,-1.970567,-1.991123,-0.739670,-1.813285,-0.592408,-1.481722,-0.600138,-0.446232,-1.391375,-1.037044
Vz_sim,25%,-2.024101,-0.877540,-0.768268,-0.058169,-2.220249,-0.388132,-1.125444,-0.702429,-3.156244,-0.726537,...,-0.586607,-0.526903,-0.343957,-1.438917,-0.479257,-1.353386,-0.535063,-0.118929,-0.765975,-0.483254
Vz_sim,50%,0.714230,0.572531,0.423172,-0.028103,0.270589,0.011565,-0.117304,-0.328873,-0.245740,-0.170071,...,0.286695,0.286162,0.071640,-0.263998,-0.093983,-0.831555,-0.299655,0.077402,0.128822,0.152503
Vz_sim,75%,2.176876,1.214693,0.889922,0.039338,1.967847,0.387142,0.980571,0.378398,2.833273,0.738587,...,0.669490,0.629948,0.373128,1.429139,0.467210,0.375542,0.309803,0.152677,0.817466,0.563858


In [7]:
# Difference between the simulated value ('<name>_sim'; the original note
# refers to it as SGP-4) and the real value of every state component,
# stored as 'delta_<name>'. The original note remarks that the two differ
# a lot.
# The components are listed by name rather than sliced by position, so the
# cell does not depend on the column order of the CSV.
for col_name in ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']:
    full_train['delta_' + col_name] = full_train[col_name + '_sim'] - full_train[col_name]

In [8]:
# Per-satellite summary statistics of the simulation-error columns.
# The columns are selected by their 'delta_' prefix instead of by position:
# a positional slice would also include any column appended to full_train
# later if this cell is re-run.
delta_names = [name for name in full_train.columns if name.startswith('delta_')]
full_train[delta_names + ['sat_id']].groupby('sat_id').describe().T

Unnamed: 0,sat_id,0,1,2,3,4,5,6,7,8,9,...,590,591,592,593,594,595,596,597,598,599
delta_x,count,958.0,2108.0,417.0,354.0,1210.0,3531.0,418.0,235.0,2578.0,330.0,...,584.0,339.0,1281.0,1307.0,1210.0,499.0,378.0,366.0,648.0,1797.0
delta_x,mean,1.208894,2112.446451,209.738936,326.081733,8.249695,-12.821916,-13515.094036,449.078722,-266.266534,1837.652562,...,41.971583,-609.134603,4934.673529,-45.598508,424.624676,-498.634398,-346.468126,470.050911,406.748192,-1.919105
delta_x,std,3053.526207,3023.693121,8822.708298,9808.638586,10555.451555,1904.838397,18191.19296,21917.512787,663.871604,4024.603051,...,9866.726522,261.50306,13606.203374,580.305934,1245.23379,4053.668639,6271.068678,17616.065748,6091.914586,1106.953228
delta_x,min,-9158.649797,-6436.1832,-14920.301581,-14903.0451,-18287.374618,-4595.159437,-77497.609193,-39129.84272,-2349.980797,-2219.447596,...,-26350.088327,-1384.493213,-24977.828593,-1616.287974,-1374.456773,-13660.407129,-13088.737558,-27413.897192,-11074.432313,-3373.490862
delta_x,25%,-1828.329475,356.937143,-5421.606604,-6001.73525,-6183.584044,-1218.435437,-17509.553259,-12892.183612,-614.397696,-195.41134,...,-4129.12086,-676.543913,-3849.170155,-387.586543,-383.677554,-950.766496,-1278.515302,-11260.842362,-507.738066,-576.760631
delta_x,50%,-8.945639,1152.946981,128.041125,383.232946,-120.354162,-10.341319,-5265.083207,-172.927364,-89.095419,232.492879,...,119.66992,-514.252345,941.969324,-15.683449,3.197385,30.274255,-219.084875,127.330249,151.992679,80.917657
delta_x,75%,2282.419198,2726.027123,2288.651543,1527.084463,5709.884505,1211.534277,-1460.366024,6780.314991,154.338836,2163.460188,...,4807.368918,-430.638162,11340.1957,330.153067,923.723412,2526.074522,228.651769,4369.971272,1207.905108,743.114525
delta_x,max,6917.312123,17367.556899,19390.882441,28777.855432,31478.635403,4694.625385,33.298655,85752.824417,1083.993789,20008.869272,...,27120.696869,-310.074944,49615.638101,1223.798076,4896.482077,7368.15011,23498.280008,48952.8161,13452.134391,2168.584877
delta_y,count,958.0,2108.0,417.0,354.0,1210.0,3531.0,418.0,235.0,2578.0,330.0,...,584.0,339.0,1281.0,1307.0,1210.0,499.0,378.0,366.0,648.0,1797.0
delta_y,mean,116.593133,967.435361,270.342042,462.127083,-18.785206,4.901992,-2822.584859,999.266593,20.887208,677.148273,...,214.516953,-58.049923,2106.023413,-12.730545,176.778974,-815.344643,178.065723,668.191816,183.237904,4.772428


In [9]:
# Plot one component of one satellite: real and simulated training values
# plus the simulated test values.
# Only the plotly figure is built here; no matplotlib figure is created, so
# the cell no longer emits an empty "<Figure ... with 0 Axes>" output.

sat_id = 28
real_col_name = 'x'
sim_col_name = real_col_name + '_sim'

# Filter each frame once and reuse the result for every trace.
sat_train = full_train[full_train.sat_id == sat_id]
sat_test = full_test[full_test.sat_id == sat_id]

fig = go.Figure()
fig.add_trace(go.Scatter(x=sat_train.epoch,
                         y=sat_train[real_col_name],
                         name="Real",
                         line_color='deepskyblue'))

fig.add_trace(go.Scatter(x=sat_train.epoch,
                         y=sat_train[sim_col_name],
                         name="Simulation",
                         line_color='dimgray'))

fig.add_trace(go.Scatter(x=sat_test.epoch,
                         y=sat_test[sim_col_name],
                         name="Sim_test",
                         line_color='MediumPurple'))

fig.update_layout(title_text='Time Series for ' + real_col_name,
                  xaxis_rangeslider_visible=True,
                  yaxis_title=real_col_name)
fig.show()

<Figure size 720x360 with 0 Axes>

In [10]:
# The simulated values are a poor match for the real ones (the original note
# mentions "up to 225 minutes", apparently from the task statement —
# NOTE(review): not verifiable from this notebook); the real values are what
# has to be predicted.
# This plot is used to check the hypothesis that the series are periodic.
# Only the plotly figure is built; no empty matplotlib figure is created.

sat_id = 113
real_col_name = 'x'
sim_col_name = real_col_name + '_sim'

# Rows of the selected satellite, filtered once and reused for every trace.
df_train = full_train[full_train.sat_id == sat_id]
df_test = full_test[full_test.sat_id == sat_id]

fig = go.Figure()
fig.add_trace(go.Scatter(x=df_train.epoch,
                         y=df_train[real_col_name],
                         name="Real",
                         line_color='deepskyblue'))

fig.add_trace(go.Scatter(x=df_train.epoch,
                         y=df_train[sim_col_name],
                         name="Simulation",
                         line_color='dimgray'))

fig.add_trace(go.Scatter(x=df_test.epoch,
                         y=df_test[sim_col_name],
                         name="Sim_test",
                         line_color='MediumPurple'))

fig.update_layout(title_text='Time Series for ' + real_col_name,
                  xaxis_rangeslider_visible=True,
                  yaxis_title=real_col_name)
fig.show()

<Figure size 720x360 with 0 Axes>

In [10]:
# Real vs. simulated series for all six state components of one satellite,
# one subplot per component: x, y, z go down the first column and
# Vx, Vy, Vz down the second, matching the order of subplot_titles.

sat_id = 33
fig = make_subplots(rows=3, cols=2,
                    subplot_titles=['x', 'Vx', 'y', 'Vy', 'z', 'Vz'])

# Filter each frame once instead of re-filtering it for every trace.
sat_train = full_train[full_train.sat_id == sat_id]
sat_test = full_test[full_test.sat_id == sat_id]

for i, real_col_name in enumerate(['x', 'y', 'z', 'Vx', 'Vy', 'Vz']):
    # i = 0..2 -> rows 1..3 of column 1, i = 3..5 -> rows 1..3 of column 2
    row, col = i % 3 + 1, i // 3 + 1
    sim_col_name = real_col_name + '_sim'
    fig.add_trace(go.Scatter(x=sat_train.epoch,
                             y=sat_train[real_col_name],
                             name="Real " + real_col_name,
                             ), row=row, col=col)

    fig.add_trace(go.Scatter(x=sat_train.epoch,
                             y=sat_train[sim_col_name],
                             name="Simulation " + real_col_name,
                             ), row=row, col=col)

    fig.add_trace(go.Scatter(x=sat_test.epoch,
                             y=sat_test[sim_col_name],
                             name="Sim_test " + real_col_name,
                             ), row=row, col=col)
fig.show()

In [11]:


# Finite-difference rate of change (value step divided by time step) of one
# component for one satellite: real and simulated training values plus the
# simulated test values.

sat_id = 28

real_col_name = 'x'
sim_col_name = real_col_name + '_sim'

df_train = full_train[full_train.sat_id == sat_id]
df_test = full_test[full_test.sat_id == sat_id]


def _seconds_between_rows(epochs):
    """Return the time step in seconds between consecutive epoch strings.

    The epochs are parsed with pandas so that the millisecond part is kept
    and the result does not depend on the machine's local timezone
    (time.mktime(...timetuple()) drops the fraction and uses local time).
    A zero step is replaced by 10000 so the division below never hits zero.
    """
    stamps = pd.to_datetime(epochs, format='%Y-%m-%dT%H:%M:%S.%f')
    steps = stamps.diff().dt.total_seconds().to_numpy()[1:]
    return np.where(steps == 0, 10000, steps)


d_t = _seconds_between_rows(df_train.epoch)
d_tt = _seconds_between_rows(df_test.epoch)

# Step between consecutive rows of each series.
d_real = np.diff(df_train[real_col_name].values)
d_sim = np.diff(df_train[sim_col_name].values)
d_test = np.diff(df_test[sim_col_name].values)

fig = go.Figure()
fig.add_trace(go.Scatter(x=df_train.epoch.values[1:],
                         y=d_real / d_t,
                         name="Real",
                         line_color='deepskyblue'))

fig.add_trace(go.Scatter(x=df_train.epoch.values[1:],
                         y=d_sim / d_t,
                         name="Simulation",
                         line_color='dimgray'))

fig.add_trace(go.Scatter(x=df_test.epoch.values[1:],
                         y=d_test / d_tt,
                         name="Sim_test",
                         line_color='MediumPurple'))

fig.update_layout(title_text='Time Series for ' + real_col_name,
                  xaxis_rangeslider_visible=True,
                  yaxis_title=real_col_name)
fig.show()

In [12]:
# Let's compute the basic statistics and use them as features, although
# it is admittedly odd to forecast time series with tree models.
# It turns out that 24 points make up one period!
# Plan: find the minimum or the maximum
# to locate the lowest or the highest point of the period,
# and then compute the statistics relative to it.

# There is nothing wrong with using a simple model for each satellite.


In [13]:
# For each satellite, take the first 24 values and find the minimum of each column.

In [14]:
# Phase index ('<col>_num', values 0..23) for every training row.
#
# For each satellite the index advances by one per row and wraps every 24
# rows. Its starting offset is the 'id' label of the row holding the
# satellite's minimum of <col>, taken modulo 24 — i.e. a fixed per-satellite
# shift, exactly as before.
#
# Computed with groupby so that it does not depend on the number of
# satellites, on the rows being stored in contiguous blocks ordered by
# sat_id, or on writing through the array returned by `.values`.

# 0-based position of each row within its satellite.
row_in_sat = full_train.groupby('sat_id').cumcount().values

for col in ['x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']:
    # 'id' label of the minimum of `col`, one entry per satellite.
    id_of_min = full_train.groupby('sat_id')[col].idxmin()
    offset = full_train['sat_id'].map(id_of_min).values % 24
    full_train[col + '_num'] = ((row_in_sat + offset) % 24).astype('int64')
        

In [16]:
# Let's build the trend from the last 5 predictions, i.e. take the last 5
# values, fit a straight line through them, and then see what we get.

In [17]:
# Phase index ('<col>_num', values 0..23) for every test row.
#
# Each satellite's test rows continue the counter of its training rows: the
# first test row gets (last training phase + 1) % 24 and the index advances
# by one per row after that.
#
# Computed with groupby so that it does not depend on the test rows being
# stored in contiguous per-satellite blocks or on writing through the array
# returned by `.values`. A test satellite with no training rows makes the
# integer cast fail instead of silently producing a wrong phase.

# 0-based position of each row within its satellite.
row_in_sat = full_test.groupby('sat_id').cumcount().values

for col in ['x_sim', 'y_sim', 'z_sim', 'Vx_sim', 'Vy_sim', 'Vz_sim']:
    # Phase of the final training row, one entry per satellite.
    last_train_phase = full_train.groupby('sat_id')[col + '_num'].last()
    start_phase = full_test['sat_id'].map(last_train_phase).values + 1
    full_test[col + '_num'] = ((row_in_sat + start_phase) % 24).astype('int64')
        

In [18]:
# NOTE(review): adds a constant-zero 'x_num' column to full_train; no other
# cell visible in this notebook reads it — confirm whether it is still needed.
full_train['x' + '_num'] = 0

In [19]:
# Inspect the training rows of satellite 0, including the phase-index columns.
full_train[full_train.sat_id == 0]

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,...,delta_Vx,delta_Vy,delta_Vz,x_sim_num,y_sim_num,z_sim_num,Vx_sim_num,Vy_sim_num,Vz_sim_num,x_num
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2014-01-01T00:00:00.000,0,-8855.823863,13117.780146,-20728.353233,-0.908303,-3.808436,-2.022083,-8843.131454,13138.221690,...,0.000776,0.003506,-0.002050,2,7,1,22,1,22,0
1,2014-01-01T00:46:43.000,0,-10567.672384,1619.746066,-24451.813271,-0.302590,-4.272617,-0.612796,-10555.500066,1649.289367,...,-0.001114,0.002801,-0.003673,3,8,2,23,2,23,0
2,2014-01-01T01:33:26.001,0,-10578.684043,-10180.467460,-24238.280949,0.277435,-4.047522,0.723155,-10571.858472,-10145.939908,...,-0.002555,0.000734,-0.004387,4,9,3,0,3,0,0
3,2014-01-01T02:20:09.001,0,-9148.251857,-20651.437460,-20720.381279,0.715600,-3.373762,1.722115,-9149.620794,-20618.200201,...,-0.003163,-0.001440,-0.003809,5,10,4,1,4,1,0
4,2014-01-01T03:06:52.002,0,-6719.092336,-28929.061629,-14938.907967,0.992507,-2.519732,2.344703,-6729.358857,-28902.271436,...,-0.003125,-0.002887,-0.002466,6,11,5,2,5,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,2014-01-31T20:27:33.474,0,17337.596150,-3224.996803,40025.071742,-0.055615,2.650511,-0.222561,15425.677762,12387.078210,...,-0.527231,-0.156437,-1.211385,19,0,18,15,18,15,0
954,2014-01-31T21:14:16.474,0,16849.590836,4217.959953,38636.167298,-0.295282,2.642711,-0.774030,13419.744899,19007.658296,...,-0.554339,-0.441210,-1.264989,20,1,19,16,19,16,0
955,2014-01-31T22:00:59.475,0,15667.981809,11481.446566,35656.909015,-0.550136,2.518368,-1.356292,10662.661170,24548.177490,...,-0.566125,-0.802939,-1.280543,21,2,20,17,20,17,0
956,2014-01-31T22:47:42.475,0,13754.838284,18199.705814,31013.052037,-0.816256,2.247835,-1.959266,7182.757625,28395.590633,...,-0.543839,-1.263139,-1.213983,22,3,21,18,21,18,0


In [20]:
# Create the six target columns in full_test as float placeholders (0.0);
# the per-satellite fitting loop further down overwrites them with predictions.
for col_name in ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']:
    full_test[col_name] = 0.0

In [21]:
# Check the column dtypes of full_test after adding the phase-index and target columns.
full_test.dtypes

sat_id          int64
epoch          object
x_sim         float64
y_sim         float64
z_sim         float64
Vx_sim        float64
Vy_sim        float64
Vz_sim        float64
x_sim_num       int64
y_sim_num       int64
z_sim_num       int64
Vx_sim_num      int64
Vy_sim_num      int64
Vz_sim_num      int64
x             float64
y             float64
z             float64
Vx            float64
Vy            float64
Vz            float64
dtype: object

In [22]:
%%time
width = 4
for sat_id in tqdm(full_test.sat_id.unique()):
    df1_train = full_train[full_train.sat_id == sat_id]
    df1_test =  full_test[full_test.sat_id == sat_id]
    for col in ['x', 'y', 'z', 'Vx', 'Vy', 'Vz']:
        for i in range(24):
            df2_train = df1_train[df1_train[col + '_sim_num'] == i]
            df2_test = df1_test[df1_test[col + '_sim_num'] == i]
            lin_model = LinearRegression()
            X_train = df2_train.reset_index()['id'].values[-width:].reshape(-1, 1)
            y_train = df2_train[col].values[-width:]
            lin_model.fit(X_train, y_train)
            X_test = df2_test.reset_index()['id'].values.reshape(-1, 1)
            y_test = lin_model.predict(X_test)
            full_test.loc[X_test.ravel(), col] = y_test


This function will be removed in tqdm==5.0.0
Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`



HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))


CPU times: user 2min, sys: 25.9 ms, total: 2min
Wall time: 2min


In [24]:
# Predicted ("Estimated") test values next to the simulated test values and
# the real / simulated training values, for all six state components of one
# satellite. x, y, z go down the first subplot column and Vx, Vy, Vz down
# the second, matching the order of subplot_titles.
#
# The test simulation series is drawn once, as "Sim_test"; "Simulation" is
# reserved for the training simulation, as in the earlier plots.

sat_id = 2
fig = make_subplots(rows=3, cols=2,
                    subplot_titles=['x', 'Vx', 'y', 'Vy', 'z', 'Vz'])

# Filter each frame once instead of re-filtering it for every trace.
sat_train = full_train[full_train.sat_id == sat_id]
sat_test = full_test[full_test.sat_id == sat_id]

for i, real_col_name in enumerate(['x', 'y', 'z', 'Vx', 'Vy', 'Vz']):
    # i = 0..2 -> rows 1..3 of column 1, i = 3..5 -> rows 1..3 of column 2
    row, col = i % 3 + 1, i // 3 + 1
    sim_col_name = real_col_name + '_sim'
    fig.add_trace(go.Scatter(x=sat_test.epoch,
                             y=sat_test[real_col_name],
                             name="Estimated " + real_col_name,
                             ), row=row, col=col)

    fig.add_trace(go.Scatter(x=sat_test.epoch,
                             y=sat_test[sim_col_name],
                             name="Sim_test " + real_col_name,
                             ), row=row, col=col)

    fig.add_trace(go.Scatter(x=sat_train.epoch,
                             y=sat_train[real_col_name],
                             name="Real " + real_col_name,
                             ), row=row, col=col)

    fig.add_trace(go.Scatter(x=sat_train.epoch,
                             y=sat_train[sim_col_name],
                             name="Simulation " + real_col_name,
                             ), row=row, col=col)

fig.show()

In [25]:
# Plot one component of one satellite: predicted ("Estimated") and simulated
# test values together with the real and simulated training values.
#
# Only the plotly figure is built (no empty matplotlib figure), and the test
# simulation series is drawn once, as "Sim_test"; "Simulation" is reserved
# for the training simulation, as in the earlier plots.

sat_id = 299
real_col_name = 'x'
sim_col_name = real_col_name + '_sim'

# Filter each frame once and reuse the result for every trace.
sat_train = full_train[full_train.sat_id == sat_id]
sat_test = full_test[full_test.sat_id == sat_id]

fig = go.Figure()
fig.add_trace(go.Scatter(x=sat_test.epoch,
                         y=sat_test[real_col_name],
                         name="Estimated " + real_col_name,
                         ))

fig.add_trace(go.Scatter(x=sat_test.epoch,
                         y=sat_test[sim_col_name],
                         name="Sim_test " + real_col_name,
                         ))

fig.add_trace(go.Scatter(x=sat_train.epoch,
                         y=sat_train[real_col_name],
                         name="Real " + real_col_name,
                         ))

fig.add_trace(go.Scatter(x=sat_train.epoch,
                         y=sat_train[sim_col_name],
                         name="Simulation " + real_col_name,
                         ))

fig.update_layout(title_text='Time Series for ' + real_col_name,
                  xaxis_rangeslider_visible=True,
                  yaxis_title=real_col_name)
fig.show()

<Figure size 720x360 with 0 Axes>

In [26]:
# Inspect the training rows of satellite 25, including the phase-index columns.
full_train[full_train.sat_id==25]

Unnamed: 0_level_0,epoch,sat_id,x,y,z,Vx,Vy,Vz,x_sim,y_sim,...,delta_Vx,delta_Vy,delta_Vz,x_sim_num,y_sim_num,z_sim_num,Vx_sim_num,Vy_sim_num,Vz_sim_num,x_num
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
49170,2014-01-01T00:00:00.000,25,-11688.343088,3090.573912,-14579.251996,-4.085267,-2.990730,2.641215,-11650.269626,3150.034596,...,-0.002180,-0.003575,-0.019455,19,23,17,17,18,14,0
49171,2014-01-01T00:59:34.600,25,-20982.661770,-7500.870948,-1371.134347,-1.093479,-2.658589,4.238782,-20975.805340,-7472.648284,...,-0.010786,-0.011701,-0.008759,20,0,18,18,19,15,0
49172,2014-01-01T01:59:09.201,25,-21066.158338,-15225.222626,13248.400649,0.796565,-1.682992,3.792120,-21089.106322,-15238.766260,...,-0.005595,-0.010139,0.000158,21,1,19,19,20,16,0
49173,2014-01-01T02:58:43.801,25,-16576.488169,-19858.027685,25397.656639,1.598034,-0.956059,3.007305,-16611.380403,-19901.873152,...,-0.001715,-0.006573,0.002426,22,2,20,20,21,17,0
49174,2014-01-01T03:58:18.402,25,-10198.800998,-22336.917974,34858.999568,1.919541,-0.461334,2.306414,-10235.711415,-22399.361509,...,0.000152,-0.003776,0.002722,23,3,21,21,22,18,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49917,2014-01-31T19:44:37.348,25,-20225.821660,-5619.419185,-4213.549946,-1.631949,-2.817845,4.141672,-16767.029237,-19896.083254,...,3.226830,1.860744,-1.157057,22,2,20,20,21,17,0
49918,2014-01-31T20:44:11.948,25,-21737.469873,-14000.448306,10561.363158,0.525654,-1.861416,3.914561,-10403.354296,-22392.440561,...,1.390126,1.392324,-1.623429,23,3,21,21,22,18,0
49919,2014-01-31T21:43:46.549,25,-17916.839553,-19188.822806,23203.592572,1.475436,-1.088870,3.148240,-3320.030077,-23415.660585,...,0.547744,0.967064,-1.439000,0,4,22,22,23,19,0
49920,2014-01-31T22:43:21.149,25,-11839.155071,-22076.441573,33143.594953,1.866073,-0.559500,2.431938,3932.729446,-23368.127513,...,0.155548,0.695885,-1.214792,1,5,23,23,0,20,0


In [27]:
# Inspect the test rows of satellite 25, including the phase-index and
# predicted component columns.
full_test[full_test.sat_id==25]

Unnamed: 0_level_0,sat_id,epoch,x_sim,y_sim,z_sim,Vx_sim,Vy_sim,Vz_sim,x_sim_num,y_sim_num,z_sim_num,Vx_sim_num,Vy_sim_num,Vz_sim_num,x,y,z,Vx,Vy,Vz
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
49922,25,2014-02-01T00:42:30.350,17881.788791,-21020.614114,53027.180656,1.854066,0.492290,0.417952,3,7,1,1,2,22,2370.184122,-23535.438135,46330.813484,2.023611,0.088093,1.320288
49923,25,2014-02-01T01:42:04.951,24280.268592,-19027.284588,53910.194434,1.721820,0.618382,0.081554,4,8,2,2,3,23,9525.405862,-22829.289137,50251.468317,1.970931,0.298177,0.883556
49924,25,2014-02-01T02:41:39.551,30163.781753,-16628.073381,53644.976519,1.566537,0.720300,-0.225703,5,9,3,3,4,0,16409.718501,-21456.944956,52710.864196,1.875006,0.463337,0.500143
49925,25,2014-02-01T03:41:14.152,35454.702280,-13901.131644,52323.423429,1.390426,0.802331,-0.510359,6,10,4,4,5,1,22894.413706,-19555.877576,53874.819484,1.748842,0.595556,0.156906
49926,25,2014-02-01T04:40:48.752,40079.169994,-10912.528685,50017.226128,1.193506,0.867040,-0.777269,7,11,5,5,6,2,28884.338635,-17229.383170,53869.064499,1.598893,0.702351,-0.155657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50592,25,2014-02-28T15:59:43.462,3625.768652,-23365.560962,47336.805443,2.023826,0.124890,1.213806,1,5,23,23,0,20,-13154.850638,-21764.619564,31841.949910,1.811845,-0.640743,2.534048
50593,25,2014-02-28T16:59:18.062,10764.845101,-22548.691539,50898.856440,1.962545,0.323943,0.788654,2,6,0,0,1,21,-6317.539302,-23320.847288,39766.546155,1.982827,-0.251340,1.918000
50594,25,2014-02-28T17:58:52.663,17607.796813,-21098.066837,53034.599255,1.860726,0.481700,0.413452,3,7,1,1,2,22,918.548802,-23785.487851,45650.705825,2.016807,0.035504,1.397629
50595,25,2014-02-28T18:58:27.263,24032.348690,-19140.903939,53899.765735,1.729747,0.608758,0.076089,4,8,2,2,3,23,8071.492727,-23250.673565,49834.086347,1.975809,0.254510,0.953509


In [28]:
# Write the six predicted state components to 'submission.csv'; the index
# column is labelled 'id'.
full_test.to_csv('submission.csv',
                 columns=['x', 'y', 'z', 'Vx', 'Vy', 'Vz'],
                 index_label='id')