## Predikcia vsetkych parametrov pomocou jednej NN pre overcontact data

In [1]:
# Blok 1 - Nacitanie kniznic
import numpy as np
import pandas as pd

from keras.models import load_model
from sklearn.model_selection import train_test_split
from keras.layers import Conv1D, MaxPooling1D
from keras.layers import Input, Dense, LSTM, Dropout, Flatten
from keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Seed NumPy's global random generator so the np.random draws used below are repeatable.
np.random.seed(1234)
# Display every row of a DataFrame instead of truncating long outputs.
pd.set_option('display.max_rows', None)

In [4]:
# Block 2 - Noise-generation functions; they draw from NumPy's global pseudo-random generator.
def generate_observation_sigma(space_obs_frac=0.5):
    """
    Draw a random standard deviation for the noise added to a synthetic light curve.

    One of two base noise levels is picked at random - an earth based one (4e-3) or a space based one (2e-4) -
    and the returned value is sampled from a Rayleigh distribution whose scale is the picked level.

    :param space_obs_frac: float; probability of picking the space based noise level (the earth based level is
                           picked with probability ``1 - space_obs_frac``)
    :return: float; standard deviation of the light curve noise
    """
    # Order matters: the weights below are paired with the levels position by position.
    noise_levels = [4e-3, 2e-4]  # [earth based, space based]
    level_weights = [1 - space_obs_frac, space_obs_frac]
    scale = np.random.choice(noise_levels, p=level_weights)
    return np.random.rayleigh(scale)

def stochastic_noise_generator(curve):
    """
    Add gaussian noise to the synthetic observation given in `curve`.

    A single noise level is drawn for the whole curve and every point is perturbed with it.

    :param curve: numpy.array; normalized light curve
    :return: Tuple(numpy.array, numpy.array); light curve with added noise, and an array of the same shape as
             `curve` filled with the standard deviation that was used
    """
    noise_sigma = generate_observation_sigma()
    noisy_curve = np.random.normal(curve, noise_sigma)
    sigma_per_point = np.full(curve.shape, noise_sigma)
    return noisy_curve, sigma_per_point

### Data loading

In [5]:
# Block 3 - Load the data
# NOTE(review): unpickling can execute arbitrary code - only load this file if it comes from a trusted source.
# reset_index() moves the stored index into a regular "index" column and renumbers the rows.
data = pd.read_pickle("overcontact_all_parameters.pkl").reset_index()

In [6]:
# Block 4 - Preview of the data (first rows)
data.head()

Unnamed: 0,index,id,curve,primary__t_eff,secondary__t_eff,inclination,mass_ratio,primary__surface_potential,secondary__surface_potential,t1/t2,filter,critical_surface_potential,primary__equivalent_radius,secondary__equivalent_radius,primary__filling_factor,secondary__filling_factor
0,0,5525038,"[0.9271109336686163, 0.9271335908185164, 0.927...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_U,1.959104,0.585781,0.21126,0.169244,0.169244
1,1,5525038,"[0.9267426667358384, 0.9267640025030627, 0.926...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_B,1.959104,0.585781,0.21126,0.169244,0.169244
2,2,5525038,"[0.9271736551553694, 0.927193188167849, 0.9272...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_V,1.959104,0.585781,0.21126,0.169244,0.169244
3,3,5525038,"[0.9286697051715368, 0.9286879105609007, 0.928...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_R,1.959104,0.585781,0.21126,0.169244,0.169244
4,4,5525038,"[0.9304596200748534, 0.9304764401089076, 0.930...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_I,1.959104,0.585781,0.21126,0.169244,0.169244


In [7]:
# Block 5 - Select a random sample of 100 000 records
# No random_state is passed here; presumably the draw follows the global NumPy seed set above - confirm.
data_sample = data.sample(n=100000)

### Train-test split

In [9]:
# Block 6 - Build the array of light curves
# Collect the per-record curves of the sample in order and stack them into one array.
X = np.array(list(data_sample["curve"]))

In [11]:
# Block 7 - Build the array of target features the model will predict
# "primary__t_eff" and "secondary__t_eff" are not needed - their ratio "t1/t2" is sufficient.
# The column order below defines the order of the model outputs.
y = np.array(
    data_sample.loc[
        :,
        [
            "inclination",
            "mass_ratio",
            "primary__surface_potential",
            "secondary__surface_potential",
            "t1/t2",
            "critical_surface_potential",
            "primary__equivalent_radius",
            "secondary__equivalent_radius",
            "primary__filling_factor",
            "secondary__filling_factor",
        ],
    ]
)

In [13]:
# Block 8 - Split the data into training / test sets in an 80/20 ratio
# The "1" suffix marks the noise-free training part; noisy copies are generated from it in the next block.
X_train1, X_test, y_train1, y_test = train_test_split(X, y, test_size=0.2)

In [14]:
# Block 9 - Add noise to the training data
# Every clean training curve yields three independently noised copies that all share its targets.
X_train, y_train = [], []
for clean_curve, targets in zip(X_train1, y_train1):
    for _ in range(3):
        # The generator also returns the per-point sigma array; only the noisy curve is kept.
        noisy_curve, _sigma = stochastic_noise_generator(clean_curve)
        X_train.append(noisy_curve)
        y_train.append(targets)
X_train = np.array(X_train)
y_train = np.array(y_train)

In [15]:
# Block 10 - Print the number of records in the individual data sets
# The noisy training set is three times the clean one because each curve gets three noisy copies.
print("Number of records in dataset: ", len(data),
    "\nNumber of records in sample: ", len(X),
    "\nNumber of train data without noise: ", len(X_train1),
    "\nNumber of train data with noise: ", len(X_train),
    "\nNumber of test data without noise: ", len(X_test))

Number of records in dataset:  1212796 
Number of records in sample:  100000 
Number of train data without noise:  80000 
Number of train data with noise:  240000 
Number of test data without noise:  20000


## Model

In [None]:
# Block 11 - Define the neural-network architecture
# Input: sequences of 400 steps with a single channel.
inputs = Input(shape=(400, 1))
# Convolutional front end; no activation is passed to Conv1D, so the layer's default applies.
b = Conv1D(64, kernel_size = 3, padding = "valid")(inputs)
b = MaxPooling1D(2)(b)
b = Dropout(0.2)(b)
# The LSTM returns its full output sequence, which is flattened before the dense layers.
b = LSTM(64, return_sequences=True)(b)
b = Flatten()(b)
b = Dense(64, activation='relu')(b)
x = Dense(32, activation='relu')(b)
# Ten linear outputs - one per predicted parameter.
output = Dense(10, activation='linear')(x)
model = Model(inputs=inputs, outputs=output)
model.compile(loss='mse', optimizer='adam', metrics=["mae", "mape"])


# Save only the model with the lowest validation MAE; stop after 25 epochs without improvement.
saved_model = "models/over_allParams.hdf5"
checkpoint = ModelCheckpoint(saved_model, monitor = 'val_mae', verbose = 1, save_best_only = True, mode = 'min')
early = EarlyStopping(monitor = "val_mae", mode = "min", patience = 25)
callbacks_list = [checkpoint, early]

# summary() prints the table itself and returns None, so wrapping it in print() only appended a stray "None".
model.summary()

In [None]:
# Block 12 - Train the model
# 10 % of the training data is held out for validation (drives the checkpoint and early-stopping callbacks).
# NOTE(review): epochs = 10 is below the early-stopping patience of 25, so early stopping cannot trigger - confirm intent.
history = model.fit(X_train, y_train, validation_split = 0.1, epochs = 10, verbose = 1, callbacks = callbacks_list, batch_size = 64)

In [16]:
# Block 13 - Load the model
# Same path as the checkpoint file written during training.
model = load_model("models/over_allParams.hdf5")

In [None]:
# Block 14 - Evaluate the model on the noise-free test set
model.evaluate(X_test, y_test)

In [17]:
# Block 15 - Add noise to the test data
# Every clean test curve yields three independently noised copies that all share its targets.
# (The former `j += 1` inside the inner loop was removed: `for ... in range(3)` rebinds the
# counter on every pass, so the increment had no effect.)
X_test_n = []
y_test_n = []
for clean_curve, targets in zip(X_test, y_test):
    for _ in range(3):
        # The generator also returns the per-point sigma array; only the noisy curve is kept.
        noisy_curve, _sigma = stochastic_noise_generator(clean_curve)
        X_test_n.append(noisy_curve)
        y_test_n.append(targets)
X_test_n = np.array(X_test_n)
y_test_n = np.array(y_test_n)

In [19]:
# Block 16 - Evaluate the model on the noisy test set
# The model was compiled with loss 'mse' and metrics ["mae", "mape"], which is what evaluate() reports.
model.evaluate(X_test_n, y_test_n)



[0.06108751520514488, 0.12077078223228455, 12.228681564331055]

### Prediction on synthetic test data - without noise

In [64]:
# Block 17 - Predict on the noise-free test data
y_pred = model.predict(X_test)

In [65]:
# Block 18 - Build one array of predicted values per parameter
# Column k of the prediction matrix is the k-th target (same order as the columns of y).
# Slicing the column directly replaces the per-row Python loops and the follow-up
# reshape, which was a no-op on data that was already one-dimensional.
P_inclination = y_pred[:, 0].copy()
P_mass_ratio = y_pred[:, 1].copy()
P_prim__surface_potential = y_pred[:, 2].copy()
P_sec__surface_potential = y_pred[:, 3].copy()
P_t1_t2 = y_pred[:, 4].copy()
P_critical_surface_potential = y_pred[:, 5].copy()
P_primary_equivalent_radius = y_pred[:, 6].copy()
P_secondary_equivalent_radius = y_pred[:, 7].copy()
P_primary_filling_factor = y_pred[:, 8].copy()
P_secondary_filling_factor = y_pred[:, 9].copy()

In [67]:
# Block 19 - Build a DataFrame holding the true values, the predicted values and their absolute difference
# Column k of y_test is the true value of the k-th parameter; each triple below is
# (true, predicted, |true - predicted|) for one parameter.
df = pd.DataFrame(data={
    'inclination': y_test[:, 0],
    'P_inclination': P_inclination,
    'inclination_abs_diff': abs(y_test[:, 0] - P_inclination),

    'mass_ratio': y_test[:, 1],
    'P_mass_ratio': P_mass_ratio,
    'mass_ratio_abs_diff': abs(y_test[:, 1] - P_mass_ratio),

    'prim_surface_potential': y_test[:, 2],
    'P_prim_surf_potential': P_prim__surface_potential,
    'prim_surf_potent_abs_diff': abs(y_test[:, 2] - P_prim__surface_potential),

    'sec_surface_potential': y_test[:, 3],
    'P_sec_surf_potential': P_sec__surface_potential,
    'sec_surf_potent_abs_diff': abs(y_test[:, 3] - P_sec__surface_potential),

    't1_t2': y_test[:, 4],
    'P_t1_t2': P_t1_t2,
    't1_t2_abs_diff': abs(y_test[:, 4] - P_t1_t2),

    'critical_surf_potential': y_test[:, 5],
    'P_crit_surf_potential': P_critical_surface_potential,
    'crit_surf_potential_abs_diff': abs(y_test[:, 5] - P_critical_surface_potential),

    'prim_equi_radius': y_test[:, 6],
    'P_prim_equi_radius': P_primary_equivalent_radius,
    'prim_equi_radius_abs_diff': abs(y_test[:, 6] - P_primary_equivalent_radius),

    'sec_equi_radius': y_test[:, 7],
    'P_sec_equi_radius': P_secondary_equivalent_radius,
    'sec_equi_radius_abs_diff': abs(y_test[:, 7] - P_secondary_equivalent_radius),

    'prim_filling_factor': y_test[:, 8],
    'P_prim_filling_factor': P_primary_filling_factor,
    'prim_filling_factor_abs_diff': abs(y_test[:, 8] - P_primary_filling_factor),

    'sec_filling_factor': y_test[:, 9],
    'P_sec_filling_factor': P_secondary_filling_factor,
    'sec_filling_factor_abs_diff': abs(y_test[:, 9] - P_secondary_filling_factor),
})


In [27]:
# Block 20 - Drop the columns holding the absolute difference between true and predicted values
# What remains are the (true, predicted) column pairs for each parameter.
df_copy = df.copy()
df_comp = df_copy.drop(
    columns=[
        'inclination_abs_diff',
        'mass_ratio_abs_diff',
        'prim_surf_potent_abs_diff',
        'sec_surf_potent_abs_diff',
        't1_t2_abs_diff',
        'crit_surf_potential_abs_diff',
        'prim_equi_radius_abs_diff',
        'sec_equi_radius_abs_diff',
        'prim_filling_factor_abs_diff',
        'sec_filling_factor_abs_diff',
    ]
)

In [28]:
# Block 21 - Preview of the true vs. predicted values (first rows)
df_comp.head()

Unnamed: 0,inclination,P_inclination,mass_ratio,P_mass_ratio,prim_surface_potential,P_prim_surf_potential,sec_surface_potential,P_sec_surf_potential,t1_t2,P_t1_t2,critical_surf_potential,P_crit_surf_potential,prim_equi_radius,P_prim_equi_radius,sec_equi_radius,P_sec_equi_radius,prim_filling_factor,P_prim_filling_factor,sec_filling_factor,P_sec_filling_factor
0,0.935491,1.094681,3.333333,4.739271,7.007351,8.533597,7.007351,8.441549,1.037037,1.022082,7.054097,8.673995,0.284784,0.278536,0.4921,0.522325,0.074968,0.448388,0.074968,0.364946
1,0.927295,1.017328,2.5,3.622497,5.854167,7.047381,5.854167,7.028968,1.0,1.030903,5.94524,7.331349,0.312934,0.318094,0.471755,0.509637,0.149309,0.482214,0.149309,0.460357
2,1.485231,1.463987,1.0,0.916591,3.600772,3.539768,3.600772,3.51159,1.071429,1.062556,3.75,3.637359,0.406935,0.404025,0.406935,0.389204,0.274719,0.258317,0.274719,0.258596
3,0.902054,0.920394,1.666667,1.3347,4.410223,3.860328,4.410223,3.863567,1.086957,1.036972,4.772403,4.303124,0.389905,0.440025,0.47726,0.479571,0.618986,0.73531,0.618986,0.743143
4,1.462674,1.198251,0.2,0.128971,2.214079,2.173163,2.214079,2.176864,1.041667,1.064237,2.232728,2.21261,0.529977,0.526068,0.257956,0.292184,0.146158,0.294052,0.146158,0.300588


In [140]:
# Results - inclination
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_inc = sum(df_comp.inclination)/len(df_comp)
avg_pred_inc = sum(df_comp.P_inclination)/len(df_comp)
print('Avg true inc %f' % avg_true_inc)
print('Avg pred inc %f' % avg_pred_inc)

Avg true inc 1.221348
Avg pred inc 1.193492


In [141]:
# Results - mass ratio
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_mr = sum(df_comp.mass_ratio)/len(df_comp)
avg_pred_mr = sum(df_comp.P_mass_ratio)/len(df_comp)
print('Avg true mass ratio %f' % avg_true_mr)
print('Avg pred mass ratio %f' % avg_pred_mr)

Avg true mass ratio 1.412813
Avg pred mass ratio 1.417530


In [142]:
# Results - primary surface potential
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_psp = sum(df_comp.prim_surface_potential)/len(df_comp)
avg_pred_psp = sum(df_comp.P_prim_surf_potential)/len(df_comp)
print('Avg true primary surface potential %f' % avg_true_psp)
print('Avg pred primary surface potential %f' % avg_pred_psp)

Avg true primary surface potential 3.972770
Avg pred primary surface potential 3.968599


In [143]:
# Results - secondary surface potential
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_ssp = sum(df_comp.sec_surface_potential)/len(df_comp)
avg_pred_ssp = sum(df_comp.P_sec_surf_potential)/len(df_comp)
print('Avg true secondary surface potential %f' % avg_true_ssp)
print('Avg pred secondary surface potential %f' % avg_pred_ssp)

Avg true secondary surface potential 3.972770
Avg pred secondary surface potential 3.963105


In [144]:
# Results - temperature ratio
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_tr = sum(df_comp.t1_t2)/len(df_comp)
avg_pred_tr = sum(df_comp.P_t1_t2)/len(df_comp)
print('Avg true temperature ratio %f' % avg_true_tr)
print('Avg pred temperature ratio %f' % avg_pred_tr)

Avg true temperature ratio 1.048076
Avg pred temperature ratio 1.046491


In [145]:
# Results - critical surface potential
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_csp = sum(df_comp.critical_surf_potential)/len(df_comp)
avg_pred_csp = sum(df_comp.P_crit_surf_potential)/len(df_comp)
print('Avg true critical surface potential %f' % avg_true_csp)
print('Avg pred critical surface potential %f' % avg_pred_csp)

Avg true critical surface potential 4.286547
Avg pred critical surface potential 4.314012


In [146]:
# Results - primary equivalent radius
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_per = sum(df_comp.prim_equi_radius)/len(df_comp)
avg_pred_per = sum(df_comp.P_prim_equi_radius)/len(df_comp)
print('Avg true primary equivalent radius %f' % avg_true_per)
print('Avg pred primary equivalent radius %f' % avg_pred_per)

Avg true primary equivalent radius 0.431312
Avg pred primary equivalent radius 0.440175


In [147]:
# Results - secondary equivalent radius
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_ser = sum(df_comp.sec_equi_radius)/len(df_comp)
avg_pred_ser = sum(df_comp.P_sec_equi_radius)/len(df_comp)
print('Avg true secondary equivalent radius %f' % avg_true_ser)
print('Avg pred secondary equivalent radius %f' % avg_pred_ser)

Avg true secondary equivalent radius 0.446961
Avg pred secondary equivalent radius 0.450492


In [148]:
# Results - primary filling factor
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_pff = sum(df_comp.prim_filling_factor)/len(df_comp)
avg_pred_pff = sum(df_comp.P_prim_filling_factor)/len(df_comp)
print('Avg true primary filling factor %f' % avg_true_pff)
print('Avg pred primary filling factor %f' % avg_pred_pff)

Avg true primary filling factor 0.615457
Avg pred primary filling factor 0.648980


In [149]:
# Results - secondary filling factor
# Divide by the number of rows actually summed: df_comp is rebuilt later from the 3x larger
# noisy test set, where len(y_test) would no longer be the right denominator.
avg_true_sff = sum(df_comp.sec_filling_factor)/len(df_comp)
avg_pred_sff = sum(df_comp.P_sec_filling_factor)/len(df_comp)
print('Avg true secondary filling factor %f' % avg_true_sff)
print('Avg pred secondary filling factor %f' % avg_pred_sff)

Avg true secondary filling factor 0.615457
Avg pred secondary filling factor 0.652366


In [154]:
# Block 22 - Build a one-row DataFrame comparing the average true and predicted value of each parameter
# Each average is wrapped in a one-element list so that it becomes a single-row column.
# NOTE(review): the keys "pred_surf_potent", "pred_crit_sufr_potent" and "preed_sec_equi_radius"
# look like typos / inconsistent names; they are kept unchanged because they are the column names of avg_df.
avg_dict = {
    "true_inc": [avg_true_inc],
    "pred_inc": [avg_pred_inc],

    "true_mass_ratio": [avg_true_mr],
    "pred_mass_ratio": [avg_pred_mr],

    "true_prim_surf_potent": [avg_true_psp],
    "pred_surf_potent": [avg_pred_psp],

    "true_sec_surf_potent": [avg_true_ssp],
    "pred_sec_surf_potent": [avg_pred_ssp],

    "true_temp_ratio": [avg_true_tr],
    "pred_temp_ratio": [avg_pred_tr],

    "true_crit_surf_potent": [avg_true_csp],
    "pred_crit_sufr_potent": [avg_pred_csp],

    "true_prim_equi_radius": [avg_true_per],
    "pred_prim_equi_radius": [avg_pred_per],

    "true_sec_equi_radius": [avg_true_ser],
    "preed_sec_equi_radius": [avg_pred_ser],

    "true_prim_fill_factor": [avg_true_pff],
    "pred_prim_fill_factor": [avg_pred_pff],

    "true_sec_fill_factor": [avg_true_sff],
    "pred_sec_fill_factor": [avg_pred_sff],
}
avg_df = pd.DataFrame(avg_dict)

In [155]:
# Block 23 - Display the table of average true vs. predicted values
avg_df

Unnamed: 0,true_inc,pred_inc,true_mass_ratio,pred_mass_ratio,true_prim_surf_potent,pred_surf_potent,true_sec_surf_potent,pred_sec_surf_potent,true_temp_ratio,pred_temp_ratio,true_crit_surf_potent,pred_crit_sufr_potent,true_prim_equi_radius,pred_prim_equi_radius,true_sec_equi_radius,preed_sec_equi_radius,true_prim_fill_factor,pred_prim_fill_factor,true_sec_fill_factor,pred_sec_fill_factor
0,1.221348,1.193492,1.412813,1.41753,3.97277,3.968599,3.97277,3.963105,1.048076,1.046491,4.286547,4.314012,0.431312,0.440175,0.446961,0.450492,0.615457,0.64898,0.615457,0.652366


In [29]:
# Block 24 - Define the parameter names, in the same order as the model outputs
cols = [
    "inclination",
    "mass_ratio",
    "primary__surface_potential",
    "secondary__surface_potential",
    "t1_t2",
    "critical_surface_potential",
    "primary__equivalent_radius",
    "secondary__equivalent_radius",
    "primary__filling_factor",
    "secondary__filling_factor",
]

In [30]:
# Block 25 - Pick one random test record and compare its true and predicted parameter values
idx = np.random.choice(np.arange(len(y_test)), 1, replace=False)
true_sample = y_test[idx]
pred_sample = y_pred[idx]

# idx holds a single position, so flattening gives one value per parameter (one table row each).
true_flat = true_sample.flatten()
pred_flat = pred_sample.flatten()
abs_diff = abs(true_flat - pred_flat)

# `columns=cols` is deliberately not passed: with dict data it selects dict keys by name, and
# since none of the parameter names is a key the resulting frame came out empty.
comp = pd.DataFrame(data={"true_values": true_flat,
                        "predicted_values": pred_flat,
                        "abs diff": abs_diff,
                        "relative diff %": (abs_diff / true_flat) * 100})

In [32]:
# Block 26 - Display the comparison
comp

Unnamed: 0,true_values,predicted_values,abs diff,relative diff %
0,1.458632,1.259716,0.198916,13.637155
1,0.3,0.414693,0.114693,38.231138
2,2.306671,2.457006,0.150334,6.517363
3,2.306671,2.509428,0.202757,8.790011
4,1.033333,1.075377,0.042044,4.068752
5,2.466229,2.795861,0.329632,13.365824
6,0.538518,0.53079,0.007728,1.435094
7,0.337164,0.403092,0.065928,19.553721
8,0.852955,0.776245,0.076709,8.993381
9,0.852955,0.79607,0.056884,6.669103


### Prediction on synthetic test data - with noise

In [None]:
# Block 27 - Predict on the noisy test data
y_pred_n=model.predict(X_test_n)

In [None]:
# Block 28 - Build one array of predicted values per parameter (noisy test data)
# Column k of the prediction matrix is the k-th target (same order as the columns of y).
# Slicing the column directly replaces the per-row Python loops and the follow-up
# reshape, which was a no-op on data that was already one-dimensional.
P_inclination = y_pred_n[:, 0].copy()
P_mass_ratio = y_pred_n[:, 1].copy()
P_prim__surface_potential = y_pred_n[:, 2].copy()
P_sec__surface_potential = y_pred_n[:, 3].copy()
P_t1_t2 = y_pred_n[:, 4].copy()
P_critical_surface_potential = y_pred_n[:, 5].copy()
P_primary_equivalent_radius = y_pred_n[:, 6].copy()
P_secondary_equivalent_radius = y_pred_n[:, 7].copy()
P_primary_filling_factor = y_pred_n[:, 8].copy()
P_secondary_filling_factor = y_pred_n[:, 9].copy()


In [None]:
# Block 29 - Build a DataFrame holding the true values, the predicted values and their absolute difference
# (noisy test data). Column k of y_test_n is the true value of the k-th parameter; each triple below is
# (true, predicted, |true - predicted|) for one parameter.
df = pd.DataFrame(data={
    'inclination': y_test_n[:, 0],
    'P_inclination': P_inclination,
    'inclination_abs_diff': abs(y_test_n[:, 0] - P_inclination),

    'mass_ratio': y_test_n[:, 1],
    'P_mass_ratio': P_mass_ratio,
    'mass_ratio_abs_diff': abs(y_test_n[:, 1] - P_mass_ratio),

    'prim_surface_potential': y_test_n[:, 2],
    'P_prim_surf_potential': P_prim__surface_potential,
    'prim_surf_potent_abs_diff': abs(y_test_n[:, 2] - P_prim__surface_potential),

    'sec_surface_potential': y_test_n[:, 3],
    'P_sec_surf_potential': P_sec__surface_potential,
    'sec_surf_potent_abs_diff': abs(y_test_n[:, 3] - P_sec__surface_potential),

    't1_t2': y_test_n[:, 4],
    'P_t1_t2': P_t1_t2,
    't1_t2_abs_diff': abs(y_test_n[:, 4] - P_t1_t2),

    'critical_surf_potential': y_test_n[:, 5],
    'P_crit_surf_potential': P_critical_surface_potential,
    'crit_surf_potential_abs_diff': abs(y_test_n[:, 5] - P_critical_surface_potential),

    'prim_equi_radius': y_test_n[:, 6],
    'P_prim_equi_radius': P_primary_equivalent_radius,
    'prim_equi_radius_abs_diff': abs(y_test_n[:, 6] - P_primary_equivalent_radius),

    'sec_equi_radius': y_test_n[:, 7],
    'P_sec_equi_radius': P_secondary_equivalent_radius,
    'sec_equi_radius_abs_diff': abs(y_test_n[:, 7] - P_secondary_equivalent_radius),

    'prim_filling_factor': y_test_n[:, 8],
    'P_prim_filling_factor': P_primary_filling_factor,
    'prim_filling_factor_abs_diff': abs(y_test_n[:, 8] - P_primary_filling_factor),

    'sec_filling_factor': y_test_n[:, 9],
    'P_sec_filling_factor': P_secondary_filling_factor,
    'sec_filling_factor_abs_diff': abs(y_test_n[:, 9] - P_secondary_filling_factor),
})


In [None]:
# Block 30 - Drop the columns holding the absolute difference between true and predicted values
# What remains are the (true, predicted) column pairs for each parameter; the first rows are shown.
df_copy = df.copy()
df_comp = df_copy.drop(
    columns=[
        'inclination_abs_diff',
        'mass_ratio_abs_diff',
        'prim_surf_potent_abs_diff',
        'sec_surf_potent_abs_diff',
        't1_t2_abs_diff',
        'crit_surf_potential_abs_diff',
        'prim_equi_radius_abs_diff',
        'sec_equi_radius_abs_diff',
        'prim_filling_factor_abs_diff',
        'sec_filling_factor_abs_diff',
    ]
)
df_comp.head()

In [None]:
# Block 31 - Define the parameter names, in the same order as the model outputs
cols = [
    "inclination",
    "mass_ratio",
    "primary__surface_potential",
    "secondary__surface_potential",
    "t1_t2",
    "critical_surface_potential",
    "primary__equivalent_radius",
    "secondary__equivalent_radius",
    "primary__filling_factor",
    "secondary__filling_factor",
]

In [None]:
# Block 32 - Pick one random noisy test record and compare its true and predicted parameter values
idx = np.random.choice(np.arange(len(y_test_n)), 1, replace=False)
true_sample = y_test_n[idx]
pred_sample = y_pred_n[idx]

# idx holds a single position, so flattening gives one value per parameter (one table row each).
true_flat = true_sample.flatten()
pred_flat = pred_sample.flatten()
abs_diff = abs(true_flat - pred_flat)

comp = pd.DataFrame(
    data={
        "true_values": true_flat,
        "predicted_values": pred_flat,
        "abs diff": abs_diff,
        "relative diff %": (abs_diff / true_flat) * 100,
    }
)