# Prediction of all parameters on observed data
### Selected attributes:
* inclination - inc
* mass ratio - q
* primary potential - omega 1
* secondary potential - omega 2
* temperature ratio - t1_t2

## Detached observed
### Model used: models/norm_detached_sel_v4.hdf5 and individual models for feature prediction

In [1]:
# Blok 1 - nacitanie kniznic
import numpy as np
import pandas as pd
import math
from keras.models import load_model
from ast import literal_eval
from random import randint
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Block 2 - load the observed detached light curves and drop the redundant index column
# The path is written with a forward slash so that the string literal contains no
# backslash escape sequence and resolves on every operating system.
data_det = pd.read_csv("observed/observed_det.csv")
data_det = data_det.drop(columns=["Unnamed: 0"])
data_det.head()

Unnamed: 0,q,inc,omega1,omega2,T1,T2,P,Type,Spot,Ref,name,filter,curve,t1_t2,primary__equivalent_radius,secondary__equivalent_radius
0,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,V,"[0.4986, 0.4955, 0.4943, 0.4956, 0.4995, 0.505...",1.653154,0.27175,0.316676
1,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,I,"[0.472, 0.4675, 0.4655, 0.4667, 0.4711, 0.4781...",1.653154,0.27175,0.316676
2,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,R,"[0.5416, 0.5392, 0.5384, 0.5393, 0.5421, 0.546...",1.653154,0.27175,0.316676
3,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,B,"[0.5167, 0.5152, 0.5154, 0.5174, 0.521, 0.5263...",1.653154,0.27175,0.316676
4,0.366,88.7,2.859,2.608,5940.0,3450.0,0.56899,detached,Y,https://academic.oup.com/pasj/article/70/4/72/...,BUVul,V,"[0.4113, 0.4087, 0.4076, 0.4083, 0.4108, 0.415...",1.721739,0.41515,0.295522


In [3]:
# Block 3 - build the array of light curves
# Every "curve" cell stores a list literal as text; literal_eval parses it back into a list.
X_det = np.array(
    [literal_eval(curve_text) for curve_text in data_det["curve"]],
    dtype=np.float32,
)

In [4]:
# Block 4 - build the array of the features that are being predicted
y_det = np.array(data_det.loc[:, ["inc", "q", "omega1", "omega2", "t1_t2"]])

In [5]:
# Block 5 - load the models
# model_det_v4 is trained on normalized curves; its predictions need to be denormalized
model_det_v4 = load_model("models/norm_detached_sel_v4.hdf5")

# these models are trained on the original data; their predictions need no adjustment
model_det_inc = load_model("models/detached_inclination.hdf5")        # inclination
model_det_mass_ratio = load_model("models/detached_mass_ratio.hdf5")  # mass ratio
model_det_prim_potent = load_model("models/detached_pSP.hdf5")        # primary surface potential
model_det_sec_potent = load_model("models/detached_sSP.hdf5")         # secondary surface potential
model_det_temp_ratio = load_model("models/detached_t1_t2.hdf5")       # temperature ratio

### Predikcie s modelom model_det_v4

In [6]:
# Block 7 - predict on the observed detached curves with the combined model
pred_det = model_det_v4.predict(X_det)

In [7]:
# Block 8 - wrap the raw (still normalized) predictions in a DataFrame
pred_det_df = pd.DataFrame(
    data=pred_det,
    columns=[
        'inc_predicted',
        'mass_ratio_predicted',
        'omega1_predicted',
        'omega2_predicted',
        't1_t2_predicted',
    ],
)
pred_det_df.head()

Unnamed: 0,inc_predicted,mass_ratio_predicted,omega1_predicted,omega2_predicted,t1_t2_predicted
0,0.662311,0.305576,0.050511,0.011517,0.006339
1,0.714567,0.220312,0.042466,0.009666,-0.00218
2,0.651992,0.191949,0.055177,0.004846,0.022118
3,0.643667,0.271862,0.05263,0.008949,0.01092
4,0.665633,0.100371,0.039383,0.004423,0.253217


In [8]:
# Block 9 - define the scaler and map the predictions back from the normalized range
# NOTE(review): the scaler is fitted on y_det, i.e. on the targets of the observed set
# loaded in this notebook, so inverse_transform rescales the predictions to the min/max
# of these observed values - confirm this matches the scaling used when model_det_v4
# was trained.
scaler_det = MinMaxScaler()
y_det_minmax_scaled = scaler_det.fit_transform(y_det)
pred_denorm_det = scaler_det.inverse_transform(pred_det)
pred_denorm_det[0]

array([84.51266   ,  0.39932293,  2.8435717 ,  2.1367192 ,  1.0066773 ],
      dtype=float32)

In [9]:
# Block 10 - wrap the denormalized predictions in a DataFrame
pred_denorm_det_df = pd.DataFrame(
    data=pred_denorm_det,
    columns=["pred_inc", "pred_q", "pred_omega1", "pred_omega2", "pred_t1_t2"],
)
pred_denorm_det_df.head()

Unnamed: 0,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2
0,84.512657,0.399323,2.843572,2.136719,1.006677
1,85.160629,0.32557,2.795223,2.124713,0.998712
2,84.384697,0.301036,2.871614,2.093443,1.021431
3,84.281471,0.37016,2.856305,2.120062,1.010961
4,84.553848,0.221821,2.77669,2.090698,1.237508


In [11]:
# Block 11 - assemble the table that is saved as the CSV used for plotting the predicted curves
# The true and the predicted inclination columns are converted from degrees to radians.
target_det_obs = (
    data_det[['name', 'filter']]
    .join(pred_denorm_det_df)
    .join(pd.DataFrame(y_det, columns=['inc', 'q', 'omega1', 'omega2', 't1_t2']))
    .assign(
        pred_inc=lambda frame: frame['pred_inc'].astype(float).apply(math.radians),
        inc=lambda frame: frame['inc'].astype(float).apply(math.radians),
    )
)
target_det_obs.head()

Unnamed: 0,name,filter,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2,inc,q,omega1,omega2,t1_t2
0,ATPeg,V,1.475024,0.399323,2.843572,2.136719,1.006677,1.331686,0.484,4.2,2.85,1.653154
1,ATPeg,I,1.486333,0.32557,2.795223,2.124713,0.998712,1.331686,0.484,4.2,2.85,1.653154
2,ATPeg,R,1.472791,0.301036,2.871614,2.093443,1.021431,1.331686,0.484,4.2,2.85,1.653154
3,ATPeg,B,1.470989,0.37016,2.856305,2.120062,1.010961,1.331686,0.484,4.2,2.85,1.653154
4,BUVul,V,1.475743,0.221821,2.77669,2.090698,1.237508,1.548107,0.366,2.859,2.608,1.721739


In [None]:
# Block 12 - save the table of true and predicted values to a CSV file
target_det_obs.to_csv('ml_predictor_evaluator/src/data/OBS_detached_model_v4.csv')

In [12]:
# Block 13 - compare the true values with the predictions (column means)
true_det_df = pd.DataFrame(
    data=y_det,
    columns=["inc", "q", "omega1", "omega2", "t1_t2"],
)
true_mean_det = true_det_df.mean(axis=0)
pred_mean_det = pred_denorm_det_df.mean(axis=0)

# NOTE(review): the 'MAE' column is the absolute difference between the two column means,
# not a per-sample mean absolute error.
eval_pred = pd.DataFrame({
    'attribute': true_mean_det.index,
    'avg_true': true_mean_det.values,
    'avg_pred': pred_mean_det.values,
    'MAE': abs(true_mean_det.values - pred_mean_det.values),
})
eval_pred

Unnamed: 0,attribute,avg_true,avg_pred,MAE
0,inc,83.46,85.543533,2.083533
1,q,0.48544,0.226305,0.259135
2,omega1,4.39708,2.802002,1.595078
3,omega2,3.44504,2.112107,1.332933
4,t1_t2,1.516795,1.096186,0.420609


### Predikcie s individualnymi modelmi

In [16]:
# Block 14 - predictions with the individual (single-feature) models
# .copy() turns the column subset into an independent DataFrame, so the prediction
# columns added below are written to it directly and pandas does not raise a
# SettingWithCopyWarning for assigning onto a slice of another frame.
data_det = data_det[["name", "filter", "inc", "q", "omega1", "omega2", "t1_t2"]].copy()

# each model returns one value per curve; flatten() turns the output into a 1-D array
pred_inc = model_det_inc.predict(X_det).flatten()
pred_q = model_det_mass_ratio.predict(X_det).flatten()
pred_pp = model_det_prim_potent.predict(X_det).flatten()
pred_sp = model_det_sec_potent.predict(X_det).flatten()
pred_tr = model_det_temp_ratio.predict(X_det).flatten()

data_det['pred_inc'] = pred_inc
data_det['pred_q'] = pred_q
data_det['pred_omega1'] = pred_pp
data_det['pred_omega2'] = pred_sp
data_det['pred_t1_t2'] = pred_tr

# the inc column has to be converted to radians; the original values are in degrees
data_det['inc'] = data_det['inc'].astype(float).apply(math.radians)

data_det.head()

Unnamed: 0,name,filter,inc,q,omega1,omega2,t1_t2,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2
0,ATPeg,V,1.331686,0.484,4.2,2.85,1.653154,1.3017,0.952172,6.590807,3.198319,1.698311
1,ATPeg,I,1.331686,0.484,4.2,2.85,1.653154,1.32018,0.431483,6.108151,2.953681,1.925024
2,ATPeg,R,1.331686,0.484,4.2,2.85,1.653154,1.309564,1.316318,7.56989,3.909111,1.543464
3,ATPeg,B,1.331686,0.484,4.2,2.85,1.653154,1.299741,1.508953,7.92051,3.546857,1.529411
4,BUVul,V,1.548107,0.366,2.859,2.608,1.721739,1.366025,0.503859,8.363584,6.834431,2.619356


In [18]:
# Block 15 - compare the mean true values with the mean predictions of the individual models
true_det_df = pd.DataFrame(
    data=data_det,
    columns=["inc", "q", "omega1", "omega2", "t1_t2"],
)
pred_det_df = pd.DataFrame(
    data=data_det,
    columns=["pred_inc", "pred_q", "pred_omega1", "pred_omega2", "pred_t1_t2"],
)
true_mean_det = true_det_df.mean(axis=0)
pred_mean_det = pred_det_df.mean(axis=0)

# NOTE(review): the 'MAE' column is the absolute difference between the two column means,
# not a per-sample mean absolute error.
eval_pred = pd.DataFrame({
    'attribute': true_mean_det.index,
    'avg_true': true_mean_det.values,
    'avg_pred': pred_mean_det.values,
    'MAE': abs(true_mean_det.values - pred_mean_det.values),
})
eval_pred

Unnamed: 0,attribute,avg_true,avg_pred,MAE
0,inc,1.456652,1.397473,0.059179
1,q,0.48544,1.253075,0.767635
2,omega1,4.39708,6.306833,1.909753
3,omega2,3.44504,6.849835,3.404795
4,t1_t2,1.516795,1.960292,0.443497


In [20]:
# Block 16 - save the true values and the individual-model predictions to a CSV file
data_det.to_csv('ml_predictor_evaluator/src/data/OBS_detached_model_individual.csv')

## Overcontact observed
### Model used: models/norm_overcontact_selection.hdf5

In [22]:
# Block 17 - load the observed overcontact light curves and drop the redundant index column
# The path is written with a forward slash so that the string literal contains no
# backslash escape sequence and resolves on every operating system.
data_over = pd.read_csv("observed/observed_over.csv")
data_over = data_over.drop(columns=["Unnamed: 0"])
data_over.head()

Unnamed: 0,q,inc,omega1,omega2,T1,T2,P,Type,Spot,Ref,name,filter,curve,t1_t2,primary__equivalent_radius,secondary__equivalent_radius
0,0.169,75.46,2.08,2.08,6215.0,6141.0,0.494108,overcontact,N,http://liber.onu.edu.ua/pdf/astro/all/OAP_14/0...,AH Aur,V,"[0.6897, 0.6901, 0.6904, 0.691, 0.6916, 0.6925...",1.01205,0.565838,0.271593
1,0.169,75.46,2.08,2.08,6215.0,6141.0,0.494108,overcontact,N,http://liber.onu.edu.ua/pdf/astro/all/OAP_14/0...,AH Aur,B,"[0.656, 0.6549, 0.6537, 0.6525, 0.6516, 0.6513...",1.01205,0.565838,0.271593
2,0.27,81.7,2.357,2.357,6500.0,6180.0,0.421522,overcontact,Y,https://iopscience.iop.org/article/10.1088/000...,AKHer,V,"[0.6688, 0.6646, 0.6608, 0.6572, 0.6539, 0.651...",1.05178,0.510294,0.286772
3,0.27,81.7,2.357,2.357,6500.0,6180.0,0.421522,overcontact,Y,https://iopscience.iop.org/article/10.1088/000...,AKHer,I,"[0.6932, 0.6884, 0.6841, 0.6804, 0.6773, 0.674...",1.05178,0.510294,0.286772
4,0.27,81.7,2.357,2.357,6500.0,6180.0,0.421522,overcontact,Y,https://iopscience.iop.org/article/10.1088/000...,AKHer,R,"[0.6524, 0.6518, 0.652, 0.6532, 0.6554, 0.6582...",1.05178,0.510294,0.286772


In [29]:
# Block 18 - build the array of light curves
# Every "curve" cell stores a list literal as text; literal_eval parses it back into a list.
X_over = np.array(
    [literal_eval(curve_text) for curve_text in data_over["curve"]],
    dtype=np.float32,
)

In [30]:
# Block 19 - build the array of the features that are being predicted
y_over = np.array(data_over.loc[:, ["inc", "q", "omega1", "omega2", "t1_t2"]])

In [31]:
# Block 20 - load the overcontact model
model_over = load_model("models/norm_overcontact_selection.hdf5")

In [32]:
# Block 21 - predict on the observed overcontact curves
pred_over = model_over.predict(X_over)



In [33]:
# Block 22 - wrap the raw (still normalized) predictions in a DataFrame
pred_over_df = pd.DataFrame(
    data=pred_over,
    columns=[
        'inc_predicted',
        'mass_ratio_predicted',
        'omega1_predicted',
        'omega2_predicted',
        't1_t2_predicted',
    ],
)
pred_over_df.head()

Unnamed: 0,inc_predicted,mass_ratio_predicted,omega1_predicted,omega2_predicted,t1_t2_predicted
0,0.795459,0.031154,0.048805,0.049841,0.592908
1,0.74924,0.023258,0.041311,0.041586,0.821542
2,0.692839,0.96254,0.94927,0.950432,1.049423
3,0.759453,0.952245,0.956856,0.962453,0.680572
4,0.802173,0.442949,0.489219,0.489919,0.887496


In [34]:
# Block 23 - define the scaler, map the predictions back from the normalized range
# and wrap the result in a DataFrame
# NOTE(review): the scaler is fitted on y_over, i.e. on the targets of the observed set
# loaded in this notebook, so inverse_transform rescales the predictions to the min/max
# of these observed values - confirm this matches the scaling used when the model was trained.
scaler_over = MinMaxScaler()
y_over_minmax_scaled = scaler_over.fit_transform(y_over)
pred_denorm_over = scaler_over.inverse_transform(pred_over)

pred_denorm_over_df = pd.DataFrame(
    data=pred_denorm_over,
    columns=["pred_inc", "pred_q", "pred_omega1", "pred_omega2", "pred_t1_t2"],
)
pred_denorm_over_df.head()

Unnamed: 0,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2
0,79.606049,0.255079,2.29069,2.295163,1.017947
1,78.16124,0.233262,2.258338,2.259529,1.046453
2,76.398155,2.828499,6.177999,6.183016,1.074864
3,78.480515,2.800054,6.210745,6.23491,1.028877
4,79.815918,1.392867,4.19196,4.19498,1.054676


In [35]:
# Block 24 - assemble the table that is saved as the CSV used for plotting the predicted curves
# The true and the predicted inclination columns are converted from degrees to radians.
# These are overcontact curves, so omega 1 and omega 2 have to hold the same value:
# the predicted omega2 column is overwritten with the predicted omega1 column.
target_over_obs = (
    data_over[['name', 'filter']]
    .join(pred_denorm_over_df)
    .join(pd.DataFrame(y_over, columns=['inc', 'q', 'omega1', 'omega2', 't1_t2']))
    .assign(
        pred_inc=lambda frame: frame['pred_inc'].astype(float).apply(math.radians),
        inc=lambda frame: frame['inc'].astype(float).apply(math.radians),
        pred_omega2=lambda frame: frame['pred_omega1'],
    )
)
target_over_obs.head()

Unnamed: 0,name,filter,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2,inc,q,omega1,omega2,t1_t2
0,AH Aur,V,1.389388,0.255079,2.29069,2.29069,1.017947,1.317025,0.169,2.08,2.08,1.01205
1,AH Aur,B,1.364171,0.233262,2.258338,2.258338,1.046453,1.317025,0.169,2.08,2.08,1.01205
2,AKHer,V,1.333399,2.828499,6.177999,6.177999,1.074864,1.425934,0.27,2.357,2.357,1.05178
3,AKHer,I,1.369743,2.800054,6.210745,6.210745,1.028877,1.425934,0.27,2.357,2.357,1.05178
4,AKHer,R,1.393051,1.392867,4.19196,4.19196,1.054676,1.425934,0.27,2.357,2.357,1.05178


In [36]:
# Block 25 - save the table of true and predicted overcontact values to a CSV file
target_over_obs.to_csv('ml_predictor_evaluator/src/data/OBS_overcontact_model.csv')