# Prediction of all parameters on observed data
### Selected attributes:
* inclination - inc
* mass ration - q
* primary potential - omega 1
* secondary potential - omega 2
* temperature ratio - t1_t2

## Detached observed
### Model used: norm_detached_selection

In [1]:
# Libraries
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from keras.models import load_model
from ast import literal_eval
from random import randint
from sklearn.preprocessing import MinMaxScaler


In [2]:
# Detached data loading
data_det = pd.read_csv("observed\observed_det.csv")
data_det.head()

Unnamed: 0.1,Unnamed: 0,q,inc,omega1,omega2,T1,T2,P,Type,Spot,Ref,name,filter,curve,t1_t2,primary__equivalent_radius,secondary__equivalent_radius
0,0,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,V,"[0.4986, 0.4955, 0.4943, 0.4956, 0.4995, 0.505...",1.653154,0.27175,0.316676
1,1,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,I,"[0.472, 0.4675, 0.4655, 0.4667, 0.4711, 0.4781...",1.653154,0.27175,0.316676
2,2,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,R,"[0.5416, 0.5392, 0.5384, 0.5393, 0.5421, 0.546...",1.653154,0.27175,0.316676
3,3,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,B,"[0.5167, 0.5152, 0.5154, 0.5174, 0.521, 0.5263...",1.653154,0.27175,0.316676
4,4,0.366,88.7,2.859,2.608,5940.0,3450.0,0.56899,detached,Y,https://academic.oup.com/pasj/article/70/4/72/...,BUVul,V,"[0.4113, 0.4087, 0.4076, 0.4083, 0.4108, 0.415...",1.721739,0.41515,0.295522


In [3]:
data_det = data_det.drop(columns=["Unnamed: 0"])
data_det.head()

Unnamed: 0,q,inc,omega1,omega2,T1,T2,P,Type,Spot,Ref,name,filter,curve,t1_t2,primary__equivalent_radius,secondary__equivalent_radius
0,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,V,"[0.4986, 0.4955, 0.4943, 0.4956, 0.4995, 0.505...",1.653154,0.27175,0.316676
1,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,I,"[0.472, 0.4675, 0.4655, 0.4667, 0.4711, 0.4781...",1.653154,0.27175,0.316676
2,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,R,"[0.5416, 0.5392, 0.5384, 0.5393, 0.5421, 0.546...",1.653154,0.27175,0.316676
3,0.484,76.3,4.2,2.85,8360.0,5057.0,1.146065,detached,N,https://iopscience.iop.org/article/10.3847/000...,ATPeg,B,"[0.5167, 0.5152, 0.5154, 0.5174, 0.521, 0.5263...",1.653154,0.27175,0.316676
4,0.366,88.7,2.859,2.608,5940.0,3450.0,0.56899,detached,Y,https://academic.oup.com/pasj/article/70/4/72/...,BUVul,V,"[0.4113, 0.4087, 0.4076, 0.4083, 0.4108, 0.415...",1.721739,0.41515,0.295522


In [4]:
print("Number of records of observed detached data: ", len(data_det))

Number of records of observed detached data:  25


In [5]:
# Curves loading
X_det = []
for row in data_det["curve"]:
    processed_curve = literal_eval(row)
    X_det.append(processed_curve)
X_det = np.array(X_det, dtype = np.float32)

In [6]:
list(data_det.columns)

['q',
 'inc',
 'omega1',
 'omega2',
 'T1',
 'T2',
 'P',
 'Type',
 'Spot',
 'Ref',
 'name',
 'filter',
 'curve',
 't1_t2',
 'primary__equivalent_radius',
 'secondary__equivalent_radius']

In [7]:
y_det = np.array(data_det[[
    "inc",
    "q",
    "omega1",
    "omega2",
    "t1_t2"]])

In [8]:
model_det = load_model("models/norm_detached_selection.hdf5")

In [8]:
model_det_v4 = load_model("models/norm_detached_sel_v4.hdf5")

In [9]:
pred_det = model_det_v4.predict(X_det)

In [10]:
pred_det_df = pd.DataFrame(pred_det, columns = ['inc_predicted', 'mass_ratio_predicted', 'omega1_predicted', 'omega2_predicted', 't1_t2_predicted'])
pred_det_df.head()

Unnamed: 0,inc_predicted,mass_ratio_predicted,omega1_predicted,omega2_predicted,t1_t2_predicted
0,0.638159,0.516162,0.033969,0.03015,-0.019047
1,0.648916,0.587977,0.059657,0.031931,-0.036079
2,0.675549,0.244907,0.057358,0.00561,0.010377
3,0.640934,0.314146,0.046008,0.002068,-0.001872
4,0.706086,0.030905,0.007445,0.005982,0.276676


In [11]:
# MinMax Scaler
scaler_det = MinMaxScaler()
y_det_minmax_scaled = scaler_det.fit_transform(y_det)
pred_denorm_det = scaler_det.inverse_transform(pred_det) # denormovane predikcie detached kriviek

In [12]:
pred_denorm_det[0]

array([84.213165 ,  0.5814799,  2.744151 ,  2.2576132,  0.9829418],
      dtype=float32)

In [13]:
pred_denorm_det_df = pd.DataFrame(pred_denorm_det,
                        columns = [
                        "pred_inc",
                        "pred_q",
                        "pred_omega1",
                        "pred_omega2",
                        "pred_t1_t2"])
pred_denorm_det_df.head()

Unnamed: 0,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2
0,84.213165,0.58148,2.744151,2.257613,0.982942
1,84.346558,0.6436,2.89854,2.26917,0.967017
2,84.676811,0.346845,2.884721,2.098396,1.010453
3,84.247589,0.406736,2.816511,2.075418,0.999
4,85.055466,0.161733,2.584743,2.100811,1.259442


In [17]:
# Create csv file to use it later for plotting of curves
target_det_obs = data_det[['name', 'filter']]
target_det_obs = target_det_obs.join(pred_denorm_det_df)
# target_det_obs["pred_omega2"] = data_det['omega2'] # pridanie true hodnot ako "predikovane" pre skusku
# target_det_obs["pred_q"] = data_det['q']
# target_det_obs = target_det_obs.join(pred_denorm_det_df[["pred_inc", "pred_omega1", "pred_t1_t2"]]) # pridanie zvysnych predikovanych hodnot
target_det_obs = target_det_obs.join(pd.DataFrame(y_det, columns = ['inc', 'q', 'omega1', 'omega2', 't1_t2']))
# print(target_det_obs.head())
print(target_det_obs.head())

    name filter   pred_inc    pred_q  pred_omega1  pred_omega2  pred_t1_t2  \
0  ATPeg      V  84.213165  0.581480     2.744151     2.257613    0.982942   
1  ATPeg      I  84.346558  0.643600     2.898540     2.269170    0.967017   
2  ATPeg      R  84.676811  0.346845     2.884721     2.098396    1.010453   
3  ATPeg      B  84.247589  0.406736     2.816511     2.075418    0.999000   
4  BUVul      V  85.055466  0.161733     2.584743     2.100811    1.259442   

    inc      q  omega1  omega2     t1_t2  
0  76.3  0.484   4.200   2.850  1.653154  
1  76.3  0.484   4.200   2.850  1.653154  
2  76.3  0.484   4.200   2.850  1.653154  
3  76.3  0.484   4.200   2.850  1.653154  
4  88.7  0.366   2.859   2.608  1.721739  


In [18]:
# For purpose of plttoing curves from predicted parameters, it is necessary to convert inclination - degrees to radians

target_det_obs['pred_inc'] = target_det_obs['pred_inc'].astype(float).apply(math.radians)
target_det_obs['inc'] = target_det_obs['inc'].astype(float).apply(math.radians)

target_det_obs.head()

Unnamed: 0,name,filter,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2,inc,q,omega1,omega2,t1_t2
0,ATPeg,V,1.469797,0.58148,2.744151,2.257613,0.982942,1.331686,0.484,4.2,2.85,1.653154
1,ATPeg,I,1.472125,0.6436,2.89854,2.26917,0.967017,1.331686,0.484,4.2,2.85,1.653154
2,ATPeg,R,1.477889,0.346845,2.884721,2.098396,1.010453,1.331686,0.484,4.2,2.85,1.653154
3,ATPeg,B,1.470398,0.406736,2.816511,2.075418,0.999,1.331686,0.484,4.2,2.85,1.653154
4,BUVul,V,1.484498,0.161733,2.584743,2.100811,1.259442,1.548107,0.366,2.859,2.608,1.721739


In [19]:
# Save true and predicted attributes (inclination in radians) to csv
# target_det_obs.to_csv('ploted_curves_observed/target_det_obs.csv')
target_det_obs.to_csv('ml_predictor_evaluator/src/target_det_obs_v4_model.csv')

In [18]:
# Predicted mean values of selected attributes
pred_mean_det = pred_denorm_det_df.mean(axis=0)
pred_mean_det

pred_inc       86.258858
pred_q          0.241861
pred_omega1     2.698353
pred_omega2     2.108686
pred_t1_t2      1.122399
dtype: float32

In [19]:
true_det_df = pd.DataFrame(y_det,
                        columns = [
                        "inc",
                        "q",
                        "omega1",
                        "omega2",
                        "t1_t2"])
true_det_df.head()

Unnamed: 0,inc,q,omega1,omega2,t1_t2
0,76.3,0.484,4.2,2.85,1.653154
1,76.3,0.484,4.2,2.85,1.653154
2,76.3,0.484,4.2,2.85,1.653154
3,76.3,0.484,4.2,2.85,1.653154
4,88.7,0.366,2.859,2.608,1.721739


In [20]:
true_mean_det = true_det_df.mean(axis=0)
true_mean_det

inc       83.460000
q          0.485440
omega1     4.397080
omega2     3.445040
t1_t2      1.516795
dtype: float64

In [21]:
eval_pred = pd.DataFrame({'attribute': true_mean_det.index,
            'avg_true': true_mean_det.values,
            'avg_pred': pred_mean_det.values,
            'MAE': abs(true_mean_det.values - pred_mean_det.values)})
eval_pred

Unnamed: 0,attribute,avg_true,avg_pred,MAE
0,inc,83.46,86.258858,2.798858
1,q,0.48544,0.241861,0.243579
2,omega1,4.39708,2.698353,1.698727
3,omega2,3.44504,2.108686,1.336354
4,t1_t2,1.516795,1.122399,0.394396


## Overcontact observed
### Model used: norm_overcontact_selection

In [22]:
# Overcontact data loading
data_over = pd.read_csv("observed\observed_over.csv")


In [23]:
data_over = data_over.drop(columns=["Unnamed: 0"])
data_over.head()

Unnamed: 0,q,inc,omega1,omega2,T1,T2,P,Type,Spot,Ref,name,filter,curve,t1_t2,primary__equivalent_radius,secondary__equivalent_radius
0,0.169,75.46,2.08,2.08,6215.0,6141.0,0.494108,overcontact,N,http://liber.onu.edu.ua/pdf/astro/all/OAP_14/0...,AH Aur,V,"[0.6897, 0.6901, 0.6904, 0.691, 0.6916, 0.6925...",1.01205,0.565838,0.271593
1,0.169,75.46,2.08,2.08,6215.0,6141.0,0.494108,overcontact,N,http://liber.onu.edu.ua/pdf/astro/all/OAP_14/0...,AH Aur,B,"[0.656, 0.6549, 0.6537, 0.6525, 0.6516, 0.6513...",1.01205,0.565838,0.271593
2,0.27,81.7,2.357,2.357,6500.0,6180.0,0.421522,overcontact,Y,https://iopscience.iop.org/article/10.1088/000...,AKHer,V,"[0.6688, 0.6646, 0.6608, 0.6572, 0.6539, 0.651...",1.05178,0.510294,0.286772
3,0.27,81.7,2.357,2.357,6500.0,6180.0,0.421522,overcontact,Y,https://iopscience.iop.org/article/10.1088/000...,AKHer,I,"[0.6932, 0.6884, 0.6841, 0.6804, 0.6773, 0.674...",1.05178,0.510294,0.286772
4,0.27,81.7,2.357,2.357,6500.0,6180.0,0.421522,overcontact,Y,https://iopscience.iop.org/article/10.1088/000...,AKHer,R,"[0.6524, 0.6518, 0.652, 0.6532, 0.6554, 0.6582...",1.05178,0.510294,0.286772


In [24]:
print("Number of records of observed overcontact data: ", len(data_over))

Number of records of observed overcontact data:  43


In [25]:
# Curves loading
X_over = []
for row in data_over["curve"]:
    processed_curve = literal_eval(row)
    X_over.append(processed_curve)
X_ver = np.array(X_over, dtype = np.float32)

In [26]:
list(data_over.columns)

['q',
 'inc',
 'omega1',
 'omega2',
 'T1',
 'T2',
 'P',
 'Type',
 'Spot',
 'Ref',
 'name',
 'filter',
 'curve',
 't1_t2',
 'primary__equivalent_radius',
 'secondary__equivalent_radius']

In [27]:
y_over = np.array(data_over[[
    "inc",
    "q",
    "omega1",
    "omega2",
    "t1_t2"]])

In [28]:
model_over = load_model("models/norm_overcontact_selection.hdf5")

In [29]:
pred_over = model_over.predict(X_over)

In [30]:
# MinMax Scaler
scaler_over = MinMaxScaler()
y_over_minmax_scaled = scaler_over.fit_transform(y_over)
pred_denorm_over = scaler_over.inverse_transform(pred_over)

In [31]:
pred_denorm_over_df = pd.DataFrame(pred_denorm_over,
                        columns = [
                        "pred_inc",
                        "pred_q",
                        "pred_omega1",
                        "pred_omega2",
                        "pred_t1_t2"])
pred_denorm_over_df.head()

Unnamed: 0,pred_inc,pred_q,pred_omega1,pred_omega2,pred_t1_t2
0,79.606049,0.255079,2.29069,2.295163,1.017947
1,78.16124,0.233262,2.258338,2.259529,1.046453
2,76.398155,2.828499,6.177999,6.183016,1.074864
3,78.480515,2.800054,6.210745,6.23491,1.028877
4,79.815918,1.392867,4.19196,4.19498,1.054676


In [34]:
# # Create csv file to use it later for plotting of curves
# target_over_obs = data_over[['name', 'filter']]
# target_over_obs = target_over_obs.join(pred_denorm_over_df)
# target_over_obs = target_over_obs.join(pd.DataFrame(y, columns = ['inc', 'q', 'omega1', 'omega2', 't1_t2']))
# print(target_over_obs.head())

# Create csv file to use it later for plotting of curves
target_over_obs = data_over[['name', 'filter']]
target_over_obs["pred_omega1"] = pred_denorm_over_df["pred_omega1"] # pridanie true hodnot ako "predikovane" pre skusku
target_over_obs["pred_omega2"] = pred_denorm_over_df["pred_omega1"]
target_over_obs["pred_q"] = data_over["q"]
target_over_obs = target_over_obs.join(pred_denorm_over_df[["pred_inc", "pred_t1_t2"]]) # pridanie zvysnych predikovanych hodnot
target_over_obs = target_over_obs.join(pd.DataFrame(y_over, columns = ['inc', 'q', 'omega1', 'omega2', 't1_t2']))
print(target_over_obs.head())

     name filter  pred_omega1  pred_omega2  pred_q   pred_inc  pred_t1_t2  \
0  AH Aur      V     2.290690     2.290690   0.169  79.606049    1.017947   
1  AH Aur      B     2.258338     2.258338   0.169  78.161240    1.046453   
2   AKHer      V     6.177999     6.177999   0.270  76.398155    1.074864   
3   AKHer      I     6.210745     6.210745   0.270  78.480515    1.028877   
4   AKHer      R     4.191960     4.191960   0.270  79.815918    1.054676   

     inc      q  omega1  omega2    t1_t2  
0  75.46  0.169   2.080   2.080  1.01205  
1  75.46  0.169   2.080   2.080  1.01205  
2  81.70  0.270   2.357   2.357  1.05178  
3  81.70  0.270   2.357   2.357  1.05178  
4  81.70  0.270   2.357   2.357  1.05178  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_over_obs["pred_omega1"] = pred_denorm_over_df["pred_omega1"] # pridanie true hodnot ako "predikovane" pre skusku
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_over_obs["pred_omega2"] = pred_denorm_over_df["pred_omega1"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  target_over_obs["

In [35]:
# For purpose of plttoing curves from predicted parameters, it is necessary to convert inclination - degrees to radians

target_over_obs['pred_inc'] = target_over_obs['pred_inc'].astype(float).apply(math.radians)
target_over_obs['inc'] = target_over_obs['inc'].astype(float).apply(math.radians)

target_over_obs.head()

Unnamed: 0,name,filter,pred_omega1,pred_omega2,pred_q,pred_inc,pred_t1_t2,inc,q,omega1,omega2,t1_t2
0,AH Aur,V,2.29069,2.29069,0.169,1.389388,1.017947,1.317025,0.169,2.08,2.08,1.01205
1,AH Aur,B,2.258338,2.258338,0.169,1.364171,1.046453,1.317025,0.169,2.08,2.08,1.01205
2,AKHer,V,6.177999,6.177999,0.27,1.333399,1.074864,1.425934,0.27,2.357,2.357,1.05178
3,AKHer,I,6.210745,6.210745,0.27,1.369743,1.028877,1.425934,0.27,2.357,2.357,1.05178
4,AKHer,R,4.19196,4.19196,0.27,1.393051,1.054676,1.425934,0.27,2.357,2.357,1.05178


In [36]:
# Save true and predicted attributes (inclination in radians) to csv
target_over_obs.to_csv('ml_predictor_evaluator/src/target_over_obs.csv')

In [None]:
# Predicted mean values of selected attributes
pred_mean = pred_denorm_over_df.mean(axis=0)
pred_mean

In [None]:
true_over_df = pd.DataFrame(y,
                        columns = [
                        "inc",
                        "q",
                        "omega1",
                        "omega2",
                        "t1_t2"])
true_over_df.head()

In [None]:
true_mean = true_over_df.mean(axis=0)
true_mean

In [None]:
eval_pred = pd.DataFrame({'attribute': true_mean.index,
            'avg_true': true_mean.values,
            'avg_pred': pred_mean.values,
            'MAE': abs(true_mean.values - pred_mean.values)})
eval_pred