## Python skript na vytvorenie podmnozin:
* synteticke detached
* synteticke overcontact

Budu vytvorene unikatne podmnoziny pre unikatne filtre

In [1]:
# Libraries
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
from keras.models import load_model
from ast import literal_eval
from random import randint
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Seed NumPy's global random generator so the random draws made later in the notebook are repeatable.
SEED = 1234
np.random.seed(SEED)

# Never truncate the rows of a displayed DataFrame.
pd.set_option("display.max_rows", None)

In [3]:
def generate_observation_sigma(space_obs_frac=0.5, earth_based_sigma=4e-3, space_based_sigma=2e-4):
    """
    Draw the standard deviation of the noise that is applied to a synthetic light curve.

    The noise level follows a bimodal distribution. First an observation type is picked (space based with
    probability `space_obs_frac`, earth based otherwise), then the standard deviation is drawn from a Rayleigh
    distribution whose scale is the noise level configured for that observation type.

    :param space_obs_frac: float; probability (between 0 and 1) that the observation is space based
    :param earth_based_sigma: float; Rayleigh scale used for earth based observations
    :param space_based_sigma: float; Rayleigh scale used for space based observations
    :return: float; standard deviation of the light curve noise
    :raises ValueError: if `space_obs_frac` lies outside of the interval [0, 1]
    """
    # Fail with a readable message instead of numpy's generic complaint about invalid probabilities.
    if not 0.0 <= space_obs_frac <= 1.0:
        raise ValueError(f"space_obs_frac must lie in [0, 1], got {space_obs_frac!r}")

    scale = np.random.choice([earth_based_sigma, space_based_sigma], p=[1 - space_obs_frac, space_obs_frac])
    return np.random.rayleigh(scale)

def stochastic_noise_generator(curve):
    """
    Add gaussian noise to the synthetic observation provided in `curve`.

    One noise level is drawn with `generate_observation_sigma` and used for every point of the curve.

    :param curve: numpy.array or sequence of floats; normalized light curve
    :return: Tuple(numpy.array, numpy.array); normalized light curve with added noise, and an array of the
             same shape as `curve` filled with the standard deviation that was used
    """
    # Accept plain sequences as well; `.shape` below needs an array.
    curve = np.asarray(curve)
    sigma = generate_observation_sigma()
    return np.random.normal(curve, sigma), np.full(curve.shape, sigma)

In [4]:
# Helper that creates the data used for plotting.
def make_prediction(df, name_of_df, model):
    """
    Predict the system parameters for every light curve in `df` and store the result as a CSV file.

    The function works on a copy of `df`, appends the de-normalised predictions as the columns `pred_inc`,
    `pred_q`, `pred_omega1`, `pred_omega2` and `pred_t1_t2`, and writes that copy to
    `data_to_plot/{name_of_df}.csv`. The directory is created when it does not exist yet.

    :param df: pandas.DataFrame; needs the column `curve` (one sequence of equal length per row) and the columns
               `inclination`, `mass_ratio`, `primary__surface_potential`, `secondary__surface_potential`, `t1_t2`
    :param name_of_df: str; name of the CSV file (without extension) written into `data_to_plot`
    :param model: object whose `predict(X)` returns five normalised values per curve, in the column order above
    :return: None; the caller's `df` is not modified
    """
    import os

    target_columns = ["inclination",
                      "mass_ratio",
                      "primary__surface_potential",
                      "secondary__surface_potential",
                      "t1_t2"]
    prediction_columns = ["pred_inc", "pred_q", "pred_omega1", "pred_omega2", "pred_t1_t2"]

    df = df.copy()

    # Stack the per-row curves into one 2D array for the model.
    X = np.array(df["curve"].tolist())

    # NOTE(review): the scaler is fitted on the true parameters of this very subset, so predictions are mapped
    # back with the subset's own min/max. Presumably the model was trained against a scaler fitted on the
    # training set - confirm that both ranges agree.
    scaler = MinMaxScaler()
    scaler.fit(df[target_columns].to_numpy())

    # Predict in normalised space and map the result back to physical units.
    y_pred_denorm = np.asarray(scaler.inverse_transform(model.predict(X)))
    for position, column in enumerate(prediction_columns):
        df[column] = y_pred_denorm[:, position]

    output_path = f'data_to_plot/{name_of_df}.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path)

In [4]:
# Helper that creates the overcontact data used for plotting -- same sigma.
def make_prediction_same_sigma(df, name_of_df, model):
    """
    Predict the system parameters for every light curve in `df` and store the result as a CSV file, using
    one common value for both predicted surface potentials.

    The function works on a copy of `df`, appends the de-normalised predictions as the columns `pred_inc`,
    `pred_q`, `pred_omega1`, `pred_omega2` and `pred_t1_t2`, and writes that copy to
    `data_to_plot/{name_of_df}.csv`. The directory is created when it does not exist yet.

    `pred_omega2` is filled with the value predicted for `pred_omega1`; the model's own fourth output is
    discarded. This is the only difference from `make_prediction`.

    :param df: pandas.DataFrame; needs the column `curve` (one sequence of equal length per row) and the columns
               `inclination`, `mass_ratio`, `primary__surface_potential`, `secondary__surface_potential`, `t1_t2`
    :param name_of_df: str; name of the CSV file (without extension) written into `data_to_plot`
    :param model: object whose `predict(X)` returns five normalised values per curve, in the column order above
    :return: None; the caller's `df` is not modified
    """
    import os

    target_columns = ["inclination",
                      "mass_ratio",
                      "primary__surface_potential",
                      "secondary__surface_potential",
                      "t1_t2"]
    # Output column -> index of the model output it is taken from.
    # NOTE(review): pred_omega2 reuses output 2 (pred_omega1) on purpose, presumably because both components
    # of an overcontact system share one surface potential - confirm.
    prediction_sources = {"pred_inc": 0,
                          "pred_q": 1,
                          "pred_omega1": 2,
                          "pred_omega2": 2,
                          "pred_t1_t2": 4}

    df = df.copy()

    # Stack the per-row curves into one 2D array for the model.
    X = np.array(df["curve"].tolist())

    # NOTE(review): the scaler is fitted on the true parameters of this very subset, so predictions are mapped
    # back with the subset's own min/max. Presumably the model was trained against a scaler fitted on the
    # training set - confirm that both ranges agree.
    scaler = MinMaxScaler()
    scaler.fit(df[target_columns].to_numpy())

    # Predict in normalised space and map the result back to physical units.
    y_pred_denorm = np.asarray(scaler.inverse_transform(model.predict(X)))
    for column, position in prediction_sources.items():
        df[column] = y_pred_denorm[:, position]

    output_path = f'data_to_plot/{name_of_df}.csv'
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    df.to_csv(output_path)

## Detached synthetic

In [5]:
# Load the synthetic detached data set. reset_index() moves the stored index into a regular "index"
# column and gives the frame a fresh positional index.
# NOTE(review): unpickling can execute code embedded in the file - only load pickles from a trusted source.
data_det = pd.read_pickle("detached_all_parameters.pkl").reset_index()

In [6]:
# Preview the first rows to check which columns are available.
data_det.head()

Unnamed: 0,index,id,curve,primary__t_eff,secondary__t_eff,inclination,mass_ratio,primary__surface_potential,secondary__surface_potential,t1_t2,filter,critical_surface_potential,primary__equivalent_radius,secondary__equivalent_radius,primary__filling_factor,secondary__filling_factor
0,0,38,"[0.6055271686415179, 0.9842041250556204, 0.999...",7000,4000,1.560796,10.0,110.00005,996.5005,1.75,Bessell_U,15.09104,0.009996,0.009996,-145.333979,-1502.830354
1,1,38,"[0.608985656265516, 0.9846965713304289, 0.9998...",7000,4000,1.560796,10.0,110.00005,996.5005,1.75,Bessell_B,15.09104,0.009996,0.009996,-145.333979,-1502.830354
2,2,38,"[0.6189025614226916, 0.9837351924934223, 0.999...",7000,4000,1.560796,10.0,110.00005,996.5005,1.75,Bessell_V,15.09104,0.009996,0.009996,-145.333979,-1502.830354
3,3,38,"[0.6292771409565273, 0.9832675811171884, 0.999...",7000,4000,1.560796,10.0,110.00005,996.5005,1.75,Bessell_R,15.09104,0.009996,0.009996,-145.333979,-1502.830354
4,4,38,"[0.6543378609145588, 0.9835188424579704, 0.999...",7000,4000,1.560796,10.0,110.00005,996.5005,1.75,Bessell_I,15.09104,0.009996,0.009996,-145.333979,-1502.830354


In [6]:
# List the distinct values of the "filter" column in the detached data.
print("Unique filters:" ,data_det["filter"].unique())

Unique filters: ['Bessell_U' 'Bessell_B' 'Bessell_V' 'Bessell_R' 'Bessell_I' 'SLOAN_u'
 'SLOAN_g' 'SLOAN_r' 'SLOAN_i' 'SLOAN_z' 'Kepler' 'GaiaDR2' 'TESS']


In [7]:
# Keep only the identifier, the light curve, the filter name and the five parameters to be predicted.
selected_columns = [
    "id",
    "curve",
    "filter",
    "inclination",
    "mass_ratio",
    "primary__surface_potential",
    "secondary__surface_potential",
    "t1_t2",
]
data_to_predict = data_det[selected_columns]

# One random subset per filter. The draws rely on NumPy's global random state, so the order of the
# statements below must stay as it is to reproduce the same subsets.
rows_per_filter = 100
filter_name = data_to_predict["filter"]

data_Bessel_U = data_to_predict.loc[filter_name == "Bessell_U"].sample(n=rows_per_filter)
data_Bessell_B = data_to_predict.loc[filter_name == "Bessell_B"].sample(n=rows_per_filter)
data_Bessell_V = data_to_predict.loc[filter_name == "Bessell_V"].sample(n=rows_per_filter)
data_Bessell_R = data_to_predict.loc[filter_name == "Bessell_R"].sample(n=rows_per_filter)
data_Bessell_I = data_to_predict.loc[filter_name == "Bessell_I"].sample(n=rows_per_filter)
data_SLOAN_u = data_to_predict.loc[filter_name == "SLOAN_u"].sample(n=rows_per_filter)
data_SLOAN_g = data_to_predict.loc[filter_name == "SLOAN_g"].sample(n=rows_per_filter)
data_SLOAN_r = data_to_predict.loc[filter_name == "SLOAN_r"].sample(n=rows_per_filter)
data_SLOAN_i = data_to_predict.loc[filter_name == "SLOAN_i"].sample(n=rows_per_filter)
data_SLOAN_z = data_to_predict.loc[filter_name == "SLOAN_z"].sample(n=rows_per_filter)
data_Kepler = data_to_predict.loc[filter_name == "Kepler"].sample(n=rows_per_filter)
data_GaiaDR2 = data_to_predict.loc[filter_name == "GaiaDR2"].sample(n=rows_per_filter)
data_tess = data_to_predict.loc[filter_name == "TESS"].sample(n=rows_per_filter)

In [9]:
# Preview the sampled Bessell_U subset (the variable name is spelled "Bessel" with a single l).
data_Bessel_U.head()

Unnamed: 0,id,curve,filter,inclination,mass_ratio,primary__surface_potential,secondary__surface_potential,t1_t2
175552,2544925,"[0.9826834614014517, 0.9827279960224328, 0.982...",Bessell_U,1.509858,0.2,3.690822,21.40001,2.285714
943787,14229518,"[0.014730435037305312, 0.014730549152163118, 0...",Bessell_U,1.459889,1.25,21.251253,8.24631,4.0
548821,8272207,"[0.18920356263351523, 0.18920335317112438, 0.1...",Bessell_U,1.518192,2.0,13.11521,12.293912,1.666667
1269450,18461041,"[0.6751043519133935, 0.6756121188874721, 0.677...",Bessell_U,1.261432,1.0,4.492805,4.492805,1.166667
1288560,18604195,"[0.25537422283662325, 0.25668728995951995, 0.2...",Bessell_U,1.283999,1.0,12.115186,3.777465,2.857143


In [8]:
# Load the saved Keras model that is used for the detached predictions below.
model = load_model("models/norm_detached_selection_v5.hdf5")

In [9]:
# Pair every detached subset with the name of the CSV file that make_prediction writes for it.
detached_exports = [
    (data_Bessel_U, "det_data_Bessel_U"),
    (data_Bessell_B, "det_data_Bessell_B"),
    (data_Bessell_V, "det_data_Bessell_V"),
    (data_Bessell_R, "det_data_Bessell_R"),
    (data_Bessell_I, "det_data_Bessell_I"),
    (data_SLOAN_u, "det_data_SLOAN_u"),
    (data_SLOAN_g, "det_data_SLOAN_g"),
    (data_SLOAN_r, "det_data_SLOAN_r"),
    (data_SLOAN_i, "det_data_SLOAN_i"),
    (data_SLOAN_z, "det_data_SLOAN_z"),
    (data_Kepler, "det_data_Kepler"),
    (data_GaiaDR2, "det_data_GaiaDR2"),
    (data_tess, "det_data_TESS"),
]

# Run the model on each subset in turn.
for subset, csv_name in detached_exports:
    make_prediction(subset, csv_name, model)

## Overcontact synthetic

In [6]:
# Load the synthetic overcontact data set. reset_index() moves the stored index into a regular "index"
# column and gives the frame a fresh positional index.
# NOTE(review): unpickling can execute code embedded in the file - only load pickles from a trusted source.
data_over = pd.read_pickle("overcontact_all_parameters.pkl").reset_index()

In [7]:
# Rename the "t1/t2" column to "t1_t2", the column name selected for the predictions further below.
# The rename is done in place, i.e. data_over itself is modified.
data_over.rename(columns = {'t1/t2':'t1_t2'}, inplace = True)
# Preview the first rows to check which columns are available.
data_over.head()

Unnamed: 0,index,id,curve,primary__t_eff,secondary__t_eff,inclination,mass_ratio,primary__surface_potential,secondary__surface_potential,t1_t2,filter,critical_surface_potential,primary__equivalent_radius,secondary__equivalent_radius,primary__filling_factor,secondary__filling_factor
0,0,5525038,"[0.9271109336686163, 0.9271335908185164, 0.927...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_U,1.959104,0.585781,0.21126,0.169244,0.169244
1,1,5525038,"[0.9267426667358384, 0.9267640025030627, 0.926...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_B,1.959104,0.585781,0.21126,0.169244,0.169244
2,2,5525038,"[0.9271736551553694, 0.927193188167849, 0.9272...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_V,1.959104,0.585781,0.21126,0.169244,0.169244
3,3,5525038,"[0.9286697051715368, 0.9286879105609007, 0.928...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_R,1.959104,0.585781,0.21126,0.169244,0.169244
4,4,5525038,"[0.9304596200748534, 0.9304764401089076, 0.930...",5500,5250,0.766994,0.1,1.948052,1.948052,1.047619,Bessell_I,1.959104,0.585781,0.21126,0.169244,0.169244


In [22]:
# List the distinct values of the "filter" column in the overcontact data.
print("Unique filters:" ,data_over["filter"].unique())

Unique filters: ['Bessell_U' 'Bessell_B' 'Bessell_V' 'Bessell_R' 'Bessell_I' 'SLOAN_u'
 'SLOAN_g' 'SLOAN_r' 'SLOAN_i' 'SLOAN_z' 'Kepler' 'GaiaDR2' 'TESS']


In [8]:
# Keep only the identifier, the light curve, the filter name and the five parameters to be predicted.
selected_columns = [
    "id",
    "curve",
    "filter",
    "inclination",
    "mass_ratio",
    "primary__surface_potential",
    "secondary__surface_potential",
    "t1_t2",
]
data_to_predict = data_over[selected_columns]

# One random subset per filter. The draws rely on NumPy's global random state, so the order of the
# statements below must stay as it is to reproduce the same subsets.
# NOTE: the variable names are the ones used for the detached subsets, so running this cell replaces them.
rows_per_filter = 100
filter_name = data_to_predict["filter"]

data_Bessel_U = data_to_predict.loc[filter_name == "Bessell_U"].sample(n=rows_per_filter)
data_Bessell_B = data_to_predict.loc[filter_name == "Bessell_B"].sample(n=rows_per_filter)
data_Bessell_V = data_to_predict.loc[filter_name == "Bessell_V"].sample(n=rows_per_filter)
data_Bessell_R = data_to_predict.loc[filter_name == "Bessell_R"].sample(n=rows_per_filter)
data_Bessell_I = data_to_predict.loc[filter_name == "Bessell_I"].sample(n=rows_per_filter)
data_SLOAN_u = data_to_predict.loc[filter_name == "SLOAN_u"].sample(n=rows_per_filter)
data_SLOAN_g = data_to_predict.loc[filter_name == "SLOAN_g"].sample(n=rows_per_filter)
data_SLOAN_r = data_to_predict.loc[filter_name == "SLOAN_r"].sample(n=rows_per_filter)
data_SLOAN_i = data_to_predict.loc[filter_name == "SLOAN_i"].sample(n=rows_per_filter)
data_SLOAN_z = data_to_predict.loc[filter_name == "SLOAN_z"].sample(n=rows_per_filter)
data_Kepler = data_to_predict.loc[filter_name == "Kepler"].sample(n=rows_per_filter)
data_GaiaDR2 = data_to_predict.loc[filter_name == "GaiaDR2"].sample(n=rows_per_filter)
data_tess = data_to_predict.loc[filter_name == "TESS"].sample(n=rows_per_filter)

In [25]:
# Preview the sampled Bessell_U subset (the variable name is spelled "Bessel" with a single l).
data_Bessel_U.head()

Unnamed: 0,id,curve,filter,inclination,mass_ratio,primary__surface_potential,secondary__surface_potential,t1_t2
90766,19334309,"[0.7357760724290606, 0.7358980481314182, 0.736...",Bessell_U,1.136114,3.333333,7.007351,7.007351,1.0
635583,56079070,"[0.20470982847765304, 0.20606688157159928, 0.2...",Bessell_U,1.53986,0.8,3.215234,3.215234,1.052632
873132,64152363,"[0.5183017782839744, 0.5184583481492434, 0.518...",Bessell_U,1.188822,1.111111,3.499208,3.499208,1.0
849420,63938802,"[0.22943193347289692, 0.23022969598175344, 0.2...",Bessell_U,1.470485,0.9,3.240062,3.240062,1.05
775541,63090330,"[0.5321946797638034, 0.5324898684810483, 0.533...",Bessell_U,1.257056,1.111111,3.808497,3.808497,1.083333


In [9]:
# Load the saved Keras model used for the overcontact predictions below.
# NOTE: this rebinds `model`, replacing the detached model loaded earlier in the notebook.
model = load_model("models/norm_overcontact_selection.hdf5")

In [27]:
# Pair every overcontact subset with the name of the CSV file that make_prediction writes for it.
overcontact_exports = [
    (data_Bessel_U, "over_data_Bessel_U"),
    (data_Bessell_B, "over_data_Bessell_B"),
    (data_Bessell_V, "over_data_Bessell_V"),
    (data_Bessell_R, "over_data_Bessell_R"),
    (data_Bessell_I, "over_data_Bessell_I"),
    (data_SLOAN_u, "over_data_SLOAN_u"),
    (data_SLOAN_g, "over_data_SLOAN_g"),
    (data_SLOAN_r, "over_data_SLOAN_r"),
    (data_SLOAN_i, "over_data_SLOAN_i"),
    (data_SLOAN_z, "over_data_SLOAN_z"),
    (data_Kepler, "over_data_Kepler"),
    (data_GaiaDR2, "over_data_GaiaDR2"),
    (data_tess, "over_data_TESS"),
]

# Run the model on each subset in turn.
for subset, csv_name in overcontact_exports:
    make_prediction(subset, csv_name, model)

In [10]:
# Pair every overcontact subset with the name of the CSV file that make_prediction_same_sigma writes for it.
# NOTE: the file names match those passed to make_prediction for the overcontact subsets, so the files
# written by this cell replace the ones written by that cell.
same_sigma_exports = [
    (data_Bessel_U, "over_data_Bessel_U"),
    (data_Bessell_B, "over_data_Bessell_B"),
    (data_Bessell_V, "over_data_Bessell_V"),
    (data_Bessell_R, "over_data_Bessell_R"),
    (data_Bessell_I, "over_data_Bessell_I"),
    (data_SLOAN_u, "over_data_SLOAN_u"),
    (data_SLOAN_g, "over_data_SLOAN_g"),
    (data_SLOAN_r, "over_data_SLOAN_r"),
    (data_SLOAN_i, "over_data_SLOAN_i"),
    (data_SLOAN_z, "over_data_SLOAN_z"),
    (data_Kepler, "over_data_Kepler"),
    (data_GaiaDR2, "over_data_GaiaDR2"),
    (data_tess, "over_data_TESS"),
]

# Run the model on each subset in turn.
for subset, csv_name in same_sigma_exports:
    make_prediction_same_sigma(subset, csv_name, model)