# Predikcia detached kriviek vybranych parametrov
### Model NN s concatenate vrstvou

In [1]:
# LIBRARIES
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from keras.utils import np_utils
from keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import precision_recall_fscore_support
from keras.layers import Conv1D, GlobalMaxPooling1D, MaxPooling1D, SpatialDropout1D, GlobalAveragePooling1D
from keras.layers import Input, Dense, concatenate, Activation, LSTM, Dropout, Flatten
from keras.models import Model
from keras.layers.merge import Concatenate
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

np.random.seed(1234)
pd.set_option('display.max_rows', None)

In [2]:
def generate_observation_sigma(space_obs_frac=0.5):
    """
    Draws a standard deviation of noise in light curve points from a "true" value provided in synthetic light curve.
    Noise sigma is drawn from bimodal distribution taking into account contributions from space based and earth based
    observations which have different levels of stochastic noise.

    :param space_obs_frac: ratio between earth based and space based observations
    :return: float; standard deviation of the light curve noise
    """
    earth_based_sigma = 4e-3
    space_based_sigma = 2e-4
    sigma = np.random.choice([earth_based_sigma, space_based_sigma], p=[1-space_obs_frac, space_obs_frac])
    return np.random.rayleigh(sigma)

def stochastic_noise_generator(curve):
    """
    Introduces gaussian noise into synthetic observation provided in `curve`.

    :param curve: numpy.array; normalized light curve
    :return: Tuple(numpy.array, float); normalized light curve with added noise, standard deviation of observations
    """
    sigma = generate_observation_sigma()
    return np.random.normal(curve, sigma), np.full(curve.shape, sigma)

## Data loading

In [3]:
data = pd.read_pickle("detached_all_parameters.pkl").reset_index()
data_sample = data.sample(n=200000)

In [4]:
X = []
for row in data_sample["curve"]:
    X.append(row)
X=np.array(X)

In [5]:
y = np.array(data_sample[[
    "inclination",
    "mass_ratio",
    "primary__surface_potential",
    "secondary__surface_potential",
    "t1_t2"]])

In [6]:
# MinMax Scaler
scaler = MinMaxScaler()
y_minmax_scaled = scaler.fit_transform(y)
y_minmax_scaled[0]

array([0.71584635, 0.05050505, 0.17208073, 0.0020424 , 0.7804878 ])

In [7]:
X_train1, X_test, y_train1, y_test = train_test_split(X, y_minmax_scaled, test_size=0.2)

In [8]:
# Adding noise to train data
X_train_n = []
y_train_n = []
for i in range(len(X_train1)):
    for j in range(3):
        curve = stochastic_noise_generator(X_train1[i])
        X_train_n.append(curve[0])
        y_train_n.append(y_train1[i])
X_train_n = np.array(X_train_n)
y_train_n=np.array(y_train_n)

In [9]:
print("Number of records in dataset: ", len(data),
    "\nNumber of records in sample: ", len(X),
    "\nNumber of train data without noise: ", len(X_train1),
    "\nNumber of train data with noise: ", len(X_train_n),
    "\nNumber of test data without noise: ", len(X_test))

Number of records in dataset:  1300000 
Number of records in sample:  200000 
Number of train data without noise:  160000 
Number of train data with noise:  480000 
Number of test data without noise:  40000


In [11]:
y_train_n.shape

(480000, 5)

In [12]:
y_inc = np.array([arr[0] for arr in y_train_n])
y_mass = np.array([arr[1] for arr in y_train_n])
y_prim_potent = np.array([arr[2] for arr in y_train_n])
y_sec_potent = np.array([arr[3] for arr in y_train_n])
y_temp_ratio = np.array([arr[4] for arr in y_train_n])

In [16]:
y_inc.shape

(480000,)

## Model

In [29]:
inputs_y = Input(shape=(X_train_n.shape[1], 1))

a = Conv1D(64, kernel_size = 3, padding = "valid")(inputs_y)
a = MaxPooling1D(2)(a)
a = Conv1D(32, kernel_size = 3, padding = "valid")(a)
a = Dense(32, activation='relu')(a)
a = Model(inputs=inputs_y, outputs=a)

b = Conv1D(64, kernel_size = 3, padding = "valid")(inputs_y)
b = MaxPooling1D(2)(b)
b = Conv1D(32, kernel_size = 3, padding = "valid")(b)
b = Dense(32, activation='relu')(b)
b = Model(inputs=inputs_y, outputs=b)

c = Conv1D(64, kernel_size = 3, padding = "valid")(inputs_y)
c = MaxPooling1D(2)(c)
c = Conv1D(32, kernel_size = 3, padding = "valid")(c)
c = Dense(32, activation='relu')(c)
c = Model(inputs=inputs_y, outputs=c)

d = Conv1D(64, kernel_size = 3, padding = "valid")(inputs_y)
d = MaxPooling1D(2)(d)
d = Conv1D(32, kernel_size = 3, padding = "valid")(d)
d = Dense(32, activation='relu')(d)
d = Model(inputs=inputs_y, outputs=d)

e = Conv1D(64, kernel_size = 3, padding = "valid")(inputs_y)
e = MaxPooling1D(2)(e)
e = Conv1D(32, kernel_size = 3, padding = "valid")(e)
e = Dense(32, activation='relu')(e)
e = Model(inputs=inputs_y, outputs=e)

x = concatenate([a.output, b.output, c.output, d.output, e.output])
x = Flatten()(x)
x = Dense(32, activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)

output = Dense(5, activation='linear')(x)

model_multi = Model(inputs=inputs_y, outputs=output)
model_multi.compile(loss='mse', optimizer='adam', metrics=["mae", "mape"])
print(model_multi.summary())

Model: "functional_47"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 400, 1)]     0                                            
__________________________________________________________________________________________________
conv1d_31 (Conv1D)              (None, 398, 64)      256         input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_33 (Conv1D)              (None, 398, 64)      256         input_6[0][0]                    
__________________________________________________________________________________________________
conv1d_35 (Conv1D)              (None, 398, 64)      256         input_6[0][0]                    
______________________________________________________________________________________

In [30]:
saved_model = "models/norm_det_multi_v1.hdf5"
checkpoint = ModelCheckpoint(saved_model, monitor = 'val_mae', verbose = 1, save_best_only = True, mode = 'min')
early = EarlyStopping(monitor = "val_mae", mode = "min", patience = 25)
callbacks_list = [checkpoint, early]

In [31]:
history_multi = model_multi.fit(
    x=X_train_n,
    y=[y_inc, y_mass, y_prim_potent, y_sec_potent, y_temp_ratio],
    validation_split = 0.1,
    epochs = 10,
    verbose = 1,
    callbacks = callbacks_list,
    batch_size = 64)

Epoch 1/10
 176/6750 [..............................] - ETA: 20:36 - loss: 0.0737 - mae: 0.2013 - mape: 69728.0625

KeyboardInterrupt: 