In [5]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [6]:
dftrain = pd.read_csv("data/plasticc_train_metadata.csv")

dftest = pd.read_csv("data/plasticc_test_metadata.csv")

dftest = dftest[:20000]

dftest['target'] = dftest['true_target']

dftrain = pd.concat([dftrain, dftest], axis=0, ignore_index=True)

model_nums = {90:'SN Ia', 67:'SNIa-91bg', 52:'SNIax', 42:'SNII', 62:'SNIbc', 95:'SLSN-I', 15:'TDE', 64:'KN', 88:'AGN', 92:'RRL', 65:'M-dwarf', 16:'EB', 53:'Mira', 6:'muLens-Single', 991:'muLens-Binary', 992:'ILOT', 993:'CaRT', 994:'PISN', 995:'muLens-String'}

sn_models = [90, 67, 52, 42, 62, 95, 15]

dftrain = dftrain[dftrain['target'].isin(sn_models)]

dftrain['target_names'] = dftrain['target'].map(model_nums)

FileNotFoundError: [Errno 2] No such file or directory: 'data/plasticc_train_metadata.csv'

In [None]:
# print(dftrain)

In [None]:
# print(dftrain[dftrain['object_id'] == 50409])

In [None]:
lcs = pd.read_csv("data/plasticc_train_lightcurves.csv")
lcs_test = pd.read_csv("data/plasticc_test_lightcurves_01.csv")

lcs = pd.concat([lcs, lcs_test[:3000000]], axis=0, ignore_index=True)

# lcs = pd.merge(lcs, dftrain, on='object_id', how='inner')
# lcs['passband']

In [None]:
# #map passband idx to passband name
# lcs['passband_name'] = ["ugrizY"[i] for i in lcs['passband']]
# lcs_detected = lcs[lcs['detected_bool'] == True]
# cols = sns.color_palette("Spectral_r", 6)
# VRO_bands = "ugrizY"

# transients = np.unique(lcs_detected['object_id'])


# fig, axs = plt.subplots(4, 5, figsize=(20, 15), sharex=True, sharey=False)
# axs = axs.ravel()

# for j in np.arange(20):
#     transient = transients[j]
#     lc = lcs_detected[lcs_detected['object_id'] == transient]
#     for i in np.arange(len(VRO_bands)):
#         band = VRO_bands[i]
#         lc_band = lc[lc['passband_name'] == band]
#         axs[j].errorbar(lc_band['mjd'] - np.nanmin(lc['mjd']), lc_band['flux'], yerr=lc_band['flux_err'], fmt='o', mec='k', c=cols[i], label=band)
#     axs[j].set_ylabel("Flux")
#     axs[j].set_title(dftrain.loc[dftrain['object_id'] == transient, 'target_names'].values[0])
# axs[-1].legend()
# axs[0].set_xlim((-10, 200));
# for idx in np.arange(15, 20):
#     axs[idx].set_xlabel("Days since Discovery");
# fig.tight_layout(w_pad=0.1, h_pad=0.3)

In [None]:
columns = ['object_id', 'mjd', 'passband', 'flux', 'flux_err']
columns2 = ['mjd', 'passband', 'flux', 'flux_err']
# filter wavelengths in angstroms
wavelengths = {
    0: 3671.0,
    1: 4827.0,
    2: 6223.0,
    3: 7546.0,
    4: 8691.0,
    5: 9712.0
}
# print(lcs['passband'])
lcs['passband'] = lcs['passband'].map(wavelengths)
# lcs['passband']

In [None]:
# Prepare Train Data
x_data = []
y_data = []

max_length = 352

for id in dftrain['object_id']:
    if len(lcs[lcs['object_id'] == id]) == 0:
        continue
    x_data.append(lcs[lcs['object_id'] == id][columns].reset_index())
    #print(x_train[-1])
    y_data.append(dftrain[dftrain['object_id'] == id][['object_id', 'true_peakmjd']].values.tolist()[0])

    # Expand light curve data to max length with 0s
    while len(x_data[-1].index) < max_length: 
        x_data[-1].loc[len(x_data[-1].index)] = [len(x_data[-1].index), id, 0, 0, 0, 0]
    x_data[-1] = x_data[-1][columns]
    #print(x_train[-1])


In [None]:
# Normalize Data
for i in range(len(x_data)):
    # try:
    #     x_train[i] = x_train[i].values
    # except AttributeError:
    #     pass
    y_data[i][1] -= 59000
    y_data[i][1] /= 2000
    for j in range(len(x_data[i])):
        if x_data[i].loc[j]['mjd'] == 0:
            continue
        x_data[i].at[j, 'mjd'] -= 59000
        x_data[i].at[j, 'mjd'] /= 2000
        x_data[i].at[j, 'passband'] -= 3671
        x_data[i].at[j, 'passband'] /= 6041

In [None]:
x_train, x_test, y_train, y_test = train_test_split(
     x_data, y_data, test_size=0.2, random_state=42)

In [None]:
x_test, x_val, y_test, y_val = train_test_split(
    x_test, y_test, test_size=0.5, random_state=45)

In [None]:
# print(y_test)

In [None]:
# print(dftrain[dftrain['object_id'] == 50409]])

In [None]:

# print(x_train)
x_train_values = []
for i in range(len(x_train)):
    x_train_values.append([])
    for row in x_train[i].values:
        x_train_values[-1].append(row[1:])
y_train_values = [np.array(y[1]) for y in y_train]

x_test_values = []
for i in range(len(x_test)):
    x_test_values.append([])
    for row in x_test[i].values:
        x_test_values[-1].append(row[1:])
y_test_values = [np.array(y[1]) for y in y_test]

x_val_values = []
for i in range(len(x_val)):
    x_val_values.append([])
    for row in x_val[i].values:
        x_val_values[-1].append(row[1:])
y_val_values = [np.array(y[1]) for y in y_val]

# dftest = pd.read_csv('data/plasticc_test_metadata.csv')

# lcs_test = pd.read_csv('data/plasticc_test_lightcurves_01.csv')
# lcs_val = pd.read_csv('data/plasticc_test_lightcurves_02.csv')

# print(lcs, lcs_test, lcs_val)

# lcs_test['passband'] = lcs_test['passband'].map(wavelengths)
# lcs_val['passband'] = lcs_val['passband'].map(wavelengths)

# dftest = dftest[dftest['true_target'].isin(sn_models)]

# dftest['target_names'] = dftest['true_target'].map(model_nums)

# # lcs_test = pd.merge(lcs_test, dftest, on='object_id', how='inner')
# # lcs_val = pd.merge(lcs_val, dftest, on='object_id', how='inner')

# x_test, x_val, y_test, y_val = [], [], [], []

In [None]:
# for id in dftest['object_id']:
#     # print(id)
#     if len(lcs_test[lcs_test['object_id'] == id]) > 0:
#         # print("test: " + id, flush=True)
#         x_test.append(lcs_test[lcs_test['object_id'] == id][columns].reset_index())
#         y_test.append(dftest[dftest['object_id'] == id]['true_peakmjd'])
#         print(len(x_test[-1].index)
#         # while len(x_test[-1].index) < max_length:
#         #     x_test[-1].loc[len(x_test[-1].index)] = [len(x_test[-1].index), 0, 0, 0, 0]
#         # x_test[-1] = x_test[-1][columns]

# print(x_test)


In [None]:
# for id in dftest['object_id']:
#     if len(lcs_val[lcs_val['object_id'] == id]) > 0:
#         # print("val: " + id, flush=True)
#         x_val.append(lcs_val[lcs_val['object_id'] == id][columns].reset_index())
#         y_val.append(dftest[dftest['object_id'] == id]['true_peakmjd'])
#         while len(x_val[-1].index) < max_length:
#             x_val[-1].loc[len(x_val[-1].index)] = [len(x_val[-1].index), 0, 0, 0, 0]
#         x_val[-1] = x_val[-1][columns]

In [None]:
# print(x_train_values[:2])
# y_train_values[:2]

In [None]:
# Build Model

import tensorflow as tf
# import tensorflow_probability as tfp

tf.random.set_seed(1)

model = tf.keras.Sequential([
    tf.keras.layers.Input((352, 4)),
    tf.keras.layers.Masking(mask_value=tf.convert_to_tensor(np.array([0.0, 0.0, 0.0, 0.0], dtype=np.float32))),
    tf.keras.layers.GRU(50, return_sequences=True, activation='tanh'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.GRU(50,activation='tanh'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=1, activation='sigmoid')])
    # tfp.layers.DistributionLambda(lambda t: tfp.distributions.Normal(loc=t[..., :1],
                           #scale=1e-3 + tf.math.softplus(0.05 * t[...,1:])))])


model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.002),loss='mean_squared_error')
# print(model.summary())

In [None]:
print(len(x_train_values), len(y_train_values))

11132 11132


In [None]:
### Train Model
model.fit(np.array(x_train_values), np.array(y_train_values), epochs=1, validation_data=(np.array(x_val_values), np.array(y_val_values)))



[1m348/348[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 170ms/step - loss: 0.0135 - val_loss: 0.0030


<keras.src.callbacks.history.History at 0x3f02be650>

In [None]:
model.save('model.h5')

In [None]:
import pickle

with open('x_test_values.pickle', 'wb') as file:
    pickle.dump(x_test_values, file)

with open('y_test.pickle', 'wb') as file:
    pickle.dump(y_test, file)

In [None]:
print(np.array(x_test_values).shape)
y_pred = model.predict(np.array(x_test_values))

(1392, 352, 4)
[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 41ms/step


In [None]:
model2 = tf.keras.models.load_model('model.h5')
y_pred = model2.predict(np.array(x_test_values))

TypeError: Exception encountered when calling NotEqual.call().

[1mFailed to convert elements of {'class_name': '__tensor__', 'config': {'dtype': 'float64', 'value': [0.0, 0.0, 0.0, 0.0]}} to Tensor. Consider casting elements to a supported type. See https://www.tensorflow.org/api_docs/python/tf/dtypes for supported TF dtypes.[0m

Arguments received by NotEqual.call():
  • x1=tf.Tensor(shape=(32, 352, 4), dtype=float32)
  • x2={'class_name': "'__tensor__'", 'config': {'dtype': "'float64'", 'value': ['0.0', '0.0', '0.0', '0.0']}}

In [None]:
print(np.array(x_test_values).shape)
print(model.layers[0].mask_value)

In [None]:
def display_object(object_id, lightcurve_data, metadata, predicted=0):
    lightcurve_data = lightcurve_data[lightcurve_data['object_id'] == object_id]
    passbands = []
    true_peaktime = metadata[metadata['object_id'] == object_id]['true_peakmjd']
    print(true_peaktime)

    for freq in range(6):
        passbands.append(lightcurve_data[lightcurve_data['passband'] == freq][['mjd', 'flux']])

    for p in passbands:
        plt.scatter(p['mjd'], p['flux'])
        print(true_peaktime)
        plt.scatter(true_peaktime, [0])
        if predicted > 0:
            plt.scatter([predicted], [0])
        

    # plt.xlabel("Modified Julien Date")

    # plt.ylabel("Flux")
    
    # plt.show()
    plt.show()


In [None]:
display_object(95564, lcs, dftrain, 0.6931620000000003*2000+59000)