In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# --- Configuration -----------------------------------------------------------
# NOTE(review): DATA_DIR is a machine-specific absolute path; point it at your
# local copy of the dataset when running this notebook elsewhere.
DATA_DIR = Path(r"C:\Users\Roshen Hasangha\Desktop\dataset")
SEQ_LEN = 512      # only flux sequences of exactly this length are kept
TEST_SIZE = 0.2    # 80-20 train-test split
SEED = 42          # fixed seed so the split is reproducible

# --- Load flux sequences -----------------------------------------------------
# SECURITY: allow_pickle=True unpickles the file contents, which can execute
# arbitrary code. Only load .npy files from a source you trust.
flux_sequences = np.load(DATA_DIR / "flux_sequences.npy", allow_pickle=True)
flux_kepler_ids = np.load(DATA_DIR / "flux_kepler_ids.npy")

merged_df = pd.read_csv(DATA_DIR / "final_multimodal_dataset.csv")

# Map each Kepler ID to its flux sequence (a later duplicate ID overwrites an
# earlier one).
id_to_seq = {k: np.array(seq) for k, seq in zip(flux_kepler_ids, flux_sequences)}

# --- Match sequences to labelled rows ----------------------------------------
# Iterate over just the two columns we need instead of DataFrame.iterrows(),
# which builds a Series per row and may upcast mixed dtypes.
matched_seqs, matched_labels = [], []
for kepler_id, label in zip(merged_df['Kepler_ID'], merged_df['label']):
    seq = id_to_seq.get(kepler_id)
    if seq is not None and len(seq) == SEQ_LEN:
        matched_seqs.append(seq)
        matched_labels.append(label)

if not matched_seqs:
    raise ValueError(
        f"No flux sequence of length {SEQ_LEN} matched a 'Kepler_ID' in the dataset; "
        "check that the .npy files and the CSV belong together."
    )

X_seq = np.array(matched_seqs, dtype=np.float32)
y = np.array(matched_labels)

# --- Train-test split (80-20) ------------------------------------------------
# Split BEFORE scaling so that no statistic of the test set influences the
# features the model is trained on.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_seq, y, test_size=TEST_SIZE, random_state=SEED, stratify=y
)

# --- Standardize flux data (critical for MLP) --------------------------------
# Fit the scaler on the training portion only, then apply the same
# transformation to the test portion.
scaler_flux = StandardScaler()
X_train = scaler_flux.fit_transform(X_train_raw)
X_test = scaler_flux.transform(X_test_raw)

# Full scaled matrix, kept for any later cell that still reads `X_seq_scaled`;
# it uses the training-set statistics as well.
X_seq_scaled = scaler_flux.transform(X_seq)

print(f"✅ Data shape: Train {X_train.shape}, Test {X_test.shape}")


✅ Data shape: Train (1128, 512), Test (283, 512)


In [2]:
import tensorflow as tf

def build_mlp_model(input_shape):
    """Build and compile a small MLP for binary classification.

    Architecture: Dense(128, relu) -> Dropout(0.3) -> Dense(64, relu)
    -> Dropout(0.3) -> Dense(32, relu) -> Dense(1, sigmoid).

    Args:
        input_shape: Number of input features per sample, as an integer
            (e.g. ``X_train.shape[1]``). Despite the name this is a single
            dimension, not a shape tuple; it is wrapped into ``(input_shape,)``
            here.

    Returns:
        A ``tf.keras.Sequential`` model compiled with the Adam optimizer,
        binary cross-entropy loss and an accuracy metric.
    """
    model = tf.keras.Sequential([
        # Declare the input explicitly rather than passing `input_shape=` to
        # the first Dense layer, which newer Keras versions warn against.
        tf.keras.Input(shape=(input_shape,)),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(32, activation='relu'),
        # Single sigmoid unit: output is a probability for the positive class.
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Seed the Python, NumPy and TensorFlow RNGs before the model is created so
# that weight initialisation, dropout masks and batch shuffling are repeatable
# across Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic — confirm if
# bit-exact reproducibility is required.
tf.keras.utils.set_random_seed(42)

# One input unit per flux time step (the number of columns in X_train).
model_mlp_flux = build_mlp_model(X_train.shape[1])
model_mlp_flux.summary()





Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               65664     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 76033 (297.00 KB)
Trainable params: 760

In [13]:
# Stop training once validation loss has not improved for 3 consecutive
# epochs, and roll the model back to the weights of the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=3,
    restore_best_weights=True,
)

# Train for at most 25 epochs; 20% of the training data is held out by Keras
# for validation.
history_mlp_flux = model_mlp_flux.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=25,
    validation_split=0.2,
    callbacks=[early_stopping],
)


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
