In [11]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

import tensorflow as tf
import time
from tensorflow.keras import layers
from sklearn.metrics import f1_score
import os

In [12]:
# Load the dataset 
data_path = '/Users/ec270266/Documents/Phd/Euclid/dev/feature-sed-pred/sed_spectral_classification/output/psf_dataset/'
# output_path = data_path
train_dataset = np.load(data_path + 'train_12000_stars_id_002_8bins.npy', allow_pickle=True)[()]
test_dataset = np.load(data_path + 'test_1000_stars_id_002_8bins.npy', allow_pickle=True)[()]

In [13]:
def perform_PCA(N_components, fit_selection, *transform_selection):
    """Performs PCA decomposition on star sets and returns a PCA star sets.

    Args:
        N_components (int): Number of PCA components to do the analysis.
        fit_selection: Star set (in 32x32 float array format) to do the PCA fit.
        transform_selection: Variable amount of star sets to do the PCA transform on.
        
    Returns: 
        List: Processed PCA star sets.
    """
    pca = PCA(n_components= N_components) 
    pca.fit(fit_selection.reshape(-1, 1024))

    pca_processed_sets = []
    for i in range(len(transform_selection)):
        pca_processed_sets.append(pca.transform(transform_selection[i].reshape(-1, 1024)))

    return pca_processed_sets

In [14]:
def SEDlisttoC(SED_list):
    """Converts a stellar class (1 to 13) to the regression parameter C."""
    sed_array = np.array(SED_list)
    return sed_array*0.5 + 1.5

def CtoSEDarray(c_values, variance):
    """Converts the regression parameter C back to a stellar class.
    If the C is out of bounds or its variance is too high it is classified as an anomaly (stellar class 20)."""
    sed_classes = np.rint(((c_values - 1.5) * 2)).astype(int)
    sed_classes = np.where((c_values < 1.4) | (c_values > 7.6), 13, sed_classes)
    sed_classes = np.where((variance > 1.00), 13, sed_classes)
    return sed_classes

def calculate_success_rate(confusion_matrix):
    """Metric that contemplates success as the true spectral class with a tolerance of one adjacent class."""
    diagonal = np.trace(confusion_matrix)
    diagonal_neighbors = np.sum(np.diagonal(confusion_matrix, offset=1)) + np.sum(np.diagonal(confusion_matrix, offset=-1))
    total_classified = np.sum(confusion_matrix)
    
    success_rate = (diagonal + diagonal_neighbors) / total_classified
    return success_rate

In [15]:
PCA_components = 24
model_learning_rate = 0.1

# Define the model architecture
def create_model():
    """Creates a network with the original architecture."""
    initializer = tf.keras.initializers.GlorotNormal(seed = None)
    model = tf.keras.Sequential([
        layers.Dense(26, input_shape=[PCA_components], activation='sigmoid', kernel_initializer= initializer),
        layers.Dense(26, activation='sigmoid', kernel_initializer= initializer),
        layers.Dense(1, activation = 'linear', kernel_initializer= initializer)
    ])
    model.compile(
        loss = tf.keras.losses.MeanSquaredError(),
        optimizer = tf.keras.optimizers.legacy.Adam(learning_rate = model_learning_rate)
    )
    return model

class TrainingCompletionCallback(tf.keras.callbacks.Callback):
    """Callback to display training information only at the end of the training of one network."""
    def on_train_end(self, logs=None):
        epochs = len(self.model.history.history['loss'])
        final_loss = self.model.history.history['loss'][-1]
        final_val_loss = self.model.history.history['val_loss'][-1]

        print("Training completed. Number of epochs:", epochs, ", Final training loss:", final_loss, ", Final validation loss:", final_val_loss)


In [17]:
rand_runs = 10
# Hyperparameters
N_components = 24
N_epochs = 100
N_committee = 48
patience_epochs = 20

train_stars_offset = 2000
noisy_train_stars = train_dataset['noisy_stars'][train_stars_offset:]
noisy_test_stars = test_dataset['noisy_stars']
train_labels = train_dataset['SED_ids'][train_stars_offset:]
test_labels = test_dataset['SED_ids']

PCA_cm_list = []

for run in range(rand_runs):

    # Shuffle the datasets
    X_train_test = np.concatenate((noisy_train_stars,noisy_test_stars))
    Y_train_test = np.concatenate((train_labels,test_labels))
    randomize = np.arange(len(X_train_test))
    np.random.shuffle(randomize)
    X_train_test_rnd = X_train_test[randomize]
    Y_train_test_rnd = np.array(Y_train_test)[randomize]
    # Split the dataset into train and test
    x_train_stars = X_train_test_rnd[:noisy_train_stars.shape[0]]
    y_train_idx = Y_train_test_rnd[:noisy_train_stars.shape[0]]
    x_test_stars = X_train_test_rnd[noisy_train_stars.shape[0]:]
    y_test_idx = Y_train_test_rnd[noisy_train_stars.shape[0]:]

    fit_selection = x_train_stars
    train_size=len(x_train_stars)

    x_train, = perform_PCA(N_components, fit_selection, x_train_stars)
    y_train = SEDlisttoC(y_train_idx)
    SED_train = y_train_idx

    x_test,  = perform_PCA(N_components, fit_selection, x_test_stars)
    y_test = SEDlisttoC(y_test_idx)
    SED_test = y_test_idx

    x_val, = perform_PCA(N_components, fit_selection, train_dataset['noisy_stars'][:train_stars_offset])
    y_val = SEDlisttoC(train_dataset['SED_ids'][:train_stars_offset])
    SED_val = train_dataset['SED_ids'][:train_stars_offset]

    # Create models and train them
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor = "val_loss", patience = patience_epochs, restore_best_weights=True)
    completion_callback = TrainingCompletionCallback()

    start_time = time.time() # Measure training time

    # Train a committee of 48 neural networks
    committee = []
    for i in range(N_committee):
        model = create_model()
        print("Training network #",i)
        learning = model.fit(x_train, y_train, epochs= N_epochs, verbose = 0, callbacks = [completion_callback, early_stopping], validation_data=(x_val,y_val))
        committee.append(model)   

    end_time = time.time()
    training_time = end_time - start_time
    print("Total training time:", training_time, "seconds")

    # Evaluate the committee
    x=x_test
    y_true=y_test
    SED_true=SED_test

    committee_predictions = []
    for model in committee:
        committee_predictions.append(model.predict(x, verbose = 0).reshape(-1)) # Predict the scalar parameter C using the committee   
    committee_predictions = np.array(committee_predictions)
    y_pred = np.mean(committee_predictions, axis=0)
    pred_variance = np.var(committee_predictions, axis=0)
    SED_pred = CtoSEDarray(y_pred,pred_variance)

    mse = np.mean((y_true - y_pred)**2)
    print('MSE:', mse)
    # print("Variance: ", pred_variance)

    f1 = f1_score(SED_true, SED_pred, average = None)
    f1_mean = np.mean(f1[:13])
    print('\nF1 score:', f1)
    print('Average F1 score:', f1_mean)

    confusion_matrix = tf.math.confusion_matrix(SED_true, SED_pred) 
    print("\nConfusion matrix:")
    print(confusion_matrix)

    success_rate = calculate_success_rate(confusion_matrix)
    print('\nSuccess rate:', success_rate)

    PCA_cm_list.append(confusion_matrix)

# Extract data from tensor
PCA_cm_list_np = [np.array(cm_.numpy().tolist()) for cm_ in PCA_cm_list]
PCA_cm_list_np[4] = PCA_cm_list_np[4][:-1,:-1]
PCA_cm_list_np[9] = PCA_cm_list_np[9][:-1,:-1]

np.save('PCA_pix_classifier_cm_list.npy', PCA_cm_list_np)



Training network # 0
Training completed. Number of epochs: 100 , Final training loss: 0.5669496655464172 , Final validation loss: 0.6553674340248108
Training network # 1
Training completed. Number of epochs: 87 , Final training loss: 0.5462183952331543 , Final validation loss: 0.5596910119056702
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.4459526240825653 , Final validation loss: 0.5422871708869934
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.4567455053329468 , Final validation loss: 0.48121920228004456
Training network # 4
Training completed. Number of epochs: 100 , Final training loss: 0.49018529057502747 , Final validation loss: 0.512520432472229
Training network # 5
Training completed. Number of epochs: 95 , Final training loss: 0.49599823355674744 , Final validation loss: 0.5178730487823486
Training network # 6
Training completed. Number of epochs: 100 , Final training loss: 0.479122519493103 , 



Training network # 0
Training completed. Number of epochs: 100 , Final training loss: 0.5297969579696655 , Final validation loss: 0.5665904879570007
Training network # 1
Training completed. Number of epochs: 60 , Final training loss: 0.719422459602356 , Final validation loss: 0.7746601700782776
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.4760820269584656 , Final validation loss: 0.5446997880935669
Training network # 3
Training completed. Number of epochs: 41 , Final training loss: 0.594230055809021 , Final validation loss: 0.6731446385383606
Training network # 4
Training completed. Number of epochs: 97 , Final training loss: 0.48736023902893066 , Final validation loss: 0.5895691514015198
Training network # 5
Training completed. Number of epochs: 97 , Final training loss: 0.4626515209674835 , Final validation loss: 0.5222212076187134
Training network # 6
Training completed. Number of epochs: 87 , Final training loss: 0.5037798881530762 , Final



Training network # 0
Training completed. Number of epochs: 96 , Final training loss: 0.4587915539741516 , Final validation loss: 0.5536099076271057
Training network # 1
Training completed. Number of epochs: 97 , Final training loss: 0.5032359957695007 , Final validation loss: 0.6243677735328674
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.45402729511260986 , Final validation loss: 0.5540573000907898
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.5318616628646851 , Final validation loss: 0.6492273211479187
Training network # 4
Training completed. Number of epochs: 100 , Final training loss: 0.45946040749549866 , Final validation loss: 0.4894760251045227
Training network # 5
Training completed. Number of epochs: 88 , Final training loss: 0.5197445154190063 , Final validation loss: 0.5662400126457214
Training network # 6
Training completed. Number of epochs: 97 , Final training loss: 0.46582362055778503 , 



Training network # 0
Training completed. Number of epochs: 85 , Final training loss: 0.6162964701652527 , Final validation loss: 0.6877809762954712
Training network # 1
Training completed. Number of epochs: 58 , Final training loss: 0.7297399640083313 , Final validation loss: 0.7444193959236145
Training network # 2
Training completed. Number of epochs: 95 , Final training loss: 0.4390513598918915 , Final validation loss: 0.5104751586914062
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.4733351469039917 , Final validation loss: 0.5083120465278625
Training network # 4
Training completed. Number of epochs: 76 , Final training loss: 0.5375012159347534 , Final validation loss: 0.5897596478462219
Training network # 5
Training completed. Number of epochs: 100 , Final training loss: 0.49434134364128113 , Final validation loss: 0.5455782413482666
Training network # 6
Training completed. Number of epochs: 100 , Final training loss: 0.5338733792304993 , Fi



Training network # 0
Training completed. Number of epochs: 68 , Final training loss: 0.5230402946472168 , Final validation loss: 0.5752558708190918
Training network # 1
Training completed. Number of epochs: 100 , Final training loss: 0.5034437775611877 , Final validation loss: 0.5695997476577759
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.4616955518722534 , Final validation loss: 0.5058884024620056
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.5258628129959106 , Final validation loss: 0.7743967771530151
Training network # 4
Training completed. Number of epochs: 100 , Final training loss: 0.43875443935394287 , Final validation loss: 0.5327054262161255
Training network # 5
Training completed. Number of epochs: 86 , Final training loss: 0.5002577900886536 , Final validation loss: 0.5659056305885315
Training network # 6
Training completed. Number of epochs: 100 , Final training loss: 0.47338318824768066 ,



Training network # 0
Training completed. Number of epochs: 100 , Final training loss: 0.5581448674201965 , Final validation loss: 0.6082430481910706
Training network # 1
Training completed. Number of epochs: 100 , Final training loss: 0.5117031931877136 , Final validation loss: 0.5366926193237305
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.4609036147594452 , Final validation loss: 0.5549740195274353
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.46624627709388733 , Final validation loss: 0.5279454588890076
Training network # 4
Training completed. Number of epochs: 100 , Final training loss: 0.4775393605232239 , Final validation loss: 0.5998027324676514
Training network # 5
Training completed. Number of epochs: 72 , Final training loss: 0.5270657539367676 , Final validation loss: 0.540780246257782
Training network # 6
Training completed. Number of epochs: 98 , Final training loss: 0.5467966794967651 , F



Training network # 0
Training completed. Number of epochs: 100 , Final training loss: 0.48246341943740845 , Final validation loss: 0.5978529453277588
Training network # 1
Training completed. Number of epochs: 100 , Final training loss: 0.44451722502708435 , Final validation loss: 0.5040805339813232
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.46637246012687683 , Final validation loss: 0.6753607988357544
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.5040686726570129 , Final validation loss: 0.5237519145011902
Training network # 4
Training completed. Number of epochs: 47 , Final training loss: 0.6759746670722961 , Final validation loss: 0.814414918422699
Training network # 5
Training completed. Number of epochs: 100 , Final training loss: 0.5504106283187866 , Final validation loss: 0.5544868111610413
Training network # 6
Training completed. Number of epochs: 100 , Final training loss: 0.45819106698036194



Training network # 0
Training completed. Number of epochs: 100 , Final training loss: 0.5334934592247009 , Final validation loss: 0.559952437877655
Training network # 1
Training completed. Number of epochs: 100 , Final training loss: 0.4675794839859009 , Final validation loss: 0.5435634255409241
Training network # 2
Training completed. Number of epochs: 100 , Final training loss: 0.47612670063972473 , Final validation loss: 0.5389412045478821
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.511850893497467 , Final validation loss: 0.5408767461776733
Training network # 4
Training completed. Number of epochs: 79 , Final training loss: 0.5309857130050659 , Final validation loss: 0.5862460732460022
Training network # 5
Training completed. Number of epochs: 100 , Final training loss: 0.49369218945503235 , Final validation loss: 0.5102737545967102
Training network # 6
Training completed. Number of epochs: 100 , Final training loss: 0.5054299831390381 , 



Training network # 0
Training completed. Number of epochs: 79 , Final training loss: 0.47182124853134155 , Final validation loss: 0.4975142478942871
Training network # 1
Training completed. Number of epochs: 100 , Final training loss: 0.5443294048309326 , Final validation loss: 0.6382140517234802
Training network # 2
Training completed. Number of epochs: 76 , Final training loss: 0.6206886768341064 , Final validation loss: 0.7422330975532532
Training network # 3
Training completed. Number of epochs: 82 , Final training loss: 0.5167284607887268 , Final validation loss: 0.5419327020645142
Training network # 4
Training completed. Number of epochs: 93 , Final training loss: 0.5331743359565735 , Final validation loss: 0.5816789865493774
Training network # 5
Training completed. Number of epochs: 100 , Final training loss: 0.4828559458255768 , Final validation loss: 0.5031838417053223
Training network # 6
Training completed. Number of epochs: 100 , Final training loss: 0.46065381169319153 , F



Training network # 0
Training completed. Number of epochs: 100 , Final training loss: 0.4506741762161255 , Final validation loss: 0.5413374900817871
Training network # 1
Training completed. Number of epochs: 100 , Final training loss: 0.5349053740501404 , Final validation loss: 0.5514583587646484
Training network # 2
Training completed. Number of epochs: 85 , Final training loss: 0.4662787616252899 , Final validation loss: 0.593339204788208
Training network # 3
Training completed. Number of epochs: 100 , Final training loss: 0.48556533455848694 , Final validation loss: 0.5102603435516357
Training network # 4
Training completed. Number of epochs: 100 , Final training loss: 0.5159204602241516 , Final validation loss: 0.5967524647712708
Training network # 5
Training completed. Number of epochs: 100 , Final training loss: 0.4634258449077606 , Final validation loss: 0.502690315246582
Training network # 6
Training completed. Number of epochs: 87 , Final training loss: 0.4836079180240631 , Fi

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (10,) + inhomogeneous part.