In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

2023-05-23 11:10:48.228076: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
# Load the datasets
# NOTE(review): absolute, user-specific path; point this at your own copy of the data.
output_folder = '/Users/as274094/GitHub/psf_dataset_generation/output/'
# allow_pickle=True goes through pickle, which can execute arbitrary code: only load trusted files.
# The trailing [()] indexes with an empty tuple, which unwraps the single object held by a
# 0-d array (presumably a dict, since it is indexed by string keys just below).
test_dataset = np.load(output_folder + 'test_Euclid_res_10_TestStars_id_001GT_100_bins.npy', allow_pickle=True)[()]
# output_folder already ends with '/', so the file name must not start with another one.
train_dataset = np.load(output_folder + 'train_Euclid_res_50_TrainStars_id_001GT_100_bins.npy', allow_pickle=True)[()]
train_stars = train_dataset['stars']
test_stars = test_dataset['stars']

In [18]:
# Hyperparameters
# Number of principal components kept by the PCA; also the size of the network's input layer.
PCA_components = 24
# Learning rate handed to the Adam optimizer of every committee member.
model_learning_rate = 0.1
# Number of training epochs for each committee member.
N_epochs = 50
# Number of networks trained for the committee.
N_committee = 48

In [19]:
def SEDlisttoC(SED_list):
    """Convert SED class ids into the scalar regression target C.

    The mapping is linear: C = 1.5 + 0.5 * id, so consecutive ids are 0.5 apart.

    Args:
        SED_list: Sequence (or array) of numeric SED ids.

    Returns:
        numpy.ndarray of C values, one per input id.
    """
    ids = np.array(SED_list)
    return 1.5 + 0.5 * ids

def CtoSEDarray(c_values, variance=None):
    """Convert predicted C values back into integer SED class ids.

    Inverse of the linear map C = 0.5 * id + 1.5: every id owns the half-open bin
    [C - 0.25, C + 0.25), so the valid range [1.25, 7.75) covers ids 0..12.

    Args:
        c_values: Array-like of C values.
        variance: Optional array-like (same shape as ``c_values``) with the
            variance of each prediction. Entries above 1.00 are treated as
            unreliable. When omitted, no variance filtering is applied.

    Returns:
        numpy.ndarray of ints. Values outside [1.25, 7.75), NaNs, and
        high-variance predictions are mapped to the sentinel class 20.
    """
    unknown_class = 20
    c_values = np.asarray(c_values, dtype=float)

    # The upper bound is exclusive: c == 7.75 would otherwise floor to id 13,
    # which is not a valid class. NaN fails both comparisons and lands in "unknown".
    in_range = (c_values >= 1.25) & (c_values < 7.75)

    # Substitute a harmless value for rejected entries so the integer cast never sees NaN/inf.
    safe_c = np.where(in_range, c_values, 1.25)
    sed_classes = ((safe_c - 1.25) // 0.5).astype(int)
    sed_classes = np.where(in_range, sed_classes, unknown_class)

    if variance is not None:
        sed_classes = np.where(np.asarray(variance) > 1.00, unknown_class, sed_classes)
    return sed_classes

# Turn the SED ids of the train and test sets into their regression targets C.
train_C, test_C = (
    SEDlisttoC(dataset['SED_ids']) for dataset in (train_dataset, test_dataset)
)

In [None]:
# Sanity check of the SED id -> C -> SED id round trip on synthetic data.
# A local, seeded generator keeps the example reproducible without touching global RNG state.
example_rng = np.random.default_rng(0)
example_SED_original = example_rng.integers(low=0, high=13, size=50)
example_C = SEDlisttoC(example_SED_original)
# Uniform noise in [-0.4, 0.4): wider than the +/-0.25 half-width of a class bin,
# so some ids are expected to come back shifted by one (or as the out-of-range class).
example_C_noisy = example_C + example_rng.random(example_C.shape[0])*0.8 - 0.40
# CtoSEDarray also takes a per-sample variance; zeros disable the high-variance rejection.
example_SED_obtained = CtoSEDarray(example_C_noisy, np.zeros_like(example_C_noisy))
for sed_in, c_noisy, sed_out in zip(example_SED_original, example_C_noisy, example_SED_obtained):
    print(sed_in, c_noisy, sed_out)


In [21]:
# Perform PCA on all the images
pca = PCA(n_components=PCA_components)
train_and_test_stars = np.concatenate((train_stars, test_stars), axis=0)
# Flatten every image to one row; -1 infers the pixel count instead of hard-coding 1024.
# NOTE(review): the PCA basis is fitted on train AND test images, so the test set
# influences the features it is later evaluated on. Confirm this is intended.
pca.fit(train_and_test_stars.reshape(len(train_and_test_stars), -1))
x_train = pca.transform(train_stars.reshape(len(train_stars), -1))
x_test = pca.transform(test_stars.reshape(len(test_stars), -1))
y_train = train_C
# An integer test_size is an absolute sample count, so this reserves 20 stars for validation.
# random_state pins the split so reruns train and validate on the same samples.
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=20, random_state=0)



In [22]:
# Define the model architecture
def create_model(seed=1):
    """Build and compile one committee member.

    The network takes ``PCA_components`` inputs, has two sigmoid hidden layers of
    26 units each and a single linear output. It is compiled with a
    mean-squared-error loss and an Adam optimizer using ``model_learning_rate``.

    Args:
        seed: Seed of the Glorot-normal kernel initializer. Defaults to 1, the
            value that used to be hard-coded.

    Returns:
        A compiled ``tf.keras.Sequential`` model.
    """
    initializer = tf.keras.initializers.GlorotNormal(seed=seed)
    model = tf.keras.Sequential([
        layers.Dense(26, input_shape=[PCA_components], activation='sigmoid', kernel_initializer=initializer),
        layers.Dense(26, activation='sigmoid', kernel_initializer=initializer),
        layers.Dense(1, activation='linear', kernel_initializer=initializer)
    ])
    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=model_learning_rate)
    )
    return model

# Train a committee of N_committee neural networks and make predictions
committee = []
committee_predictions = []
committee_val_loss = []
for i in range(N_committee):
    # Give every member its own initializer seed. With one shared seed all networks
    # start from identical kernels, which defeats the purpose of a committee.
    model = create_model(seed=i + 1)
    history = model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=N_epochs, verbose=0)
    committee.append(model)
    # Final-epoch loss on the held-out validation split, one value per member.
    committee_val_loss.append(history.history['val_loss'][-1])
    # Predict the scalar parameter C for the test set; verbose=0 avoids one progress bar per member.
    committee_predictions.append(model.predict(x_test, verbose=0).reshape(-1))

# Rows are committee members, columns are test samples.
committee_predictions = np.array(committee_predictions)
y_pred = np.mean(committee_predictions, axis=0)
# Spread across members, used by CtoSEDarray to reject unreliable predictions.
pred_variance = np.var(committee_predictions, axis=0)
SED_pred = CtoSEDarray(y_pred, pred_variance)
# TODO: add metrics




In [23]:
 # Confusion matrix of the true SED ids against the committee's predicted SED classes.
 tf.math.confusion_matrix(labels=test_dataset['SED_ids'], predictions=SED_pred)

<tf.Tensor: shape=(12, 12), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0]], dtype=int32)>

In [None]:
# Evaluate the performance of the committee

# Mean squared error between the true C values and the committee average.
mse = ((test_C - y_pred)**2).mean()
print('MSE:', mse)
print(pred_variance)


# One line per test star: true SED id, predicted C, predicted SED class.
for idx, c_hat in enumerate(y_pred):
    print(test_dataset['SED_ids'][idx], c_hat, SED_pred[idx])
