In [6]:
import os

import numpy as np
import tensorflow as tf
from sklearn.metrics import f1_score
from tensorflow.keras import layers

In [7]:
# Load the dataset
# The data directory can be overridden with the STAR_CLASSIFIER_DATA_PATH
# environment variable; when it is unset the original location is used, so
# existing setups keep working.
data_path = os.environ.get(
    'STAR_CLASSIFIER_DATA_PATH',
    '/Users/as274094/GitHub/Refractored_star_classifier/tensorflow_version/',
)
# The [()] index unwraps the 0-d object array that np.load returns for a pickled
# Python object (the result is then indexed by string keys below).
# NOTE(security): allow_pickle=True executes arbitrary code while unpickling --
# only load files from a trusted source.
dataset = np.load(os.path.join(data_path, 'PCA_dataset1.npy'), allow_pickle=True)[()]

# Network inputs for the three splits.
x_train = dataset['train_stars_pca']
x_val = dataset['validation_stars_pca']
x_test = dataset['test_stars_pca']
# Regression targets (scalar parameter C) for the three splits.
y_train = dataset['train_C']
y_val = dataset['validation_C']
y_test = dataset['test_C']
# Reference SED labels of the test split, used as ground truth for the
# classification metrics.
SED_test = dataset['test_SEDs']


In [8]:
# Hyperparameters
PCA_components = 24  # number of input features per sample (used as the model's input_shape)
# NOTE(review): the recorded test predictions in this notebook are all ~4.2476,
# i.e. the network output is effectively constant. A step size of 0.1 is large
# for Adam and may be the cause -- TODO confirm.
model_learning_rate = 0.1  # learning rate passed to the Adam optimizer
N_epochs = 50  # upper bound on training epochs (early stopping may end training sooner)
N_committee = 48  # not referenced in the cells visible here; presumably a committee size -- TODO confirm

In [9]:
def SEDlisttoC(SED_list):
    """Convert SED values to the scalar parameter C.

    The mapping is linear: ``C = 0.5 * value + 1.5``.

    Parameters
    ----------
    SED_list : array-like
        Values to convert (presumably integer SED class indices -- confirm
        against the caller).

    Returns
    -------
    numpy.ndarray
        The converted values, element by element.
    """
    scale, offset = 0.5, 1.5
    values = np.array(SED_list)
    return values * scale + offset

def CtoSEDarray(c_values, variance):
    """Bin predicted C values into integer SED classes.

    Class ``k`` covers the half-open interval ``[1.25 + 0.5*k, 1.75 + 0.5*k)``,
    so the valid range ``[1.25, 7.75)`` maps to classes 0..12. Every other
    sample is given the label 20.

    Parameters
    ----------
    c_values : array-like
        Predicted C values.
    variance : array-like
        Per-sample variance, broadcastable against ``c_values``. Samples with
        ``variance > 1.0`` are labelled 20.

    Returns
    -------
    numpy.ndarray
        Integer class per sample; 20 for samples that are out of range,
        non-finite, or too uncertain.
    """
    c_values = np.asarray(c_values)
    variance = np.asarray(variance)

    # NaN/inf would otherwise be cast to an arbitrary integer; silence the
    # warnings here because those entries are overwritten with 20 below.
    with np.errstate(invalid='ignore'):
        # The upper bound is exclusive: C == 7.75 would floor to class 13,
        # which is not a valid class. NaN fails both comparisons and is
        # therefore treated as out of range.
        in_range = (c_values >= 1.25) & (c_values < 7.75)
        sed_classes = ((c_values - 1.25) // 0.5).astype(int)

    confident = ~(variance > 1.00)
    return np.where(in_range & confident, sed_classes, 20)

def calculate_success_rate(confusion_matrix):
    """Return the fraction of samples on or next to the main diagonal.

    A sample counts as a success when it lies on the main diagonal of the
    confusion matrix or on one of the two diagonals directly beside it, i.e.
    when the predicted class index differs from the true one by at most 1.

    Parameters
    ----------
    confusion_matrix : array-like
        2-D matrix of counts.

    Returns
    -------
    float
        (exact hits + off-by-one hits) / sum of all entries.
    """
    exact_hits = np.trace(confusion_matrix)
    above = np.sum(np.diagonal(confusion_matrix, offset=1))
    below = np.sum(np.diagonal(confusion_matrix, offset=-1))
    near_hits = above + below
    return (exact_hits + near_hits) / np.sum(confusion_matrix)

In [10]:
# Define the model architecture

# Seed TensorFlow's global RNG so that re-running this cell repeats the random
# operations it triggers (fit() shuffles the training data by default). Some
# hardware kernels may still be non-deterministic.
tf.random.set_seed(1)

# NOTE(review): one seeded initializer instance is shared by all three layers;
# depending on the Keras version a seeded initializer may return the same
# random stream on every call -- confirm whether one instance per layer is wanted.
initializer = tf.keras.initializers.GlorotNormal(seed = 1)
# Two sigmoid hidden layers of 26 units followed by one linear output unit:
# the network regresses a single scalar per sample.
model = tf.keras.Sequential([
    layers.Dense(26, input_shape=[PCA_components], activation='sigmoid', kernel_initializer= initializer),
    layers.Dense(26, activation='sigmoid', kernel_initializer= initializer),
    layers.Dense(1, activation = 'linear', kernel_initializer= initializer)
])
model.compile(
    loss = tf.keras.losses.MeanSquaredError(),
    optimizer = tf.keras.optimizers.Adam(learning_rate = model_learning_rate)
)

# Train the model and make predictions
# Stop once val_loss has not improved for 10 epochs and restore the weights of
# the best epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor = "val_loss", patience = 10, restore_best_weights=True)
model.fit(x_train, y_train, epochs= N_epochs, callbacks = [early_stopping], validation_data=(x_val,y_val))
# Predict the scalar parameter C with this single network (no committee is
# built in this cell) and flatten the (N, 1) output to shape (N,).
C_pred = model.predict(x_test, verbose = 1).reshape(-1)
# A single model provides no variance estimate, so zeros are passed: only the
# range check inside CtoSEDarray can assign the label 20.
SED_pred = CtoSEDarray(C_pred,np.zeros_like(C_pred))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50


In [14]:
# Evaluate the performance of the classifier

# Mean squared error between the true and the predicted C values.
# The predictions are stored in C_pred by the training cell; the previously used
# name y_pred is not defined in any visible cell, so the cell failed on a fresh
# kernel (Restart & Run All).
mse = np.mean((y_test - C_pred)**2)
print('MSE:', mse)

# Per-class F1 scores (average=None returns one score per label present).
f1 = f1_score(SED_test, SED_pred, average = None)
print('\nF1 score:', f1)
# Mean over at most the first 13 per-class scores.
print(np.mean(f1[:13]))

# Per-sample listing: true SED label, predicted C, predicted SED class.
for true_sed, c_hat, sed_hat in zip(SED_test, C_pred, SED_pred):
    print(true_sed, c_hat, sed_hat)


confusion_matrix = tf.math.confusion_matrix(SED_test, SED_pred) 
print("\nConfusion matrix:")
print(confusion_matrix)

# Fraction of samples whose predicted class is the true class or a direct neighbour.
success_rate = calculate_success_rate(confusion_matrix)
print('\nSuccess rate:', success_rate)



MSE: 2.462505780239917

F1 score: [0. 0. 0. 0. 0. 0. 0. 0. 0.]
0.0
0 4.247594 5
11 4.247595 5
4 4.2475905 5
7 4.2476025 5
6 4.247599 5
3 4.2475886 5
3 4.247602 5
4 4.2475963 5
9 4.2475934 5
8 4.247597 5

Confusion matrix:
tf.Tensor(
[[0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0 0 0]
 [0 0 0 0 0 2 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0 0 0]], shape=(12, 12), dtype=int32)

Success rate: 0.3
