In [1]:
# Script that takes an unclassified PSF dataset and returns it with the corresponding SEDs assigned by a CNN classifier

import numpy as np
import tensorflow as tf
import sklearn.metrics as skm

2023-07-25 16:27:50.798713: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
# Load datasets
dataset_path = '/Users/as274094/Documents/psf_dataset4/'
dataset_name = 'train_Euclid_2000_stars_id_004GT_350_bins.npy'

# The dataset to classify. The file is unpickled (allow_pickle=True), so only
# load files from a trusted source; `[()]` unwraps the 0-d object array that
# np.load returns for a pickled Python object.
dataset_file = f'{dataset_path}{dataset_name}'
dataset_1 = np.load(dataset_file, allow_pickle=True)[()]

# Insert a new axis at position 3 of the star stamps before handing them to the classifier.
noisy_stars = dataset_1['noisy_stars']
x_to_convert = np.expand_dims(noisy_stars, axis=3)

In [3]:
# Load model

# Directory holding the saved Keras model (path split only for readability;
# the pieces concatenate to the same string).
model_path = (
    '/Users/as274094/GitHub/Refractored_star_classifier/'
    'tensorflow_version/best_models/CNN_model/'
)

# Restore the trained classifier from disk.
classifier = tf.keras.models.load_model(model_path)
    
def calculate_success_rate(confusion_matrix, tolerance=1):
    """Return the fraction of samples classified within `tolerance` classes of the truth.

    A sample counts as a success when its predicted class index differs from
    the true class index by at most `tolerance`, i.e. when it lies on the main
    diagonal of the confusion matrix or on one of the `tolerance` diagonals
    directly above or below it.

    Parameters
    ----------
    confusion_matrix : array-like, 2-D
        Matrix of counts with true classes along one axis and predicted
        classes along the other.
    tolerance : int, optional
        Largest accepted distance from the main diagonal. The default of 1
        reproduces the original "exact or adjacent class" definition;
        0 gives the plain accuracy.

    Returns
    -------
    float
        Success rate in [0, 1], or NaN when the matrix contains no samples.

    Raises
    ------
    ValueError
        If `tolerance` is negative.
    """
    if tolerance < 0:
        raise ValueError('tolerance must be a non-negative integer')

    matrix = np.asarray(confusion_matrix)
    total_classified = np.sum(matrix)
    if total_classified == 0:
        # Nothing was classified: the rate is undefined. Return NaN explicitly
        # rather than relying on a 0/0 division and its RuntimeWarning.
        return np.nan

    # np.trace(offset=k) sums the k-th diagonal; offsets that fall outside the
    # matrix contribute an empty diagonal, i.e. zero.
    within_tolerance = sum(
        np.trace(matrix, offset=offset)
        for offset in range(-tolerance, tolerance + 1)
    )
    return within_tolerance / total_classified


In [6]:
# Make predictions and calculate metrics

# The classifier returns one row of scores per input; the index of the largest
# score along axis 1 is taken as the predicted class.
y_test_pred = classifier.predict(x_to_convert, verbose=1)
class_predictions = np.argmax(y_test_pred, axis=1)

# Labels stored with the dataset, used as the reference for every metric below.
true_sed_ids = dataset_1['SED_ids']

# Per-class F1 scores (average=None); only the first 13 entries enter the mean.
per_class_f1 = skm.f1_score(true_sed_ids, class_predictions, average=None)
f1_mean = np.mean(per_class_f1[:13])
print('Average F1 score:', f1_mean)

confusion_matrix = skm.confusion_matrix(true_sed_ids, class_predictions)
print("\nConfusion matrix:")
print(confusion_matrix)

# Share of stars whose predicted class is the true class or a direct neighbour.
success_rate = calculate_success_rate(confusion_matrix)
print('\nSuccess rate:', success_rate)

Average F1 score: 0.6803504313288968

Confusion matrix:
[[ 93  46   4   0   0   0   0   0   0   0   0   0   0]
 [ 64  93  15   0   1   0   0   0   0   0   0   0   0]
 [  2   9  94  37   6   0   0   0   0   0   0   0   0]
 [  0   2  47  72  24   1   0   0   0   0   0   0   0]
 [  0   0   2  14 114  15   0   0   0   0   0   0   0]
 [  0   0   0   0  23 109  35   1   0   0   0   0   0]
 [  0   0   0   0   4  29 105  12   0   0   0   0   0]
 [  0   0   0   0   1   0  25  87  24  12   0   0   0]
 [  0   0   0   0   0   0   3  73  46  38   0   0   0]
 [  0   0   0   0   0   0   0  18  36  92   1   0   0]
 [  0   0   0   0   0   0   0   0   0   4 148   1   0]
 [  0   0   0   0   0   0   0   0   0   0   1 170   0]
 [  0   0   0   0   0   0   0   0   0   0   0   0 147]]

Success rate: 0.9715


In [8]:
# Assign SEDs

# Lookup of concatenated SEDs, indexed by predicted class
# (unpickled from a file in the current working directory).
concatenated_SEDs = np.load('concatenated_SEDs.npy', allow_pickle=True)[()]

# Pick, for every star, the SED entry that belongs to its predicted class.
SED_list = [concatenated_SEDs[predicted_class] for predicted_class in class_predictions]
SED_array = np.array(SED_list, dtype=object)

In [11]:
# Save the new dataset

# Work on a shallow copy rather than writing into `dataset_1`: replacing
# dataset_1['SED_ids'] in place would overwrite the reference labels that the
# metrics cell reads, so re-running that cell afterwards would compare the
# predictions with themselves. The saved content is the same as before.
# NOTE(review): assumes `dataset_1` is a dict-like mapping (it is indexed by
# string keys throughout) - confirm.
classified_dataset = dict(dataset_1)
classified_dataset['SEDs'] = SED_array
classified_dataset['SED_ids'] = class_predictions
classified_dataset['F1'] = f1_mean
classified_dataset['success_rate'] = success_rate

# Written next to the input file, with an 'assigned_CNN_' prefix on its name.
np.save(
    dataset_path + 'assigned_CNN_' + dataset_name,
    classified_dataset,
    allow_pickle=True,
)

In [13]:
# Verification

# Reload the file from the same location the save cell writes to. The path is
# rebuilt from `dataset_path` and `dataset_name` instead of being repeated as a
# literal, so it cannot drift when a different dataset is selected.
assigned_dataset = np.load(
    dataset_path + 'assigned_CNN_' + dataset_name,
    allow_pickle=True,
)[()]

# The stored metrics should match the values printed by the metrics cell.
print(assigned_dataset['F1'],assigned_dataset['success_rate'])

0.6803504313288968 0.9715
