In [1]:
import numpy as np
from matplotlib import pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as torchdata

In [2]:

# Location of the catalogue and the columns read as numbers.
# Column 5 is skipped here because it is read separately below as the string label.
csv_path = '../Lezione_19/galaxyquasar.csv'
numeric_cols = (0,1,2,3,4,6)

# Read once with the header row interpreted, only to recover the column names
header_view = np.genfromtxt(csv_path, delimiter=',', names=True, usecols=numeric_cols)
names = header_view.dtype.names
print(names)

# Read the same columns again as a plain 2-D float array (header row skipped)
data = np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=numeric_cols, dtype=float)
print(data.shape)

# Column 5 holds the true class of each object, read as strings
labels = np.genfromtxt(csv_path, delimiter=',', skip_header=1, usecols=(5), dtype=str)
print(labels.shape)

# Differences between adjacent numeric columns: (0-1), (1-2), (2-3), (3-4)
ug_set, gr_set, ri_set, iz_set = (data[:, k] - data[:, k + 1] for k in range(4))

# Binary target: 1 where the label is 'QSO', 0 for everything else
labels = np.where(labels == 'QSO', 1, 0)


from sklearn.model_selection import train_test_split

# Feature matrix: one row per object, one column per colour (u-g, g-r, r-i, i-z)
X_tot = np.stack([ug_set, gr_set, ri_set, iz_set], axis=1)
X = X_tot

# Hold out 20% of the objects for the final evaluation; fixed seed for repeatability
split_parts = train_test_split(X, labels, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


('u', 'g', 'r', 'i', 'z', 'z1')
(50000, 6)
(50000,)


In [3]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from scikeras.wrappers import KerasClassifier

#Define the neural network model using keras
def NN_model(nhidden = 10, nlayers = 1, drop_rate = 0.1, activation = 'sigmoid'):
    """Build and compile a fully connected binary classifier.

    The network takes 4 input features and stacks `nlayers` hidden blocks,
    each made of a Dense layer followed by Dropout, ending in a single
    sigmoid output unit.

    Parameters
    ----------
    nhidden : int
        Number of units in every hidden Dense layer.
    nlayers : int
        Number of hidden Dense + Dropout blocks.
    drop_rate : float
        Dropout rate applied after every hidden layer.
    activation : str
        Activation used by the hidden layers only.

    Returns
    -------
    keras.models.Sequential
        Model compiled with the Adam optimizer, binary cross-entropy loss
        and accuracy as the tracked metric.
    """
    model = keras.models.Sequential()
    model.add(layers.Input(shape=(4,)))
    for _ in range(nlayers):
        model.add(layers.Dense(nhidden, activation=activation))
        model.add(layers.Dropout(drop_rate))
    # The output unit is always a sigmoid: binary cross-entropy expects a
    # probability in [0, 1], so it must not follow the hidden-layer activation
    # (e.g. 'relu' or 'tanh' here would produce an invalid classifier).
    model.add(layers.Dense(1, activation='sigmoid', name='output'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Sanity check: print the architecture obtained with the default hyperparameters
NN_model().summary()

# Wrap the builder so it can be used as a scikit-learn estimator.
# `model=` is the current scikeras argument (`build_fn=` is deprecated and emits a
# warning); the `model__*` search parameters are forwarded to NN_model.
clf_model = KerasClassifier(model=NN_model, verbose=2)

In [4]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform

# Standardise the features; the scaling statistics are learned on the training split only
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

# Downsample: tune on the first quarter of the training rows for a faster hyperparameter search
n_tune = int(0.25 * len(X_train_s))
X_downsamp, y_downsamp = X_train_s[:n_tune], y_train[:n_tune]

np.random.seed(42)

# Search space: lists are sampled uniformly, scipy distributions are drawn from.
# Keys prefixed with `model__` are routed to NN_model's arguments.
param_grid = {
    'batch_size': [16, 32, 64],
    'epochs': randint(10, 50),
    'model__nhidden': randint(1, 6),
    'model__nlayers': randint(1, 4),
    'model__drop_rate': uniform(0.0, 0.5),
}

# 5 random candidates, each scored with 3-fold cross-validation, using all available cores
clf_search = RandomizedSearchCV(
    estimator=clf_model,
    param_distributions=param_grid,
    n_iter=5,
    cv=3,
    verbose=2,
    n_jobs=-1,
    random_state=42,
)
clf_search.fit(X_downsamp, y_downsamp)

Fitting 3 folds for each of 5 candidates, totalling 15 fits


  X, y = self._initialize(X, y)


Epoch 1/31
157/157 - 10s - 61ms/step - accuracy: 0.1521 - loss: 0.9219
Epoch 2/31
157/157 - 2s - 10ms/step - accuracy: 0.6250 - loss: 0.6604
Epoch 3/31
157/157 - 1s - 9ms/step - accuracy: 0.8558 - loss: 0.5148
Epoch 4/31
157/157 - 1s - 9ms/step - accuracy: 0.8564 - loss: 0.4357
Epoch 5/31
157/157 - 2s - 10ms/step - accuracy: 0.8564 - loss: 0.3887
Epoch 6/31
157/157 - 1s - 9ms/step - accuracy: 0.8564 - loss: 0.3546
Epoch 7/31
157/157 - 1s - 9ms/step - accuracy: 0.8564 - loss: 0.3277
Epoch 8/31
157/157 - 1s - 9ms/step - accuracy: 0.8564 - loss: 0.3038
Epoch 9/31
157/157 - 1s - 9ms/step - accuracy: 0.8567 - loss: 0.2814
Epoch 10/31
157/157 - 2s - 16ms/step - accuracy: 0.8564 - loss: 0.2597
Epoch 11/31
157/157 - 1s - 8ms/step - accuracy: 0.8583 - loss: 0.2375
Epoch 12/31
157/157 - 1s - 8ms/step - accuracy: 0.8628 - loss: 0.2167
Epoch 13/31
157/157 - 1s - 8ms/step - accuracy: 0.9357 - loss: 0.1969
Epoch 14/31
157/157 - 1s - 8ms/step - accuracy: 0.9654 - loss: 0.1784
Epoch 15/31
157/157 - 1s

0,1,2
,estimator,KerasClassifi..._weight=None )
,param_distributions,"{'batch_size': [16, 32, ...], 'epochs': <scipy.stats....001E3B2ADB2B0>, 'model__drop_rate': <scipy.stats....001E3CC953100>, 'model__nhidden': <scipy.stats....001E3B2ADB5B0>, ...}"
,n_iter,5
,scoring,
,n_jobs,-1
,refit,True
,cv,3
,verbose,2
,pre_dispatch,'2*n_jobs'
,random_state,42

0,1,2
,model,
,build_fn,<function NN_...001E3B40BF130>
,warm_start,False
,random_state,
,optimizer,'rmsprop'
,loss,
,metrics,
,batch_size,64
,validation_batch_size,
,verbose,2


In [5]:

from sklearn.metrics import confusion_matrix
from astroML.utils import completeness_contamination
# Pull the winning hyperparameter combination out of the finished search
best_params = clf_search.best_params_
print("Best parameters found: ", best_params)

# Apply those hyperparameters and train on the complete scaled training set
# (the search itself only saw the downsampled subset)
clf_model.set_params(**best_params).fit(X_train_s, y_train)

Best parameters found:  {'batch_size': 64, 'epochs': 31, 'model__drop_rate': np.float64(0.028205789513550128), 'model__nhidden': 4, 'model__nlayers': 2}


  X, y = self._initialize(X, y)


Epoch 1/31
625/625 - 9s - 14ms/step - accuracy: 0.4347 - loss: 0.8199
Epoch 2/31
625/625 - 5s - 7ms/step - accuracy: 0.8573 - loss: 0.4200
Epoch 3/31
625/625 - 4s - 7ms/step - accuracy: 0.8573 - loss: 0.3271
Epoch 4/31
625/625 - 4s - 7ms/step - accuracy: 0.8573 - loss: 0.2599
Epoch 5/31
625/625 - 4s - 7ms/step - accuracy: 0.9451 - loss: 0.1950
Epoch 6/31
625/625 - 4s - 7ms/step - accuracy: 0.9744 - loss: 0.1418
Epoch 7/31
625/625 - 4s - 7ms/step - accuracy: 0.9759 - loss: 0.1106
Epoch 8/31
625/625 - 4s - 7ms/step - accuracy: 0.9795 - loss: 0.0928
Epoch 9/31
625/625 - 5s - 8ms/step - accuracy: 0.9809 - loss: 0.0830
Epoch 10/31
625/625 - 6s - 9ms/step - accuracy: 0.9816 - loss: 0.0789
Epoch 11/31
625/625 - 5s - 8ms/step - accuracy: 0.9818 - loss: 0.0756
Epoch 12/31
625/625 - 4s - 7ms/step - accuracy: 0.9822 - loss: 0.0747
Epoch 13/31
625/625 - 5s - 8ms/step - accuracy: 0.9822 - loss: 0.0729
Epoch 14/31
625/625 - 4s - 7ms/step - accuracy: 0.9823 - loss: 0.0726
Epoch 15/31
625/625 - 5s - 8

0,1,2
,model,
,build_fn,<function NN_...001E3B40BF130>
,warm_start,False
,random_state,
,optimizer,'rmsprop'
,loss,
,metrics,
,batch_size,64
,validation_batch_size,
,verbose,2


In [6]:
# KerasClassifier.predict returns hard class labels, so ask for probabilities
# explicitly; column 1 of predict_proba is the probability of the positive class (label 1).
y_prob = clf_model.predict_proba(X_test_s)[:, 1]
# Threshold the probabilities at 0.5 to obtain the predicted class
y_pred = (y_prob > 0.5).astype(int)

conf_matrix = confusion_matrix(y_test, y_pred)
comp, cont = completeness_contamination(y_pred, y_test)
print("Confusion Matrix:\n", conf_matrix)
# Precision is reported as the complement of the contamination
print(f"Completeness: {comp:.3f}, Precision: {1-cont:.3f}")
# Correct predictions sit on the diagonal of the confusion matrix
accuracy = conf_matrix.trace()/len(y_test)
print(f"Accuracy: {accuracy:.3f}")

157/157 - 1s - 7ms/step
Confusion Matrix:
 [[8508   64]
 [ 101 1327]]
Completeness: 0.929, Precision: 0.954
Accuracy: 0.984
