### 1. Import Libraries 

In [1]:
#%reload_ext autoreload
#%autoreload 2
%matplotlib notebook

In [2]:
PATH = "."
sz=224
batch_size=64

In [3]:
import numpy as np
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras.layers import Dropout, Flatten, Dense
from keras.applications import ResNet50, Xception
from keras.models import Model, Sequential
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
from keras import optimizers
from keras.applications.resnet50 import preprocess_input
from keras.models import load_model

Using TensorFlow backend.


In [4]:
import matplotlib.pyplot as plt
import seaborn as sn
import pandas as pd 
import numpy as np

In [5]:
import os
from os import listdir
from os.path import isfile, join, exists

### 2. Load Data / Create data_generators

In [6]:
train_df = pd.read_csv(PATH + 'deeplearning/data/training_set.csv')
val_df = pd.read_csv(PATH + 'deeplearning/data/validation_set.csv')
HP_crossmatch_df = pd.read_csv(PATH + 'deeplearning/data/high_prob_crossmatch_test_set.csv')
FO_crossmatch_df = pd.read_csv(PATH + 'deeplearning/data/full_overlap_crossmatch_test_set.csv')

#### flow_from_dir

In [7]:
train_data_dir = f'{PATH}deeplearning/data/train/'
validation_data_dir = f'{PATH}deeplearning/data/valid/'

HP_SDSS_test_data_dir = f'{PATH}deeplearning/data/HP_crossmatch_test/sdss/'
HP_DES_test_data_dir = f'{PATH}deeplearning/data/HP_crossmatch_test/des/'

FO_SDSS_test_data_dir = f'{PATH}deeplearning/data/FO_crossmatch_test/sdss/'
FO_DES_test_data_dir = f'{PATH}deeplearning/data/FO_crossmatch_test/des/'

In [8]:
train_datagen = ImageDataGenerator(
rescale = 1./255,
horizontal_flip = True,
vertical_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range=0.3,
rotation_range=45)

valid_datagen = ImageDataGenerator(rescale = 1./255)

test_datagen = ImageDataGenerator(
rescale = 1./255,
horizontal_flip = True,
vertical_flip = True,
fill_mode = "nearest",
zoom_range = 0.3,
width_shift_range = 0.3,
height_shift_range=0.3,
rotation_range=45)




train_generator = train_datagen.flow_from_directory(
train_data_dir,
target_size = (sz, sz),
batch_size = batch_size, 
class_mode = "categorical",
shuffle = True,
interpolation = 'nearest')

validation_generator = valid_datagen.flow_from_directory(
validation_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = "categorical",
shuffle = False,
interpolation = 'nearest')




HP_SDSS_test_generator = test_datagen.flow_from_directory(
HP_SDSS_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

HP_DES_test_generator = test_datagen.flow_from_directory(
HP_DES_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)




FO_SDSS_test_generator = test_datagen.flow_from_directory(
FO_SDSS_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

FO_DES_test_generator = test_datagen.flow_from_directory(
FO_DES_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

Found 36620 images belonging to 2 classes.
Found 963 images belonging to 2 classes.
Found 1066 images belonging to 1 classes.
Found 1066 images belonging to 1 classes.
Found 12596 images belonging to 1 classes.
Found 12596 images belonging to 1 classes.


### 3. Load Model 

In [9]:
model_final = load_model(PATH + 'deeplearning/weights/Xception_Final.h5')

### 4. Load sklearn / Def metrics 

In [9]:
import itertools
from sklearn import metrics
from sklearn.metrics import confusion_matrix

In [10]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues,
                          figure_size = (11, 6),
                          save=0):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    #if normalize:
        #cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        #print("Normalized confusion matrix")
    #else:
        #print('Confusion matrix, without normalization')

    #print(cm)

    plt.figure(figsize=figure_size)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    #plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    
    if save==1:
        plt.savefig('confusion_matrix.pdf')
        plt.savefig('confusion_matrix.png')

In [11]:
# Define probability threshold mask

def threshold_mask(pred_array, prob_threshold = 0.9999):
    ''' 
    returns the positions in the array where the probability for each class is greater than prob_threshold
    
    pred_array: A one-hot encoded array of softmax probability outputs 
    prob_threshold: Float b/w 0 and 1 to use as a threshold mask
    '''
    pred_class_indices = np.argmax(pred_array, axis=1)
    pos = np.where( pred_array[ range( pred_array.shape[0] ), pred_class_indices ] > prob_threshold )[0]
    return pos


In [12]:
# Define top n predictions mask

def top_pred_mask(pred_array, n_top_predictions = 5000 ):
    '''
    reversedurn the positions of top n most confident predictions
    
    pred_array: A one-reversedencoded array of softmax probability outputs
    n_top_predictions: Num of top predictions
    '''
    pred_class_indices = np.argmax(pred_array, axis=1)
    pos = np.argsort( pred_array[ range( pred_array.shape[0] ), pred_class_indices ] ) 
    mask = np.flip( pos )
    return mask[: n_top_predictions]

# Zeroth Recursion 

## Check Accuracy on FO 

### (i) SDSS 

In [23]:
FO_SDSS_test_generator_NEW = valid_datagen.flow_from_directory(
FO_SDSS_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

Found 12596 images belonging to 1 classes.


In [15]:
FO_SDSS_predictions = model_final.predict_generator(FO_SDSS_test_generator_NEW,  verbose=1)



**Try Without ANY MASKS**

In [16]:
#mask = threshold_mask(FO_SDSS_predictions, prob_threshold=0.9999)
#mask = threshold_mask(FO_SDSS_predictions, prob_threshold=0.999999)
mask = top_pred_mask(FO_SDSS_predictions, 6000)

In [17]:
FO_SDSS_predicted_class_indices=np.argmax(FO_SDSS_predictions,axis=1)


labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = np.array( [labels[k] for k in FO_SDSS_predicted_class_indices] )

filenames= np.array( FO_SDSS_test_generator.filenames )

results=pd.DataFrame({"Filename":filenames[mask],
                      "Pred Labels":predictions[mask],
                     "Predictions":FO_SDSS_predicted_class_indices[mask]})

In [18]:
results['OBJID'] = results['Filename'].apply(lambda x: x.split('/')[-1][:-4]).astype('int64') 
final = pd.merge(results, FO_crossmatch_df, left_on='OBJID', right_on='SDSS_OBJID')
final['Actuals'] = final['P_CS_DEBIASED'] > final['P_EL_DEBIASED']

In [19]:
final[['OBJID','Predictions','Actuals', 'Pred Labels', 'P_EL_DEBIASED', 'P_CS_DEBIASED']].head()

Unnamed: 0,OBJID,Predictions,Actuals,Pred Labels,P_EL_DEBIASED,P_CS_DEBIASED
0,587724241764483150,1,True,spiral,0.09,0.91
1,587731187802177850,1,True,spiral,0.02,0.891
2,587731187802439916,0,False,elliptical,0.579,0.391
3,587731187802439957,1,True,spiral,0.031,0.912
4,587731187802439968,1,True,spiral,0.142,0.747


In [20]:
final.shape

(6000, 250)

In [21]:
print( "Accuracy: ", metrics.accuracy_score(final['Actuals'], final['Predictions']) )
print( "f1_score: ", metrics.f1_score(final['Actuals'], final['Predictions']))

Accuracy:  0.9708333333333333
f1_score:  0.9723145071982282


In [22]:
cm = confusion_matrix(final['Actuals'], final['Predictions'])
plot_confusion_matrix(cm, classes=['Elliptical', 'Spiral'], title="Predictions on SDSS test set", figure_size=(4,3.4))

<IPython.core.display.Javascript object>

### (ii) DES 

In [24]:
FO_DES_test_generator_NEW = valid_datagen.flow_from_directory(
FO_DES_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

Found 12596 images belonging to 1 classes.


In [24]:
FO_DES_predictions = model_final.predict_generator(FO_DES_test_generator_NEW,  verbose=1)



In [25]:
#mask = threshold_mask(FO_DES_predictions, prob_threshold=0.999999)
mask = top_pred_mask(FO_DES_predictions, 6000)

In [26]:
FO_DES_predicted_class_indices=np.argmax(FO_DES_predictions,axis=1)

#FO_SDSS_predicted_class_indices = FO_SDSS_predicted_class_indices[pos]

labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = np.array( [labels[k] for k in FO_DES_predicted_class_indices] )

filenames= np.array( FO_DES_test_generator.filenames )

results=pd.DataFrame({"Filename":filenames[mask],
                      "Labels":predictions[mask],
                     "Predictions":FO_DES_predicted_class_indices[mask]})

In [27]:
results['OBJID'] = results['Filename'].apply(lambda x: x.split('/')[-1][:-4]).astype('int64') 
final = pd.merge(results, FO_crossmatch_df, left_on='OBJID', right_on='DES_COADD_OBJECT_ID')
final['Actuals'] = final['P_CS_DEBIASED'] > final['P_EL_DEBIASED']

In [28]:
final[['OBJID','Predictions','Actuals', 'Labels', 'P_EL_DEBIASED', 'P_CS_DEBIASED']].head()

Unnamed: 0,OBJID,Predictions,Actuals,Labels,P_EL_DEBIASED,P_CS_DEBIASED
0,100005001,1,True,spiral,0.048,0.923
1,138459097,1,True,spiral,0.117,0.794
2,260728929,1,True,spiral,0.026,0.974
3,260775373,1,True,spiral,0.04,0.96
4,260775390,0,False,elliptical,0.5,0.308


In [29]:
final.shape

(6000, 250)

In [30]:
print( "Accuracy: ", metrics.accuracy_score(final['Actuals'], final['Predictions']) )
print( "f1_score: ", metrics.f1_score(final['Actuals'], final['Predictions']))

Accuracy:  0.9716666666666667
f1_score:  0.9764933628318584


In [31]:
cm = confusion_matrix(final['Actuals'], final['Predictions'])
plot_confusion_matrix(cm, classes=['Elliptical', 'Spiral'], title="Predictions on DES FO test set", figure_size=(4,3.4))

<IPython.core.display.Javascript object>

## Extracting Training from Unlabelled DES 

In [17]:
UL_df = pd.read_csv(PATH + 'deeplearning/data/unlabelled_test_set.csv')

In [18]:
UL_test_data_dir = f'{PATH}deeplearning/data/unlabelled/'

In [34]:
UL_test_generator = test_datagen.flow_from_directory(
UL_test_data_dir,
target_size = (sz, sz),
batch_size = 1,
class_mode = None,
shuffle = False)

Found 8983 images belonging to 1 classes.


In [35]:
UL_predictions = model_final.predict_generator(UL_test_generator, verbose=1)



In [36]:
mask = top_pred_mask(UL_predictions, 4500)

In [37]:
UL_predicted_class_indices=np.argmax(UL_predictions,axis=1)


labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = np.array( [labels[k] for k in UL_predicted_class_indices] )

filenames= np.array( UL_test_generator.filenames )

results=pd.DataFrame({"Filename":filenames[mask],
                      "Pred Labels":predictions[mask],
                     "Predictions":UL_predicted_class_indices[mask]})

In [38]:
mcs = np.flip( np.argsort(UL_predictions[:,1]) )[:500]
mce = np.flip( np.argsort(UL_predictions[:,0]) )[:500]

mcs_filenames= np.array( UL_test_generator.filenames )[mcs]
mce_filenames= np.array( UL_test_generator.filenames )[mce]

In [39]:
spirals = results[ results['Filename'].isin(mcs_filenames) ]
ellipticals = results[ results['Filename'].isin(mce_filenames) ]

In [40]:
training_df_1 = pd.concat([spirals, ellipticals])

In [41]:
training_df_1.Filename = training_df_1['Filename'].apply(lambda x: x.split('/')[-1] )

In [42]:
training_df_1.head()

Unnamed: 0,Filename,Pred Labels,Predictions
0,99944914.png,spiral,1
1,331226682.png,spiral,1
2,331129808.png,spiral,1
3,330949625.png,spiral,1
4,330872043.png,spiral,1


In [43]:
train_generator.class_indices

{'elliptical': 0, 'spiral': 1}

In [44]:
def plot_pred(df, start_index = 0):
    
    for i, index in enumerate( df.index[start_index:start_index+100] ):
    
        row = results.loc[index]
        filename = row.Filename
        type_ = row['Pred Labels']

        img_path = UL_test_data_dir + str(filename)

        img = plt.imread(img_path )

        plt.subplot(10,10,i+1)
        plt.axis('Off')
        plt.subplots_adjust(wspace=0.1,hspace=0.05)
        plt.imshow(img)
        plt.suptitle('Predicted ' + type_)
        
    plt.show()

In [45]:
plot_pred(training_df_1[ training_df_1.Predictions==1 ], 400)

<IPython.core.display.Javascript object>

In [46]:
plot_pred(training_df_1[ training_df_1.Predictions==0 ], 400)

<IPython.core.display.Javascript object>

# 1st Recursion 

## Create Training Dir 

In [14]:
import pandas as pd
from subprocess import call
from pathlib import Path

In [15]:
PATH = '/home/khan74/scratch/new_DL_DES/'

In [19]:
file_path = Path( UL_test_data_dir + 'test/' )
link_path = Path( PATH + 'deeplearning/data/recursive')

In [56]:
for i in training_df_1.index:
    
    row = training_df_1.loc[i]
    filename = row.Filename
    
    if row.Predictions == 1:
        type = 'spiral'
    else:
        type = 'elliptical'
        
    img_path = file_path.joinpath(filename)
    soft_link_path = link_path.joinpath(type, filename)
    
    if not img_path.exists():
        print( 'Not Found: Image with OBJID %d' %objid)  
    if soft_link_path.exists():
        continue
    
    status = call(['cp', img_path, soft_link_path])
    if status != 0:
        print("Command failed with return code - ", status)


In [20]:
file_path = Path( PATH + 'data/sdss-galaxyzoo/high_certainty/images/remapped' )

In [21]:
for i in train_df.index:
    
    row = train_df.loc[i]
    objid = row.OBJID
    
    if row.P_CS_DEBIASED > row.P_EL_DEBIASED:
        type = 'spiral'
    else:
        type = 'elliptical'
        
    img_path = file_path.joinpath('%d.png' % objid)
    soft_link_path = link_path.joinpath(type, '%d.png' % objid)
    
    if not img_path.exists():
        print( 'Not Found: Image with OBJID %d' %objid)  
    if soft_link_path.exists():
        continue
    
    status = call(['cp', img_path, soft_link_path])
    if status != 0:
        print("Command failed with return code - ", status)

In [22]:
link_path

PosixPath('/home/khan74/scratch/new_DL_DES/deeplearning/data/recursive')

## Training 

In [13]:
rec_train_data_dir = f'{PATH}deeplearning/data/recursive/'

rec_train_generator = train_datagen.flow_from_dataframe(
dataframe = training_df_1,
directory = UL_test_data_dir + 'test/',
x_col = 'Filename',
y_col = 'Pred Labels',
has_ext = True,
target_size = (sz, sz),
batch_size = batch_size,
class_mode = "categorical",
classes = ['elliptical', 'spiral'],
shuffle = True,
interpolation = 'nearest')

In [14]:
rec_train_generator = train_datagen.flow_from_directory(
rec_train_data_dir,
target_size = (sz, sz),
batch_size = batch_size, 
class_mode = "categorical",
shuffle = True,
interpolation = 'nearest')

Found 37620 images belonging to 2 classes.


#### Define Model 

In [15]:
num_of_classes = 2

base_model = Xception(input_shape=(sz,sz,3), weights='imagenet', include_top=False)
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(0.7)(x)
x = Dense(1024, activation="relu", name='second_last_layer')(x)
predictions = Dense(num_of_classes, activation="softmax")(x)

model_final = Model(inputs=base_model.input, outputs=predictions)

for layer in base_model.layers:
    layer.trainable = False

model_final.summary()

model_final.compile(loss = "categorical_crossentropy", optimizer = optimizers.Adam(lr=0.0001), metrics=["accuracy"])

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 111, 111, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 111, 111, 32) 128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 111, 111, 32) 0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

#### Freeze all base model layers 

In [16]:
os.environ["CUDA_VISIBLE_DEVICES"]='0'

In [17]:
history_1 = model_final.fit_generator(
rec_train_generator,
steps_per_epoch = rec_train_generator.n // rec_train_generator.batch_size,
epochs = 1,
validation_data = validation_generator,
validation_steps = validation_generator.n // validation_generator.batch_size)

Epoch 1/1


In [18]:
#model_final.save(PATH + 'deeplearning/recursive_weights/Xception_rec_freeze.h5')
model_final.save(PATH + 'deeplearning/recursive_weights/Xception_rec_freeze_scratch.h5')

In [19]:
#model_final = load_model(PATH + 'deeplearning/recursive_weights/Xception_rec_freeze.h5')
model_final = load_model(PATH + 'deeplearning/recursive_weights/Xception_rec_freeze_scratch.h5')

#### (ii) Unfreeze [at Layer 40]

In [20]:
len(model_final.layers)

137

In [21]:
split_at = 40
for layer in model_final.layers[:split_at]: layer.trainable = False
for layer in model_final.layers[split_at:]: layer.trainable = True  
model_final.compile(loss = "categorical_crossentropy", optimizer = optimizers.Adam(lr=0.0001), metrics=["accuracy"])

In [22]:
#Select Call Backs

Early_Stop = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=3)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-8, verbose=1, mode='min')
#checkpoint = keras.callbacks.ModelCheckpoint(PATH + 'deeplearning/recursive_weights/Xception_rec_UnFreeze.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='min', period=1)
checkpoint = keras.callbacks.ModelCheckpoint(PATH + 'deeplearning/recursive_weights/Xception_rec_UnFreeze_scratch.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='min', period=1)

In [23]:
history_2 = model_final.fit_generator(
rec_train_generator,
steps_per_epoch = rec_train_generator.n // rec_train_generator.batch_size,
epochs = 4,
workers=1,
validation_data = validation_generator,
validation_steps = validation_generator.n // validation_generator.batch_size,
callbacks=[Early_Stop, reduce_lr, checkpoint])

Epoch 1/4

Epoch 00001: val_loss improved from inf to 0.54237, saving model to /home/khan74/scratch/new_DL_DES/deeplearning/recursive_weights/Xception_rec_UnFreeze_scratch.h5
Epoch 2/4

Epoch 00002: val_loss improved from 0.54237 to 0.33823, saving model to /home/khan74/scratch/new_DL_DES/deeplearning/recursive_weights/Xception_rec_UnFreeze_scratch.h5
Epoch 3/4

Epoch 00003: val_loss did not improve from 0.33823
Epoch 4/4

Epoch 00004: val_loss improved from 0.33823 to 0.22548, saving model to /home/khan74/scratch/new_DL_DES/deeplearning/recursive_weights/Xception_rec_UnFreeze_scratch.h5


#### (iii) Unfreeze [at Layer 2] 

In [15]:
model_final = load_model(PATH + 'deeplearning/recursive_weights/Xception_rec_UnFreeze_scratch.h5')

In [16]:
split_at = 2
for layer in model_final.layers[:split_at]: layer.trainable = False
for layer in model_final.layers[split_at:]: layer.trainable = True  
model_final.compile(loss = "categorical_crossentropy", optimizer = optimizers.Adam(lr=0.0001), metrics=["accuracy"])

In [17]:
#Select Call Backs

Early_Stop = keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=3)
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-8, verbose=1, mode='min')
#checkpoint = keras.callbacks.ModelCheckpoint(PATH + 'deeplearning/recursive_weights/Xception_rec_UnFreeze_2.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='min', period=1)
checkpoint = keras.callbacks.ModelCheckpoint(PATH + 'deeplearning/recursive_weights/Xception_rec_UnFreeze_2_scratch.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='min', period=1)

In [18]:
history_3 = model_final.fit_generator(
rec_train_generator,
steps_per_epoch = rec_train_generator.n // rec_train_generator.batch_size,
epochs = 20,
workers =1,
validation_data = validation_generator,
validation_steps = validation_generator.n // validation_generator.batch_size,
callbacks=[Early_Stop, reduce_lr, checkpoint])

Epoch 1/20

Epoch 00001: val_loss improved from inf to 0.00194, saving model to /home/khan74/scratch/new_DL_DES/deeplearning/recursive_weights/Xception_rec_UnFreeze_2_scratch.h5
Epoch 2/20

Epoch 00002: val_loss did not improve from 0.00194
Epoch 3/20

Epoch 00003: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.

Epoch 00003: val_loss did not improve from 0.00194
Epoch 4/20

Epoch 00004: val_loss did not improve from 0.00194


In [20]:
model_final.save(PATH + 'deeplearning/recursive_weights/Xception_rec_Final_scratch.h5')

In [21]:
model_final = load_model(PATH + 'deeplearning/recursive_weights/Xception_rec_Final_scratch.h5')

In [17]:
model_final = load_model(PATH + 'deeplearning/recursive_weights/Xception_rec_UnFreeze_2_scratch.h5')

## Check Accuracy on FO 

### (i) SDSS 

In [25]:
FO_SDSS_predictions = model_final.predict_generator(FO_SDSS_test_generator_NEW,  verbose=1)



In [34]:
#mask = threshold_mask(FO_SDSS_predictions, prob_threshold=0.9999)
#mask = threshold_mask(FO_SDSS_predictions, prob_threshold=0.999999)
mask = top_pred_mask(FO_SDSS_predictions, 6000)

In [35]:
FO_SDSS_predicted_class_indices=np.argmax(FO_SDSS_predictions,axis=1)


labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = np.array( [labels[k] for k in FO_SDSS_predicted_class_indices] )

filenames= np.array( FO_SDSS_test_generator.filenames )

results=pd.DataFrame({"Filename":filenames[mask],
                      "Pred Labels":predictions[mask],
                     "Predictions":FO_SDSS_predicted_class_indices[mask]})

In [36]:
results['OBJID'] = results['Filename'].apply(lambda x: x.split('/')[-1][:-4]).astype('int64') 
final = pd.merge(results, FO_crossmatch_df, left_on='OBJID', right_on='SDSS_OBJID')
final['Actuals'] = final['P_CS_DEBIASED'] > final['P_EL_DEBIASED']

In [37]:
final[['OBJID','Predictions','Actuals', 'Pred Labels', 'P_EL_DEBIASED', 'P_CS_DEBIASED']].head()

Unnamed: 0,OBJID,Predictions,Actuals,Pred Labels,P_EL_DEBIASED,P_CS_DEBIASED
0,587724241764483150,1,True,spiral,0.09,0.91
1,587731511538024608,0,False,elliptical,0.902,0.064
2,587731174380929599,1,True,spiral,0.041,0.938
3,587731173845696979,1,True,spiral,0.016,0.836
4,587731511537958990,0,False,elliptical,0.847,0.098


In [38]:
final.shape

(6000, 250)

In [39]:
print( "Accuracy: ", metrics.accuracy_score(final['Actuals'], final['Predictions']) )
print( "f1_score: ", metrics.f1_score(final['Actuals'], final['Predictions']))

Accuracy:  0.982
f1_score:  0.9805964786201941


In [40]:
cm = confusion_matrix(final['Actuals'], final['Predictions'])
plot_confusion_matrix(cm, classes=['Elliptical', 'Spiral'], title="Predictions on SDSS test set", figure_size=(4,3.4))

<IPython.core.display.Javascript object>

### (ii) DES 

In [26]:
FO_DES_predictions = model_final.predict_generator(FO_DES_test_generator_NEW,  verbose=1)



In [27]:
#mask = threshold_mask(FO_DES_predictions, prob_threshold=0.999999)
mask = top_pred_mask(FO_DES_predictions, 6000)

In [28]:
FO_DES_predicted_class_indices=np.argmax(FO_DES_predictions,axis=1)

#FO_SDSS_predicted_class_indices = FO_SDSS_predicted_class_indices[pos]

labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = np.array( [labels[k] for k in FO_DES_predicted_class_indices] )

filenames= np.array( FO_DES_test_generator.filenames )

results=pd.DataFrame({"Filename":filenames[mask],
                      "Labels":predictions[mask],
                     "Predictions":FO_DES_predicted_class_indices[mask]})

In [29]:
results['OBJID'] = results['Filename'].apply(lambda x: x.split('/')[-1][:-4]).astype('int64') 
final = pd.merge(results, FO_crossmatch_df, left_on='OBJID', right_on='DES_COADD_OBJECT_ID')
final['Actuals'] = final['P_CS_DEBIASED'] > final['P_EL_DEBIASED']

In [30]:
final[['OBJID','Predictions','Actuals', 'Labels', 'P_EL_DEBIASED', 'P_CS_DEBIASED']].head()

Unnamed: 0,OBJID,Predictions,Actuals,Labels,P_EL_DEBIASED,P_CS_DEBIASED
0,100005001,1,True,spiral,0.048,0.923
1,122565946,1,True,spiral,0.257,0.565
2,237599147,0,False,elliptical,0.726,0.24
3,237600442,1,True,spiral,0.2,0.764
4,237600845,1,True,spiral,0.351,0.551


In [31]:
final.shape

(6000, 250)

In [32]:
print( "Accuracy: ", metrics.accuracy_score(final['Actuals'], final['Predictions']) )
print( "f1_score: ", metrics.f1_score(final['Actuals'], final['Predictions']))

Accuracy:  0.9768333333333333
f1_score:  0.9759057028947825


In [33]:
cm = confusion_matrix(final['Actuals'], final['Predictions'])
plot_confusion_matrix(cm, classes=['Elliptical', 'Spiral'], title="Predictions on DES FO test set", figure_size=(4,3.4))

<IPython.core.display.Javascript object>