# Train a CNN with a .fits file as input

In [101]:
# imports
from __future__ import print_function
import keras
from keras import utils as np_utils
import tensorflow
from keras import datasets, layers, models
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import tensorflow as tf
from matplotlib import pyplot as plt

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))


Num GPUs Available:  0


In [102]:
import joblib
from skimage.transform import resize
from skimage.io import imread
from init_modules import *


# Set the path to the data directory
data_set = {}
im_size = 100
shift_interval = 1

# This function finds the position of the object in the image
def find_object_pos(file):
    cd = ClustarData(path=file, group_factor=0)
    if len(cd.groups) > 0:
        disk = cd.groups[0]
        bounds = disk.image.bounds
        x = (bounds[2] + bounds[3])/2
        y = (bounds[0] + bounds[1])/2
        return (x, y)
    else:
        # img_data = fits.getdata(file)
        
        # return img_data.data.shape[0]/2, img_data.data.shape[1]/2
        print("No object found in {}".format(file))
        return None


# This function crops the image to the size of the object
def init_cropped_images(directory_of_fits_files):
    fits_files = []
    for fits_file in directory_of_fits_files:
        img_data = fits.getdata(fits_file)
        object_pos = find_object_pos(fits_file)

        if object_pos != None:
            # Data shape is (1, 1, x, y) we want it to be (x, y)
            img_data.shape = (img_data.shape[2], img_data.shape[3])

            # Set the size of the crop in pixels
            crop_size = units.Quantity((im_size, im_size), units.pixel)

            img_crop = Cutout2D(img_data, object_pos, crop_size)

            fits_files.append(img_crop)

    return fits_files


# This function rotates the image by a random angle and shifts it by a random amount in a random direction
def rotate_disk(disk_to_rotate, angle):

    # Rotate the disk
    rotated_disk = rotate(disk_to_rotate, angle)
    # Since rotating pads the image, we need to crop it to the original size
    x, y = (len(rotated_disk[0]), len(rotated_disk))

    shift_interval = 0
    si = shift_interval + 1

    rand_x_shift = random.randint(-shift_interval, shift_interval)
    rand_y_shift = random.randint(-shift_interval, shift_interval)

    (x_lower, x_upper) = int((x/2 - im_size/2)) + \
        rand_x_shift, int(x/2 + im_size/2) + rand_x_shift
    (y_lower, y_upper) = int((y/2 - im_size/2)) + \
        rand_y_shift, int(y/2 + im_size/2) + rand_y_shift

    return rotated_disk[(x_lower+si):(x_upper-si), (y_lower+si):(y_upper-si)]


# This function flips the image horizontally, vertically or both
def flip_disk(disk_to_flip):

    flipped_disk = disk_to_flip

    if bool(random.getrandbits(1)):
        flipped_disk = np.fliplr(flipped_disk)

    if bool(random.getrandbits(1)):
        flipped_disk = np.flipud(flipped_disk)

    if bool(random.getrandbits(1)):
        flipped_disk = np.flip(flipped_disk)

    return flipped_disk


# This function augments the image by rotating and flipping it
def augment_disk(disk):
    angle = random.randint(0, 360)
    return rotate_disk(flip_disk(disk), angle)



In [103]:
#gGenerate dataset from the fits files

def generate_dataset(augmentations_per_gaussian, directory_of_fits_files):
    count = 0
    dataset = []
    fits_files = init_cropped_images(directory_of_fits_files)
    for fits_file in fits_files:
        for i in range(0, augmentations_per_gaussian):
            if len(fits_file.data) != 0:
                # zscale = ZScaleInterval(contrast=0.25, nsamples=1)
                
                # ret_data = zscale()
                dataset.append(augment_disk(fits_file.data))
            print(count)
            count += 1
    return dataset


In [104]:
print(generate_dataset(2, glob.glob('data/train_pos/*.fits')))


  stats_.y_len = 2 * np.sqrt(stats_.eigen_values[1] *


No object found in data/train_pos\hh212_2015_band7_0.fits
No object found in data/train_pos\hh212_2015_band7_1.fits
0
1
2
3
4
5
6
7
[array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32), array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., 

In [113]:
# Hyper-parameters data-loading and formatting
nmbr_of_aug = 15
batch_size = 128
num_classes = 2
epochs = 3
img_rows, img_cols = 82, 82  # sqrt of 6724

x_train = np.array(generate_dataset(nmbr_of_aug, glob.glob('data/train_pos/*.fits')) +
                   generate_dataset(nmbr_of_aug, glob.glob('data/train_neg/*.fits')))

batch_size = int(len(x_train)/10)

print(len(x_train))
print(x_train[0])
print(x_train[0].shape)


lbl_train = [0] * len(generate_dataset(nmbr_of_aug, glob.glob('data/train_pos/*.fits'))) + \
            [1] * len(generate_dataset(nmbr_of_aug, glob.glob('data/train_neg/*.fits')))


x_test = np.array(generate_dataset(nmbr_of_aug, glob.glob('data/test_pos/*.fits')) +
                  generate_dataset(nmbr_of_aug, glob.glob('data/test_neg/*.fits')))

lbl_test =  [0] * len(generate_dataset(nmbr_of_aug, glob.glob('data/test_pos/*.fits'))) + \
            [1] * len(generate_dataset(nmbr_of_aug, glob.glob('data/test_neg/*.fits')))

  stats_.y_len = 2 * np.sqrt(stats_.eigen_values[1] *


No object found in data/train_pos\hh212_2015_band7_0.fits
No object found in data/train_pos\hh212_2015_band7_1.fits
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
No object found in data/train_neg\member.uid___A001_X133d_X1692._SDSS_J143029.88p133912.0__sci.spw19_21_23_25.cont.I.pbcor.fits


  group.res.data = 1 - (bvg / group.image.data)
  var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  rms = np.sqrt(np.nanmean(x**2))


No object found in data/train_neg\member.uid___A001_X133d_X3567.G335a_sci.spw25_27_29_31.cont.I.tt0.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3aa1.HOPS-185_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3aa1.HOPS-337_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3ad4.HOPS-342_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133f_X2fa.Ref_090_SMC_SWBar_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133f_X30e.Tile_095_SMC_SWBar_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X134b_Xc6.P6_sci.spw16_18_20_22_24_26.cont.I.tt1.pbcor.fits


  stats_.y_len = 2 * np.sqrt(stats_.eigen_values[0] *


No object found in data/train_neg\member.uid___A001_X146e_Xe.AT2019qiz_sci.spw5_7_9_11.cont.I.tt0.pbcor.fits
No object found in data/train_neg\member.uid___A001_X14c2_X174.J1100p0846_sci.spw15_17_19.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X2fa_X152.ari_l.DCE064_sci.spw0_1_2_3_98023MHz.12m.cont.I.tt0.pbcor.fits
No object found in data/train_neg\member.uid___A001_X2fb_Xc21.ari_l.HG2830_sci.spw0_1_2_3_4_226769MHz.12m.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X2fe_X7e9.ari_l.Filament_1_sci.spw0_1_2_3_107773MHz.12m.cont.I.tt1.pbcor.fits
No object found in data/train_neg\member.uid___A001_X2fe_X7e9.ari_l.Filament_3_sci.spw0_1_2_3_107771MHz.12m.cont.I.tt0.pbcor.fits
No object found in data/train_neg\member.uid___A001_X33e_Xff.SMC0N69_sci.spw0_1_2_108225MHz.mos.7m.cont.I.tt0.pbcor.fits
No object found in data/train_neg\member.uid___A001_X879_Xa00.S09_sci.spw25_27_29_31_33_35_37_39_41_43_45_47_49.cont.I.pbcor.fits
No object found in data

  x_train = np.array(generate_dataset(nmbr_of_aug, glob.glob('data/train_pos/*.fits')) +


No object found in data/train_pos\hh212_2015_band7_0.fits
No object found in data/train_pos\hh212_2015_band7_1.fits
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
No object found in data/train_neg\member.uid___A001_X133d_X1692._SDSS_J143029.88p133912.0__sci.spw19_21_23_25.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3567.G335a_sci.spw25_27_29_31.cont.I.tt0.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3aa1.HOPS-185_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3aa1.HOPS-337_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133d_X3ad4.HOPS-342_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/train_neg\member.uid___A001_X133f_X2fa.Ref_090_SMC_SWBar_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/tra

  x_test = np.array(generate_dataset(nmbr_of_aug, glob.glob('data/test_pos/*.fits')) +


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
No object found in data/test_neg\member.uid___A001_X1273_X1e3._16__sci.spw19_21_23_25.cont.I.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1273_X1e3._52__sci.spw19_21_23_25.cont.I.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1273_X1e3._82__sci.spw19_21_23_25.cont.I.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1273_X716.GAMA228432_sci.spw19_21_23_25.cont.I.tt0.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1288_Xfc2.HH_46_sci.spw16_18_20_22.cont.I.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1465_X2003.Ridge-M3_sci.spw16_18_20_22.cont.I.tt1.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1465_X366a.J1830139501085150_sci.spw19_21_23_25.cont.I.pbcor.fits
No object found in data/test_neg\member.uid___A001_X1465_X6a.M12_665_sci.spw17_19_21_23.cont.I.pbcor.fits
No object found in data/test_neg\member.uid___A001_X

In [114]:
print(len(lbl_test) == len(x_test))
print(len(lbl_train) == len(x_train))

True
True


In [115]:
#print(len(x_train))
#print(x_test.shape)

#print(np.array(x_test).shape)

x_test_temp = np.empty(1)

for i in x_test:
    i_arr = np.array(i)
    x_test_temp.append(i_arr)


print(x_test_temp.shape)




# #convert x_train and x_test to png

# zscale = ZScaleInterval(contrast=0.25, nsamples=1)

# for i in range(len(x_train)):
#     x_train[i] = zscale(x_train[i])

# for i in range(len(x_test)):
#     x_test[i] = zscale(x_test[i])



# if K.image_data_format() == 'channels_first':
#     x_train = x_test.reshape(x_train.shape[0], 1, img_rows, img_cols)
#     x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
#     input_shape = (1, img_rows, img_cols)
# else:
#     x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
#     x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
#     input_shape = (img_rows, img_cols, 1)

AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [None]:
# print(len(x_test))
# print(x_test[0].shape)

# if K.image_data_format() == 'channels_first':
#     x_train = x_test.reshape(x_train.shape[0], 1, img_rows, img_cols)
#     x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
#     input_shape = (1, img_rows, img_cols)
# else:
#     x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
#     x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
#     input_shape = (img_rows, img_cols, 1)



20
(98, 98)


ValueError: cannot reshape array of size 46 into shape (46,82,82,1)

In [118]:
# x_train = x_train.astype('float32')
# x_test = x_test.astype('float32')

# x_train /= 255
# x_test /= 255


y_train = keras.utils.np_utils.to_categorical(lbl_train, num_classes)
y_test = keras.utils.np_utils.to_categorical(lbl_test, num_classes)


#Convert x_train and x_test to tensors

X_train = []
X_test = []

#for i in range(len(x_train)):
#    X_train.append(tf.convert_to_tensor(x_train[i]))

#for i in range(len(x_test)):
#    X_test.append(tf.convert_to_tensor(x_test[i]))

for arr in x_train:
    X_train.append(tf.convert_to_tensor(arr))

for arr in x_test:
    X_test.append(tf.convert_to_tensor(arr))

# X_train[18] = X_train[17]
# X_train[28] = X_train[26]
# X_test[6] = X_test[5]


#tf.convert_to_tensor(X_train)
#tf.convert_to_tensor(X_test)
X_train = tf.stack(X_train, axis=0, name='stack1')
X_test = tf.stack(X_test, axis=0, name='stack2')


#print(X_train)
#print(X_train[0])
#print(X_train[18])
print(X_train.shape)
print(X_test.shape)

InvalidArgumentError: {{function_node __wrapped__Pack_N_690_device_/job:localhost/replica:0/task:0/device:CPU:0}} Shapes of all inputs must match: values[0].shape = [98,98] != values[257].shape = [84,0] [Op:Pack] name: stack1

In [119]:
print(X_train[18].shape)
print(y_test.shape)

(98, 98)
(300, 2)


In [120]:
## Define model ##
model = Sequential()

models_sizes = [0.000001, 0.00001, 0.00005, 0.0001, 0.001]

# print(x_train[500])

epochs = 82

model.add(layers.Conv2D(filters=82, kernel_size=(5, 5),
          activation='relu', input_shape=(98, 98, 1)))
model.add(layers.AveragePooling2D(pool_size=(2, 2)))

model.add(layers.Conv2D(filters=128, kernel_size=(2, 2), activation='relu'))
model.add(layers.AveragePooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(500, activation='relu'))
model.add(Dense(300, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.summary()

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=tensorflow.keras.optimizers.SGD(learning_rate=0.1),
              metrics=['accuracy'],)

fit_info = model.fit(X_train, y_train,
                     batch_size=batch_size,
                     epochs=epochs,
                     verbose=1,
                     validation_data=(X_test, y_test))

score = model.evaluate(X_test, y_test, verbose=0, return_dict=True)

print(score)

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_28 (Conv2D)          (None, 94, 94, 82)        2132      
                                                                 
 average_pooling2d_28 (Avera  (None, 47, 47, 82)       0         
 gePooling2D)                                                    
                                                                 
 conv2d_29 (Conv2D)          (None, 46, 46, 128)       42112     
                                                                 
 average_pooling2d_29 (Avera  (None, 23, 23, 128)      0         
 gePooling2D)                                                    
                                                                 
 flatten_14 (Flatten)        (None, 67712)             0         
                                                                 
 dense_42 (Dense)            (None, 500)             

ValueError: Data cardinality is ambiguous:
  x sizes: 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 84, 98, 98, 98, 98, 0, 0, 98, 98, 98, 86, 98, 98, 95, 98, 98, 98, 97, 98, 98, 98, 3, 98, 98, 98, 98, 95, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 84, 98, 98, 98, 98, 98, 91, 98, 98, 98, 96, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98
  y sizes: 690
Make sure all arrays contain the same number of samples.