In [1]:
import numpy as np
import pandas as pd

import random
import os
from astropy.io import fits
from astropy.table import Table

import matplotlib.pyplot as plt

from astropy.visualization import make_lupton_rgb
plt.style.use('dark_background')

In [2]:
def make_plot_all(objects, title, data, column_name):
    #print(title)
    for i in range(len(objects)):
        if(i%6 == 0):
            plt.figure(figsize=(16,9))
            for j in range(6):
                if(i+j > len(objects)-1): break
                plt.subplot(1,6,j+1)
                print(data['COADD_OBJECT_ID'].iloc[i+j])
                new_title = 'Prob: {:.4f}'.format(data[column_name].iloc[i+j])
                plt.title(new_title)
                rgb = make_lupton_rgb(objects[i+j][2], objects[i+j][1], objects[i+j][0], Q=11., stretch=40.)
                rgb = np.flipud(rgb)
                plt.imshow(rgb, aspect='equal')
                plt.xticks([], [])
                plt.yticks([], []) 
            plt.show()

In [3]:
filepath = '/Users/jimenagonzalez/research/DSPL/Searching-double-lenses/vit_cnn_multiclass/results/data/'
hdu_list = fits.open(filepath + 'Unique_positives_complete_fixed.fits')
print('Num. single candidates: ', len(hdu_list[1].data))
images_sing = hdu_list[1].data
images_sing = images_sing[:,0:3,:,:]
data_sing = pd.DataFrame(hdu_list[2].data)
hdu_list.close()

to_remove = pd.read_csv('single_remove.csv')
mask = data_sing['COADD_OBJECT_ID'].isin(to_remove['COADD_OBJECT_ID'])
data_sing = data_sing.drop(data_sing[mask].index)
data_sing = data_sing.drop_duplicates()
images_sing = images_sing[data_sing.index]
data_sing = data_sing.reset_index(drop=True)

print(len(images_sing), len(data_sing))

Num. single candidates:  20653
20639 20639


In [4]:
filepath = '/Users/jimenagonzalez/research/DSPL/Searching-double-lenses/vit_cnn_multiclass_double/results/'
hdu_list = fits.open(filepath + 'Unique_double_complete_fixed.fits')
print('Num. double candidates: ', len(hdu_list[1].data))
images_doub = hdu_list[1].data
images_doub = images_doub[:,0:3,:,:]
data_doub = pd.DataFrame(hdu_list[2].data)
hdu_list.close()

to_remove = pd.read_csv('double_remove.csv')
mask = data_doub['COADD_OBJECT_ID'].isin(to_remove['COADD_OBJECT_ID'])
data_doub = data_doub.drop(data_doub[mask].index)
data_doub = data_doub.drop_duplicates()
images_doub = images_doub[data_doub.index]
data_doub = data_doub.reset_index(drop=True)

print(len(images_doub), len(data_doub))

Num. double candidates:  2540
2538 2538


In [5]:
size = 0.0032
counter = 0
data_tmp_doub, data_tmp_sing = pd.DataFrame(columns=data_doub.columns), pd.DataFrame(columns=data_sing.columns)

for i in range(len(data_doub)):
    ra, dec = data_doub['RA'].iloc[i], data_doub['DEC'].iloc[i]
    data_neighbors = data_sing[data_sing['RA'] > ra - size][data_sing['RA'] < ra + size][data_sing['DEC'] > dec - size][data_sing['DEC'] < dec + size]
    if(len(data_neighbors) > 0):
        counter += 1
        for (index, row) in data_neighbors.iterrows():
            ra_tmp, dec_tmp = row['RA'], row['DEC']
            new_data = data_sing[data_sing['RA'] > ra_tmp - size][data_sing['RA'] < ra_tmp + size][data_sing['DEC'] > dec_tmp - size][data_sing['DEC'] < dec_tmp + size]
            data_neighbors = pd.concat([data_neighbors, new_data])
            
        data_neighbors = data_neighbors.drop_duplicates()
        data_tmp_sing = data_tmp_sing.append(data_neighbors)
        data_tmp_doub = data_tmp_doub.append(data_doub.iloc[i])
        
        #make_plot_all([images_doub[i]], 'title', data_doub.iloc[i])
        #make_plot_all(images_sing[data_neighbors.index], 'title', data_neighbors)
        
data_tmp_sing = data_tmp_sing.drop_duplicates() #Single candidates also in double candidates
data_tmp_doub = data_tmp_doub.drop_duplicates() #Double candidates in single candidates, may not the same exact image

print(len(data_tmp_sing), len(data_tmp_doub))

  import sys
  if sys.path[0] == "":


613 610


In [6]:
images_tmp_doub = images_doub[data_tmp_doub.index] #Images of double in intersection
images_tmp_sing = images_sing[data_tmp_sing.index] #Images of single in intersection

data_sing = data_sing.drop(data_tmp_sing.index)
images_sing = images_sing[data_sing.index] #Images of single candidates, none in intersection
data_sing = data_sing.reset_index(drop=True)

data_doub = data_doub.drop(data_tmp_doub.index)
images_doub = images_doub[data_doub.index] #Images of double candidates, none in intersection
data_doub = data_doub.reset_index(drop=True)

data_tmp_doub, data_tmp_sing = data_tmp_doub.reset_index(drop=True), data_tmp_sing.reset_index(drop=True)
data_tmp_doub = data_tmp_doub.rename(columns = {'PROB': 'PROB_DOUB'})
data_tmp_sing = data_tmp_sing.rename(columns = {'PROB': 'PROB_SING'})

mask = data_tmp_sing['COADD_OBJECT_ID'].isin(data_tmp_doub['COADD_OBJECT_ID'])
test = data_tmp_sing[mask]
print(len(data_tmp_sing), len(data_tmp_doub), len(test))

613 610 492


In [7]:
#Working space
numpix = 45
columns = list(data_tmp_sing.columns)
columns.append('PROB_DOUB')
inter_data = pd.DataFrame(columns = columns)
inter_images = np.zeros((1,3,numpix,numpix))

sing_save, doub_save = pd.read_csv('inter_single_save.csv'), pd.read_csv('inter_double_save.csv')
sing_add, doub_special = pd.read_csv('inter_single_save_additional.csv'), pd.read_csv('inter_special.csv')
sing_save = sing_save.append(sing_add)
doub_save = doub_save.append(doub_special)

for i in range(len(data_tmp_doub)):
    ra, dec = data_tmp_doub['RA'].iloc[i], data_tmp_doub['DEC'].iloc[i]
    data_neighbors = data_tmp_sing[data_tmp_sing['RA'] > ra - size][data_tmp_sing['RA'] < ra + size][data_tmp_sing['DEC'] > dec - size][data_tmp_sing['DEC'] < dec + size]
    
    for (index, row) in data_neighbors.iterrows():
        ra_tmp, dec_tmp = row['RA'], row['DEC']
        new_data = data_tmp_sing[data_tmp_sing['RA'] > ra_tmp - size][data_tmp_sing['RA'] < ra_tmp + size][data_tmp_sing['DEC'] > dec_tmp - size][data_tmp_sing['DEC'] < dec_tmp + size]
        data_neighbors = pd.concat([data_neighbors, new_data])
    data_neighbors = data_neighbors.drop_duplicates()
        
    if(len(data_neighbors) == 1 and data_tmp_doub['COADD_OBJECT_ID'].iloc[i] == data_neighbors['COADD_OBJECT_ID'].iloc[0]):
        #Saving the systems that are overlapped with the same ID
        tmp_row = data_tmp_doub.iloc[i]
        tmp_row['PROB_SING'] = data_neighbors['PROB_SING'].iloc[0]
        inter_data = inter_data.append(tmp_row)
        inter_images = np.append(inter_images, [images_tmp_doub[i]], axis = 0)
        #make_plot_all([images_tmp_doub[i]], 'title', pd.DataFrame(data_tmp_doub.iloc[i]).T)
        #make_plot_all(images_tmp_sing[data_neighbors.index], 'title', data_neighbors)
    else:
        #Saving systems that don't have the same image and/or have more than one neighbor
        if(len(doub_save[doub_save['COADD_OBJECT_ID'] == data_tmp_doub['COADD_OBJECT_ID'].iloc[i]]) > 0):
            tmp_row = data_tmp_doub.iloc[i]
            for j in range(len(data_neighbors)):
                if(j > 0): continue
                tmp_row['PROB_SING'] = data_neighbors['PROB_SING'].iloc[j]
                inter_images = np.append(inter_images, [images_tmp_doub[i]], axis = 0)
                inter_data = inter_data.append(tmp_row)
            #if(len(data_neighbors) > 1):  
            #    make_plot_all([images_tmp_sing[data_neighbors.iloc[j].name]], 'title', pd.DataFrame(data_neighbors.iloc[j]).T, 'PROB_SING')
            #    make_plot_all([images_tmp_doub[i]], 'title', pd.DataFrame(data_tmp_doub.iloc[i]).T, 'PROB_DOUB')
                
        mask = data_neighbors['COADD_OBJECT_ID'].isin(sing_save['COADD_OBJECT_ID'])
        sing_save_tmp = data_neighbors[mask]
        for j in range(len(sing_save_tmp)):
            tmp_row = sing_save_tmp.iloc[j]
            tmp_row['PROB_DOUB'] = data_tmp_doub['PROB_DOUB'].iloc[i]
            inter_images = np.append(inter_images, [images_tmp_sing[sing_save_tmp.iloc[j].name]], axis = 0)
            inter_data = inter_data.append(tmp_row)
        
inter_images = np.delete(inter_images, 0, axis = 0)
inter_data = inter_data.reset_index(drop=True)
inter_data = inter_data.drop_duplicates()
inter_data = inter_data.drop([422], axis=0)
inter_images = inter_images[inter_data.index]
inter_data = inter_data.reset_index(drop=True)

print(len(inter_data), len(inter_images))

  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cacher_needs_updating = self._check_is_chained_assignment_possible()
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer_missing(indexer, value)


610 610


In [8]:
def write_fit_file(name, x, data, inter):
    my_types = {'COADD_OBJECT_ID': int, 'TILENAME': str, 'HPIX_16384': int, 'HPIX_4096': int,
       'DNF_ZMEAN_SOF': float, 'RA': float, 'DEC': float, 'FLUX_RADIUS_G': float, 'FLUX_RADIUS_R': float,
       'FLUX_RADIUS_I': float, 'FLUX_RADIUS_Z': float, 'KRON_RADIUS': float, 'GAP_FLUX_G': float,
       'MOF_BDF_FLUX_G': float, 'MOF_PSF_FLUX_G': float, 'SOF_BDF_FLUX_G': float, 'SOF_PSF_FLUX_G': float,
       'MAG_AUTO_G': float, 'MAG_APER_4_G': float, 'MAG_APER_8_G': float, 'SOF_BDF_G_1': float,
       'SOF_BDF_G_2': float, 'IMAFLAGS_ISO_G': int, 'IMAFLAGS_ISO_R': int, 'IMAFLAGS_ISO_I': int,
       'IMAFLAGS_ISO_Z': int, 'EXT_COADD': int}
    if(inter):
        tmp_types = {'PROB_SING': float, 'PROB_DOUB': float}
        my_types.update(tmp_types)
    else:
        tmp_types = {'PROB': float}
        my_types.update(tmp_types)
    data = data.astype(my_types)
    
    primary = fits.PrimaryHDU()
    image = fits.ImageHDU(x, name="IMAGE")
    table_data = Table.from_pandas(data)
    table = fits.BinTableHDU(data = table_data)
    hdu_list = fits.HDUList([primary, image, table])
    hdu_list.writeto(file_path + name + '.fits', overwrite=True)   
    hdu_list.close()

In [9]:
file_path = 'fit_files/'

write_fit_file('single', images_sing, data_sing, False)
write_fit_file('double', images_doub, data_doub, False)
write_fit_file('inter', inter_images, inter_data, True)