In [1]:
import numpy as np
import pandas as pd
import os
import ipython_bell
import random
import codecs

from astropy.io import fits
from astropy.table import Table, Column
from tqdm.notebook import tqdm

In [2]:
# Load the candidate catalogue and force both object-ID columns to integers.
data = pd.read_csv('data/in_sled2.csv').astype(
    {'COADD_OBJECT_ID': int, 'Y3_COADD_OBJECT_ID': int}
)
print(len(data))

# Keep only rows with a usable numeric score and one row per COADD_OBJECT_ID.
# Built as a single chain so that no column is ever assigned on a filtered
# slice (which is what triggers pandas' SettingWithCopyWarning).
data = (
    data
    # Non-numeric scores become NaN here, so one dropna removes both the
    # originally missing and the unparsable values.
    .assign(score=lambda frame: pd.to_numeric(frame['score'], errors='coerce'))
    .dropna(subset=['score'])
    .astype({'score': float})
    # drop_duplicates keeps the first occurrence of each ID.
    .drop_duplicates(subset=['COADD_OBJECT_ID'])
    # Reset last so the resulting index is contiguous (0..n-1).
    .reset_index(drop=True)
)
print(len(data))
data.head()

1687
1665


Unnamed: 0,K_RA,K_DEC,K_SCORE,Y3_COADD_OBJECT_ID,J_RA,J_DEC,score_sims,score_real,score_both,COADD_OBJECT_ID,...,score,image_sep,info,n_img,flag,image_conf,lens_type,source_type,contaminant_type,papers
0,2.146579,-39.377357,0.001118,182434686,2.146579,-39.377357,0.0604,0.0,0.0,1046785880,...,1.0,,,,CANDIDATE,,Galaxy,,,2019ApJS..243...17J
1,1.117249,-38.735816,0.190351,142345819,1.117249,-38.735816,0.9792,0.0,0.0,1037197500,...,1.0,,,,CANDIDATE,,Galaxy,,,2019ApJS..243...17J
2,0.719928,-38.28619,0.999342,142322552,0.719928,-38.28619,0.2046,0.0,0.0,1037154929,...,2.0,,,,CANDIDATE,,Galaxy,,,2022A&A...668A..73R
3,359.35464,-39.374435,0.999394,213772612,359.35464,-39.374435,0.0104,0.0,0.0,1031999366,...,1.0,,,,CANDIDATE,,Galaxy,,,2019ApJS..243...17J
4,359.132439,-38.75275,2.6e-05,172759386,359.132439,-38.75275,0.9329,0.0,0.0,1028411116,...,1.0,,,,CANDIDATE,,Galaxy,,,"2019ApJS..243...17J, 2022arXiv220602764S"


In [3]:
# Walk data/Fit_files/<DES...>/<coadd_id>/ and stack the g, r, i cutouts of
# every object into `x`, recording the matching object ID (taken from the
# directory name) in `list_coadd_ids`, so that x[k] belongs to list_coadd_ids[k].
path_cut = 'data/Fit_files/'
end_name = ['_g.fits', '_i.fits', '_r.fits']
list_coadd_ids = []
cutouts = []  # one (g, r, i) array per object; converted to `x` once at the end


def _read_primary(fits_path):
    """Return the data array of the first HDU of the FITS file at `fits_path`."""
    with fits.open(fits_path) as hdul:
        return hdul[0].data


for file in tqdm(os.listdir(path_cut)):
    # Only directories whose name starts with 'DES' are scanned.
    if not file.startswith('DES'):
        continue
    path2 = path_cut + file + '/'
    if not os.path.isdir(path2):
        continue  # stray file with a DES prefix: nothing to list inside it
    for file2 in os.listdir(path2):
        object_dir = path2 + file2
        if not os.path.isdir(object_dir):
            continue  # e.g. a hidden/metadata file next to the object folders
        for file3 in os.listdir(object_dir):
            # The g-band file marks a cutout triplet; the i and r files are
            # expected to share its prefix.
            if file3[-6:] != 'g.fits':
                continue
            path3 = object_dir + '/' + file3[:-7]  # strip the '_g.fits' suffix
            cutout_g = _read_primary(path3 + end_name[0])
            cutout_i = _read_primary(path3 + end_name[1])
            cutout_r = _read_primary(path3 + end_name[2])

            # Channel order in the stack is g, r, i.
            cutouts.append(np.array([cutout_g, cutout_r, cutout_i]))
            list_coadd_ids.append(int(file2))

# Build the stack in one go: np.append inside the loop would copy the whole
# array on every iteration. float64 is kept because the previous
# implementation grew the stack from an np.zeros (float64) seed.
if cutouts:
    x = np.array(cutouts, dtype=np.float64)
else:
    x = np.zeros((0, 3, 75, 75))

  0%|          | 0/1437 [00:00<?, ?it/s]

In [4]:
# Reorder the catalogue so that row k of `data` describes cutout x[k].
new_data = pd.DataFrame({'COADD_OBJECT_ID': list_coadd_ids}).set_index('COADD_OBJECT_ID')
catalog_by_id = data.set_index('COADD_OBJECT_ID')

# reindex() silently creates an all-NaN row for every cutout ID that has no
# catalogue entry (and turns integer columns into floats), so report it.
n_unmatched = int((~new_data.index.isin(catalog_by_id.index)).sum())
if n_unmatched:
    print(f'WARNING: {n_unmatched} cutout ID(s) have no catalogue row; '
          'their rows are filled with NaN')

# reset_index() brings COADD_OBJECT_ID back as a regular column.
data = catalog_by_id.reindex(new_data.index).reset_index()

print(x.shape, len(data))
data.head()

(1672, 3, 75, 75) 1672


Unnamed: 0,COADD_OBJECT_ID,K_RA,K_DEC,K_SCORE,Y3_COADD_OBJECT_ID,J_RA,J_DEC,score_sims,score_real,score_both,...,score,image_sep,info,n_img,flag,image_conf,lens_type,source_type,contaminant_type,papers
0,1538255919,78.309585,-52.505695,0.944468,413391817.0,78.309585,-52.505695,0.0,0.0,0.0,...,2.0,,,,CANDIDATE,,Galaxy,,,"2022A&A...668A..73R, 2022arXiv220602764S"
1,1537731200,77.456831,-52.455477,0.411994,413390304.0,77.456831,-52.455477,0.0,0.0,0.0,...,1.8,7.72,,,CANDIDATE,,Galaxy,,,2017ApJS..232...15D
2,1510424134,72.5878,-50.743626,9e-06,483404421.0,72.5878,-50.743626,0.0,0.0,0.0,...,1.33,,,,CANDIDATE,,Galaxy,,,"2019ApJS..243...17J, 2019MNRAS.484.5330J"
3,1044445712,2.526065,3.9112,8e-06,178846367.0,2.526065,3.9112,0.188,0.0,0.0,...,2.0,,,,CANDIDATE,,Galaxy,,,2022ApJ...932..107S
4,1478069288,66.905377,-22.08027,0.996424,498287987.0,66.905377,-22.08027,0.9188,0.0,0.0,...,1.0,,,,CANDIDATE,,Galaxy,,,"2022A&A...668A..73R, 2022arXiv220602764S"


In [5]:
def _utf8_bytes(value):
    """Encode `value` as UTF-8 bytes if it is a str; return anything else unchanged."""
    return value.encode('utf-8') if isinstance(value, str) else value


# Replace the str entries of every object-dtype column with UTF-8 bytes.
for text_colname in data.select_dtypes(include=['object']).columns:
    data[text_colname] = data[text_colname].apply(_utf8_bytes)

# Wrap each DataFrame column in an Astropy Column; object-dtype columns are
# explicitly requested as byte strings (dtype 'S').
columns = []
for colname in data.columns:
    series = data[colname]
    dtype_kwargs = {'dtype': 'S'} if series.dtype == 'object' else {}
    columns.append(Column(name=colname, data=series, **dtype_kwargs))


In [6]:
# Write the result as a three-HDU FITS file: an empty primary HDU, the cutout
# stack `x` as an image extension named "IMAGE", and the catalogue columns as
# a binary-table extension.
name = 'in_sled2'
table_data = Table(columns)

primary = fits.PrimaryHDU()
image = fits.ImageHDU(x, name="IMAGE")
table = fits.BinTableHDU(table_data)

hdu_list = fits.HDUList([primary, image, table])
output_path = 'data/' + name + '.fits'
# overwrite=True replaces an existing file of the same name.
hdu_list.writeto(output_path, overwrite=True)