In [1]:
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.table import Table
import numpy as np
import seaborn as sns
import os
import shutil
import pickle
import re
import time
import sys
# Switch on seaborn's default plot theme for any figures drawn later in the notebook.
sns.set()

In [2]:
def copyfiles_fromfolder_tofolder(Root_dir,target_folder,extension):
    """Copy every matching file found under ``Root_dir`` into one flat folder.

    The directory tree rooted at ``Root_dir`` is walked bottom-up and each file
    whose name ends with ``extension`` is copied (data and metadata, via
    ``shutil.copy2``) directly into ``target_folder``. The source sub-directory
    structure is not reproduced, so files sharing a name overwrite one another.

    Parameters
    ----------
    Root_dir : str or path-like
        Top of the directory tree to search.
    target_folder : str or path-like
        Directory that receives the copies. It is created when missing.
    extension : str
        Suffix a file name must end with to be copied (e.g. ``".fits"``).

    Returns
    -------
    None
    """
    source_root = os.path.normpath(str(Root_dir))
    destination = str(target_folder)
    suffix = str(extension)

    # copy2 treats a non-existent destination as a *file* name, which would
    # make every copy overwrite the previous one (or fail outright when the
    # path ends with a separator). Make sure it is a real directory first.
    os.makedirs(destination, exist_ok=True)

    for root, _dirs, files in os.walk(source_root, topdown=False):
        for name in files:
            if not name.endswith(suffix):
                continue

            source_file = os.path.join(root, name)
            destination_file = os.path.join(destination, name)

            # The destination may itself live inside the tree being walked;
            # copying a file onto itself raises shutil.SameFileError.
            if os.path.exists(destination_file) and os.path.samefile(source_file, destination_file):
                continue

            shutil.copy2(source_file, destination_file)

def get_filenames(path='.', extension=None, pattern=None, identifiers=None, include_path=False):
    """Return the names of the entries in ``path`` that pass every requested filter.

    Filters are applied in the order extension -> pattern -> identifiers; a
    filter left as ``None`` is skipped.

    Parameters
    ----------
    path : str
        Directory to list (not searched recursively).
    extension : str, optional
        Keep only names ending with this suffix.
    pattern : str or compiled regex, optional
        Keep only names for which ``re.search(pattern, name)`` finds a match.
    identifiers : iterable or str, optional
        Keep only names containing ``str(ident)`` for at least one ``ident``.
        Results are grouped by identifier, in the order the identifiers are
        given; a name matching several identifiers appears once per match.
        A single string is treated as one identifier. If the value is not
        iterable, a message is printed and this filter is ignored.
    include_path : bool, optional
        When true, each returned name is joined onto ``path``.

    Returns
    -------
    list of str
        The surviving names (or joined paths), in sorted order within each
        identifier group.
    """
    # os.listdir returns entries in arbitrary order; sort so that repeated
    # runs (and anything built from this list) are reproducible.
    filename_list = sorted(os.listdir(path))

    # keep all filenames with the proper extension
    if extension is not None:
        # endswith also behaves for an empty suffix, where slicing with
        # [-0:] would have compared the whole name against ''.
        filename_list = [filename for filename in filename_list
                         if filename.endswith(extension)]

    # keep all filenames that match the pattern
    if pattern is not None:
        filename_list = [filename for filename in filename_list
                         if re.search(pattern, filename)]

    # keep all filenames that match the identifiers provided
    if identifiers is not None:
        # A bare string would otherwise be iterated character by character.
        if isinstance(identifiers, str):
            identifiers = [identifiers]

        try:
            ident_strings = [str(ident) for ident in identifiers]

        except TypeError:
            # best-effort: report the problem and leave the list unfiltered
            print(identifiers, 'is not a list, tuple, or otherwise iterable')

        else:
            filename_list = [filename
                             for ident in ident_strings
                             for filename in filename_list
                             if ident in filename]

    if include_path:
        # os.path.join inserts the separator when ``path`` lacks a trailing one
        filename_list = [os.path.join(path, filename) for filename in filename_list]

    return filename_list


def get_filevalues(path, filename_list):
    """Collect spectrum and catalogue columns from a list of FITS files.

    For every file the ``'COADD'`` HDU is read, then the HDU at index 2 is
    inspected. Files whose HDU 2 is named neither ``'SPALL'`` nor
    ``'SPECOBJ'`` are skipped and contribute nothing to any list, so all
    returned lists always have the same length.

    Parameters
    ----------
    path : str
        Directory holding the files.
    filename_list : iterable of str
        File names, relative to ``path``.

    Returns
    -------
    dict
        Maps each key below to a list with one entry per accepted file, in
        input order:

        * ``'FLUX'``, ``'IVAR'``, ``'WAVE'`` - the ``flux``, ``IVAR`` and
          ``loglam`` columns of the COADD table.
        * ``'PLATE'``, ``'MJD'``, ``'FIBER'``, ``'PSFMAG'``, ``'CLASS'``,
          ``'NOISE'``, ``'REDSHIFT'``, ``'RA'``, ``'DEC'`` - the ``PLATE``,
          ``MJD``, ``FIBERID``, ``PSFMAG``, ``CLASS``, ``SN_MEDIAN_ALL``,
          ``Z``, ``RA`` and ``DEC`` columns of the SPALL/SPECOBJ table.
        * ``'G'``, ``'R'`` - ``PSFMAG[0][1]`` and ``PSFMAG[0][2]``.

        An empty ``filename_list`` yields a dict of empty lists.

    Raises
    ------
    Whatever ``fits.open`` or the HDU / column look-ups raise, e.g. when a
    file is missing or has no ``'COADD'`` extension.
    """
    # Output lists are created up front so the function also returns a
    # well-formed result when there is nothing to read.
    values = {'FLUX': [], 'CLASS': [], 'NOISE': [], 'WAVE': [], 'REDSHIFT': [],
              'PLATE': [], 'MJD': [], 'FIBER': [], 'PSFMAG': [], 'R': [],
              'G': [], 'RA': [], 'DEC': [], 'IVAR': []}

    # (output key, FITS column name) pairs, grouped by the table they come from
    coadd_columns = (('FLUX', 'flux'), ('IVAR', 'IVAR'), ('WAVE', 'loglam'))
    catalog_columns = (('PLATE', 'PLATE'), ('MJD', 'MJD'), ('FIBER', 'FIBERID'),
                       ('PSFMAG', 'PSFMAG'), ('CLASS', 'CLASS'),
                       ('NOISE', 'SN_MEDIAN_ALL'), ('REDSHIFT', 'Z'),
                       ('RA', 'RA'), ('DEC', 'DEC'))

    # the 2nd HDU is named differently in certain files; both layouts are
    # read the same way
    catalog_hdu_names = ('SPALL', 'SPECOBJ')

    # going through all the fits files
    for filename in filename_list:
        filepath = os.path.join(str(path), str(filename))

        # The context manager closes the file, so no explicit close()/del of
        # the HDU data is needed. memmap=False is kept from the original
        # call - presumably so the arrays stored below do not depend on an
        # open memory map; confirm before changing.
        with fits.open(filepath, memmap=False) as hdul:

            data_c = hdul['COADD'].data

            catalog_name = hdul[2].name
            if catalog_name not in catalog_hdu_names:
                # unknown layout: skip the file entirely
                continue

            data_s = hdul[catalog_name].data

            # IVAR is recorded for both layouts so that every list stays
            # index-aligned with the others.
            for key, column in coadd_columns:
                values[key].append(data_c.field(column))

            for key, column in catalog_columns:
                values[key].append(data_s.field(column))

            # Elements 1 and 2 of the first PSFMAG row are stored as G and R
            # (presumably SDSS u,g,r,i,z ordering - TODO confirm).
            psfmag_row = data_s.field('PSFMAG')[0]
            values['G'].append(psfmag_row[1])
            values['R'].append(psfmag_row[2])

    return values

def save_data_to_disk(file_name, saved_variable):
    """Pickle ``saved_variable`` into the file ``file_name``.

    Parameters
    ----------
    file_name : str or path-like
        Destination file; it is converted with ``str`` and opened in binary
        write mode, so an existing file is overwritten.
    saved_variable : object
        Any picklable object.

    Returns
    -------
    None
    """
    # ``with`` guarantees the handle is closed even if pickling fails midway.
    with open(str(file_name), 'wb') as outfile:
        pickle.dump(saved_variable, outfile)
    
def save_as_pickled_object(obj, filepath):
    """Pickle ``obj`` to ``filepath``, writing the bytes in chunks below 2 GiB.

    This is a defensive alternative to a single ``pickle.dump`` call: the
    object is serialised in memory first and then written in pieces of at
    most ``2**31 - 1`` bytes, presumably to sidestep platform limits on very
    large single writes. The resulting file is an ordinary pickle and can be
    read back with ``pickle.load``.

    Parameters
    ----------
    obj : object
        Any picklable object.
    filepath : str or path-like
        Destination file, opened in binary write mode (overwritten if present).

    Returns
    -------
    None
    """
    max_bytes = 2**31 - 1
    bytes_out = pickle.dumps(obj)

    # len() is the payload size; sys.getsizeof would also count the bytes
    # object's own header and so overstate how much there is to write.
    n_bytes = len(bytes_out)

    # Slicing a memoryview hands out chunks without copying up to 2 GiB each.
    payload = memoryview(bytes_out)

    with open(filepath, 'wb') as f_out:
        for idx in range(0, n_bytes, max_bytes):
            f_out.write(payload[idx:idx + max_bytes])

In [3]:
# Flatten the download tree: copy every .fits file found anywhere under
# new_stars_folder into the single new_stars_data directory.
# NOTE(review): these paths use ".../DESI_ML/Data/..." while the cells that read
# the copies back use ".../DESI_ML/data/..." - this only resolves to the same
# directory on a case-insensitive filesystem; confirm the intended spelling.
copyfiles_fromfolder_tofolder(
    "/Users/matt/Desktop/DESI_Research/DESI_ML/Data/new_stars_folder/",
    "/Users/matt/Desktop/DESI_Research/DESI_ML/Data/new_stars_data/",
    ".fits",
)

In [4]:
# Names (without directory) of all .fits files in the flattened star folder.
stardata = get_filenames(
    path="/Users/matt/Desktop/DESI_Research/DESI_ML/data/new_stars_data/",
    extension='.fits',
)

In [5]:
# Read the listed files and gather their columns into one dict of lists.
# The directory must be the same one `stardata` was listed from.
star_dict = get_filevalues(
    path="/Users/matt/Desktop/DESI_Research/DESI_ML/data/new_stars_data/",
    filename_list=stardata,
)

In [6]:
# Persist the collected star data as a pickle for later notebooks.
save_as_pickled_object(
    obj=star_dict,
    filepath="/Users/matt/Desktop/DESI_Research/DESI_ML/var_CNN/Dictionaries/object_dict/star_dict",
)


In [7]:
#save_as_pickled_object(star_dict,'star_dict' )

In [8]:
# flux = star_dict['FLUX']
# classification = star_dict['CLASS']
# noise = star_dict['NOISE']
# wave = star_dict['WAVE']
# redshift = star_dict['REDSHIFT']
# plate = star_dict['PLATE']
# mjd = star_dict['MJD']
# fiber = star_dict['FIBER']
# psfmag = star_dict['PSFMAG']
# g = star_dict['G']
# r = star_dict['R']

In [9]:
# print(len(flux))
# print(len(classification))
# print(len(noise))
# print(len(wave))
# print(len(redshift))
# print(len(plate))
# print(len(mjd))
# print(len(fiber))
# print(len(psfmag))
# print(len(g))
# print(len(r))

In [10]:
# print(len(flux[:17776]))

In [11]:
# star_dict1 = {'FLUX': flux[:17776], 'CLASS': classification[:17776], 'NOISE': noise[:17776],\
#                 'WAVE': wave[:17776], 'REDSHIFT': redshift[:17776], 'PLATE': plate[:17776],\
#                 'MJD': mjd[:17776], 'FIBER': fiber[:17776], 'PSFMAG': psfmag[:17776],\
#                 'G': g[:17776], 'R': r[:17776]}

In [12]:
# filename = 'new_star_data_dict1'
# outfile = open(filename,'wb')

# pickle.dump(star_dict1,outfile, protocol = 0)
# outfile.close()

In [13]:
# star_dict2 = {'FLUX': flux[17776:35551], 'CLASS': classification[17776:35551], 'NOISE': noise[17776:35551],\
#                 'WAVE': wave[17776:35551], 'REDSHIFT': redshift[17776:35551], 'PLATE': plate[17776:35551],\
#                 'MJD': mjd[17776:35551], 'FIBER': fiber[17776:35551], 'PSFMAG': psfmag[17776:35551],\
#                 'G': g[17776:35551], 'R': r[17776:35551]}

In [14]:
# filename = 'new_star_data_dict2'
# outfile = open(filename,'wb')

# pickle.dump(star_dict2,outfile, protocol = 0)
# outfile.close()

In [15]:
# star_dict3 = {'FLUX': flux[35551:53326], 'CLASS': classification[35551:53326], 'NOISE': noise[35551:53326],\
#                 'WAVE': wave[35551:53326], 'REDSHIFT': redshift[35551:53326], 'PLATE': plate[35551:53326],\
#                 'MJD': mjd[35551:53326], 'FIBER': fiber[35551:53326],'PSFMAG': psfmag[35551:53326],\
#                 'G': g[35551:53326], 'R': r[35551:53326] }

In [16]:
# filename = 'new_star_data_dict3'
# outfile = open(filename,'wb')

# pickle.dump(star_dict3,outfile, protocol = 0)
# outfile.close()

In [17]:
# star_dict4 = {'FLUX': flux[53326:], 'CLASS': classification[53326:], 'NOISE': noise[53326:],\
#                 'WAVE': wave[53326:], 'REDSHIFT': redshift[53326:], 'PLATE': plate[53326:],\
#                 'MJD': mjd[53326:], 'FIBER': fiber[53326:], 'PSFMAG': psfmag[53326:],\
#                 'G': g[53326:], 'R': r[53326:] }

In [18]:
# filename = 'new_star_data_dict4'
# outfile = open(filename,'wb')

# pickle.dump(star_dict4,outfile, protocol = 0)
# outfile.close()