In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.table import Table
import numpy as np
import os
import shutil
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

In [4]:
def quasar_z_range(redshifts, psfmag, plate, mjd, fiberid,
                   catalog_path="/Users/matt/Desktop/DESI_Research/DESI_ML/DR14_Q_spectra/DR14Q_v4_4.fits",
                   z_min=2.5, z_max=3., r_mag_max=19.5):
    """Select quasars from the DR14Q catalogue by redshift and r-band magnitude.

    Reads the "DR14Q_v4_4" HDU of the FITS catalogue and keeps the rows with
    ``z_min <= redshift <= z_max`` and r-band PSF magnitude ``< r_mag_max``.

    Parameters
    ----------
    redshifts, psfmag, plate, mjd, fiberid : str
        Names of the catalogue columns holding the redshift, the PSF
        magnitude table, the plate number, the MJD and the fiber ID.
    catalog_path : str, optional
        Location of the catalogue FITS file. Defaults to the path that was
        previously hard-coded in this function.
    z_min, z_max : float, optional
        Inclusive redshift bounds (defaults 2.5 and 3.0).
    r_mag_max : float, optional
        Exclusive upper bound on the r-band magnitude (default 19.5).
        NOTE (original author): this cut is the opposite of what was
        suggested, but it is the only way to get a sizable sample.

    Returns
    -------
    dict
        ``{"PLATE": ..., "MJD": ..., "FIBER": ...}`` -- three NumPy arrays of
        equal length, one entry per selected quasar.
    """
    # The context manager releases the file handle even if a column lookup
    # fails; the original left the file open.
    with fits.open(str(catalog_path)) as hdul:
        quasar_data = hdul["DR14Q_v4_4"].data

        redshift = quasar_data.field(str(redshifts))
        psfmagtable = quasar_data.field(str(psfmag))  # one row of band magnitudes per object
        rmagvec = psfmagtable[:, 2]  # column index 2 is used as the r band
        plate_col = quasar_data.field(str(plate))
        mjd_col = quasar_data.field(str(mjd))
        fiber_col = quasar_data.field(str(fiberid))

        in_redshift_range = (redshift >= z_min) & (redshift <= z_max)
        bright_enough = rmagvec < r_mag_max
        cutlist = np.asarray(bright_enough & in_redshift_range)

        # Boolean-mask indexing returns fresh arrays, so the results do not
        # depend on the file once the ``with`` block has closed it.
        filtered_plate = np.asarray(plate_col)[cutlist]
        filtered_mjd = np.asarray(mjd_col)[cutlist]
        filtered_fiberid = np.asarray(fiber_col)[cutlist]

    cut_values = {"PLATE": filtered_plate, "MJD": filtered_mjd, "FIBER": filtered_fiberid}

    return cut_values

def star_dict(filename, sep, SN):
    """Load the plate, MJD and fiber ID columns of a delimited file as integer arrays.

    Parameters
    ----------
    filename : str
        Path of the delimited text file; it must contain the columns
        ``plate``, ``mjd`` and ``fiberid``.
    sep : str
        Field separator passed to ``pandas.read_csv``.
    SN : str
        Currently unused. It names the signal-to-noise column that an
        earlier, now removed, ``nsmallest`` selection relied on; it is kept
        so existing calls keep working.

    Returns
    -------
    dict
        ``{"PLATE": ..., "MJD": ..., "FIBER": ...}`` -- three 1-D integer
        NumPy arrays covering every row of the file (no rows are filtered).
    """
    df = pd.read_csv(str(filename), sep=str(sep))

    def _int_column(column):
        # Vectorised cast of the whole column. The previous implementation
        # called int() on one-element array rows, which recent NumPy
        # releases deprecate, and looped over every row in Python.
        return df[column].astype(int).values

    # Arrays (not lists) are returned because the text-file writer that
    # consumes this dictionary needs them.
    starvalue_dict = {
        "PLATE": _int_column('plate'),
        "MJD": _int_column('mjd'),
        "FIBER": _int_column('fiberid'),
    }

    return starvalue_dict


def writeresultdict(resultsdict, outfileroot, txtfile_name, outfileprefix=None):
    """Write one relative spectrum-file path per entry to a text file.

    Each line has the form ``PLATE/spec-PLATE-MJD-FIBER.fits`` with the plate
    and fiber zero-padded to at least four digits and the MJD to at least
    five.

    Parameters
    ----------
    resultsdict : dict
        Must provide the indexable sequences ``"PLATE"``, ``"MJD"`` and
        ``"FIBER"``; the number of lines written is ``len(resultsdict["MJD"])``.
    outfileroot : str
        Prefix that is concatenated directly with ``txtfile_name`` (no path
        separator is inserted).
    txtfile_name : str
        Name of the output file; it is overwritten if it already exists.
    outfileprefix : optional
        Not used by this function.
    """
    destination = outfileroot + str(txtfile_name)
    line_template = '{:04d}/spec-{:04d}-{:05d}-{:04d}.fits\n'

    def _spec_line(row):
        # The plate appears twice: once as the directory, once in the file name.
        plate_value = resultsdict["PLATE"][row]
        return line_template.format(
            plate_value, plate_value, resultsdict["MJD"][row], resultsdict["FIBER"][row]
        )

    with open(destination, 'w') as handle:
        for row in range(len(resultsdict['MJD'])):
            handle.write(_spec_line(row))
            
            
def copyfiles_fromfolder_tofolder(Root_dir,target_folder,extension):
    """Copy every file with a given extension from a directory tree into one folder.

    The tree under ``Root_dir`` is walked bottom-up and each file whose name
    ends with ``extension`` is copied (with metadata, via ``shutil.copy2``)
    directly into ``target_folder``. The directory structure is flattened, so
    files that share a name overwrite one another in the target.

    Parameters
    ----------
    Root_dir : str
        Top of the directory tree to search.
    target_folder : str
        Destination directory; it is created if it does not exist yet.
    extension : str
        File-name suffix to match, e.g. ``".fits"``.
    """
    source_root = os.path.normpath(str(Root_dir))
    target_dir = str(target_folder)
    suffix = str(extension)

    # shutil.copy2 treats a non-existent destination as a *file* name, so
    # without this every match would overwrite one single file.
    if not os.path.isdir(target_dir):
        os.makedirs(target_dir)
    target_abs = os.path.abspath(target_dir)

    for root, dirs, files in os.walk(source_root, topdown=False):
        # When the target lives inside the source tree, its files would be
        # copied onto themselves, which makes shutil raise SameFileError.
        if os.path.abspath(root) == target_abs:
            continue
        for name in files:
            if name.endswith(suffix):
                shutil.copy2(os.path.join(root, name), target_dir)

In [5]:
# Select the quasars and summarise the plate range of the selection.
quasar_values = quasar_z_range("Z", "PSFMAG", "PLATE", "MJD", "FIBERID")

quasar_plates = quasar_values['PLATE']
lowest_quasar_plate = min(quasar_plates)
highest_quasar_plate = max(quasar_plates)
print("Min plate value = " + str(lowest_quasar_plate))
print("Max plate value = " + str(highest_quasar_plate))
print('Number of good quasars = ' + str(len(quasar_plates)))


Min plate value = 269
Max plate value = 8873
Number of good quasars = 7030


In [6]:
# Load the star sample and summarise its plate range.
star_values = star_dict('starsformatt.csv', ',', 'snMedian')

print("Min plate value = " +str(min(star_values['PLATE'])))
print("Max plate value = " +str(max(star_values['PLATE'])))
# This count is for the star sample; the label previously said "good quasars"
# (copied from the quasar cell).
print('Number of stars = ' +str(len(star_values['PLATE'])))


Min plate value = 266
Max plate value = 10000
Number of good quasars = 38898


In [4]:
# Write one spectrum-file path per selected quasar to quasars.txt.
writeresultdict(
    resultsdict=quasar_values,
    outfileroot="/Users/matt/Desktop/DESI_Research/DESI_ML/",
    txtfile_name="quasars.txt",
)

In [26]:
# Write one spectrum-file path per star to stars.txt.
writeresultdict(
    resultsdict=star_values,
    outfileroot="/Users/matt/Desktop/DESI_Research/DESI_ML/",
    txtfile_name="stars.txt",
)

In [5]:
# Gather every .fits file found under Quasars_folder into Quasars_data.
copyfiles_fromfolder_tofolder(
    Root_dir="Quasars_folder",
    target_folder="Quasars_data",
    extension=".fits",
)

In [7]:
# Gather every .fits file found under Stars_folder into Stars_data.
copyfiles_fromfolder_tofolder(
    Root_dir="Stars_folder",
    target_folder="Stars_data",
    extension=".fits",
)