In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
import numpy as np
import matplotlib.pyplot as plt
from astropy.io import fits
from astropy.table import Table
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import re
import shutil
import time
sns.set()

In [2]:
def copyfiles_fromfolder_tofolder(Root_dir,target_folder,extension):
    """Copy every file under ``Root_dir`` whose name ends with ``extension``.

    The source tree is walked recursively (bottom-up) and each matching file
    is copied with ``shutil.copy2`` straight into ``target_folder``; the
    sub-directory layout of the source tree is not recreated.

    Parameters
    ----------
    Root_dir : path-like or str
        Top of the directory tree to search (converted with ``str``).
    target_folder : path-like or str
        Destination handed to ``shutil.copy2`` (converted with ``str``).
    extension : str
        Suffix a file name must end with to be copied (converted with ``str``).
    """
    source_root = os.path.normpath(str(Root_dir))
    destination = str(target_folder)

    for current_dir, _subdirs, filenames in os.walk(source_root, topdown=False):
        # Only the names carrying the requested suffix are copied.
        wanted = (fname for fname in filenames if fname.endswith(str(extension)))
        for fname in wanted:
            shutil.copy2(os.path.join(current_dir, fname), destination)
                
def get_filenames(path='.', extension=None, pattern=None, identifiers=None, include_path=False):
    """Return the names of the entries in ``path`` that pass the given filters.

    The filters are applied in the order extension, pattern, identifiers;
    any filter left as ``None`` is skipped.

    Parameters
    ----------
    path : str
        Directory listed with ``os.listdir``. The order of the result follows
        ``os.listdir``, which does not guarantee any particular ordering.
    extension : str or tuple of str, optional
        Keep only names ending with this suffix (e.g. ``'.fits'``).
    pattern : str, optional
        Keep only names for which ``re.search(pattern, name)`` matches.
    identifiers : iterable, optional
        Keep names that contain ``str(ident)`` for some ``ident``. Names are
        gathered identifier by identifier, so a name that matches several
        identifiers appears once per match. If ``identifiers`` is not
        iterable, a message is printed and this filter is not applied.
    include_path : bool
        When true, each returned name is joined onto ``path``.

    Returns
    -------
    list of str
        The surviving file names (or joined paths when ``include_path``).
    """
    # retrieve all filenames from the directory
    filename_list = os.listdir(path)

    # keep all filenames with the proper extension
    if extension is not None:
        # endswith (rather than slicing by len(extension)) also behaves
        # sensibly for an empty suffix, which then filters nothing out.
        filename_list = [filename for filename in filename_list
                         if filename.endswith(extension)]

    # keep all filenames that match the pattern
    if pattern is not None:
        filename_list = [filename for filename in filename_list if re.search(pattern, filename)]

    # keep all filenames that match the identifiers provided
    if identifiers is not None:
        storage_list = []

        try:
            for ident in identifiers:
                storage_list.extend([filename for filename in filename_list if str(ident) in filename])

        except TypeError:
            # Best effort: report the problem and leave the list unfiltered.
            print(identifiers, 'is not a list, tuple, or otherwise iterable')

        else:
            filename_list = storage_list

    if include_path:
        # os.path.join inserts the separator only when `path` lacks one, so
        # both 'dir' and 'dir/' give 'dir/name' (plain concatenation turned
        # 'dir' + 'name' into 'dirname').
        filename_list = [os.path.join(path, filename) for filename in filename_list]

    return filename_list


def get_filevalues(path, filename_list):
    """Read selected columns from a list of FITS files.

    For every file the ``COADD`` and ``SPALL`` extensions are opened and the
    columns below are appended, in file order, to per-column lists.

    Parameters
    ----------
    path : str
        Directory containing the files (a trailing separator is optional).
    filename_list : sequence of str
        File names, relative to ``path``, to read.

    Returns
    -------
    dict
        Keys and the columns they hold, one list entry per file:

        - ``'FLUX'``     -- ``COADD`` column ``flux``
        - ``'CLASS'``    -- ``SPALL`` column ``CLASS``
        - ``'NOISE'``    -- ``SPALL`` column ``SN_MEDIAN_ALL``
          (NOTE(review): presumably a signal-to-noise value despite the key
          name -- confirm against the data model)
        - ``'WAVE'``     -- ``COADD`` column ``loglam``
        - ``'REDSHIFT'`` -- ``SPALL`` column ``Z``

        All lists are empty when ``filename_list`` is empty.
    """
    list_fluxarrays = []
    list_classtype = []
    list_noise = []
    list_wavelength = []
    list_redshift = []

    # Built up front so an empty filename_list returns empty lists instead of
    # failing on an unbound `values` at the return statement.
    values = {'FLUX': list_fluxarrays, 'CLASS': list_classtype, 'NOISE': list_noise,
              'WAVE': list_wavelength, 'REDSHIFT': list_redshift}

    for filename in filename_list:
        # os.path.join gives the same result as concatenation when `path`
        # already ends with a separator, and a valid path when it does not.
        file_path = os.path.join(str(path), str(filename))

        # The context manager closes the file; with memmap=False the column
        # data is read into memory, so no explicit close/del is needed.
        with fits.open(file_path, memmap=False) as hdul:
            data_c = hdul['COADD'].data
            data_s = hdul['SPALL'].data

            list_fluxarrays.append(data_c.field("flux"))
            list_classtype.append(data_s.field('CLASS'))
            list_noise.append(data_s.field('SN_MEDIAN_ALL'))
            list_wavelength.append(data_c.field('loglam'))
            list_redshift.append(data_s.field('Z'))

    return values


In [3]:
# List the .fits files in the good_stars directory (names only, no directory prefix).
stardata = get_filenames(
    path="/Users/matt/Desktop/DESI_Research/DESI_ML/good_stars/",
    extension='.fits',
)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/matt/Desktop/DESI_Research/DESI_ML/good_stars/'

In [None]:
# Read the flux/class/noise/wavelength/redshift columns of every listed star file.
star_dict = get_filevalues(
    path="/Users/matt/Desktop/DESI_Research/DESI_ML/good_stars/",
    filename_list=stardata,
)

In [None]:
# Per-file flux arrays and class labels, in the same order as `stardata`.
star_flux, star_labels = star_dict['FLUX'], star_dict['CLASS']

In [None]:
# Minimum number of flux samples a spectrum needs in order to be kept; every
# kept spectrum is then truncated to exactly this many samples.
star_min_fluxlen = 4550

# get list of the lengths of the flux arrays
star_fluxlen_list = [len(flux) for flux in star_flux]

# lengths of the flux arrays that are at least star_min_fluxlen long
cut_star_fluxlen_list = [n for n in star_fluxlen_list if n >= star_min_fluxlen]

# positions (not lengths) of the spectra that pass the length cut; indexing
# with the length values themselves would pick unrelated rows or run off the
# end of the data
star_keep_indices = [idx for idx, n in enumerate(star_fluxlen_list)
                     if n >= star_min_fluxlen]

# star fluxes whose length is at least star_min_fluxlen; kept as a list so
# spectra of different lengths never have to be packed into one ndarray
filtered_star_flux_list = [star_flux[idx] for idx in star_keep_indices]
print(len(filtered_star_flux_list))

# star labels that correspond to the kept fluxes (same positions, same order)
filtered_star_labels_list = [star_labels[idx] for idx in star_keep_indices]
print(len(filtered_star_labels_list))

# star fluxes truncated so they all have the same length, i.e. star_min_fluxlen
star_fluxlen_same = [flux[:star_min_fluxlen] for flux in filtered_star_flux_list]
starflux_same = star_fluxlen_same

In [None]:
# Pack the equal-length spectra into a 4-D array laid out as
# (sample, height=1, width=4550, channels=1), matching the model's input_shape.
# NOTE(review): the sample count 5013 is hard-coded; this assumes
# `starflux_same` holds at least that many spectra -- confirm.
star_tensor = np.ones(shape=(5013, 1, 4550, 1))

for i in range(star_tensor.shape[0]):
    spec = starflux_same[i]
    star_tensor[i, 0, :, 0] = spec

print(star_tensor.shape)

In [None]:
# Compare the same ten samples before and after packing into the tensor.
# why is the rounding different between these two?
# NOTE(review): possibly a dtype difference (np.ones defaults to float64 and
# the FITS flux may be float32), which changes how many digits are printed
# -- confirm by printing both dtypes.
print(filtered_star_flux_list[0][100:110])
print(star_tensor[0, 0, 100:110, 0])

In [None]:
# Split the packed spectra by position into train / validation / test sets:
# rows [0, 3013) train, [3013, 4013) validation, [4013, end) test.
# Every label is 1 because only star spectra are present here.
star_train_images = star_tensor[:3013, ...]
star_val_images = star_tensor[3013:4013, ...]
star_test_images = star_tensor[4013:, ...]

star_train_labels = np.ones(3013)
star_val_labels = np.ones(1000)
star_test_labels = np.ones(1000)

In [None]:
# Sanity check: print the image-array shape followed by the label-array shape
# for the train, validation and test subsets, in that order.
for images, labels in (
    (star_train_images, star_train_labels),
    (star_val_images, star_val_labels),
    (star_test_images, star_test_labels),
):
    print(np.shape(images))
    print(np.shape(labels))

In [None]:
# Binary classifier over one-row "images" of 4550 flux samples:
# two conv + max-pool stages, then a small dense head ending in a sigmoid.
model = keras.Sequential([
    # input_shape = (height, width, channels)
    keras.layers.Conv2D(
        64, (1, 32),
        activation='relu',
        input_shape=(1, 4550, 1),
        padding='same',
        data_format='channels_last',
    ),
    keras.layers.MaxPooling2D((1, 7), strides=(1, 4)),
    keras.layers.Conv2D(72, (1, 16), activation='relu', padding='same'),
    keras.layers.MaxPooling2D((1, 7), strides=(1, 4)),
    keras.layers.Flatten(),
    # potentially add dropout here at value = 0.5
    keras.layers.Dense(units=16, activation='relu'),
    keras.layers.Dense(units=1, activation='sigmoid'),
])

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 10 epochs in mini-batches of 64, scoring the validation split
# after each epoch; the returned History object feeds the plots further down.
history = model.fit(
    x=star_train_images,
    y=star_train_labels,
    batch_size=64,
    epochs=10,
    validation_data=(star_val_images, star_val_labels),
)

In [None]:
# Score the trained model on the held-out test split; evaluate returns the
# loss followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(x=star_test_images, y=star_test_labels)

In [None]:
# Plot the per-epoch training and validation curves recorded by model.fit.
history_dict = history.history

# Depending on the Keras/TensorFlow version, metrics=['accuracy'] is recorded
# under 'accuracy' or under the older short name 'acc'; accept either so the
# cell does not fail with a KeyError.
acc_key = 'accuracy' if 'accuracy' in history_dict else 'acc'

acc = history_dict[acc_key]
val_acc = history_dict['val_' + acc_key]
loss = history_dict['loss']
val_loss = history_dict['val_loss']
epochs = range(1, len(acc) + 1)

# Accuracy curves (drawn on the current figure).
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

# Loss curves on a separate figure.
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()