# Adding new objects to the main PCAReady and classlist files

Takes an input csv file holding the parameters of the objects to be included plus a folder containing the new spectra. It then processes these along the lines of the previous objects and then includes them in the main PCAReady and classlist files. It rejects duplicates.

**Requires the same _functions.py_ as used for the main PCAReady.txt**

### Notes:
For convenience, you may want to run this in its own folder, copying functions.py over as you do. The PCAReady/classlist files for the new objects are saved in a ./PCA_ready/ folder created in this directory, but the final save folder can be specified to be your main PCAReady folder.


In [17]:
import numpy as np
import os
import pandas as pd
from functions import dered, z_range, target_wavelength_range, ProcessSpectra
import matplotlib.pyplot as plt
from glob import glob

## These are the user defined parameters
1) __new_obj_csv__ is the csv file containing the information for the new objects

2) __extension__ is the extension given to the PCAReady and classlist files associated with the new objects, these will eventually be concatenated with the main PCAReady file.

3) __save__ is a boolean which determines whether the intermediate PCAReady file is saved. Set to False for debugging.

4) __ref_PCA__ and __ref_classlist__ are the physical locations of the main PCAReady and classlist files.

5) __ext__ is the extension for the final PCAReady/classlist files in the form e.g., PCAReady.\<ext\>.txt

6) __final_save_folder__ is the destination folder for the final PCAReady/classlist files
    

In [18]:
# --- User-defined parameters --------------------------------------------------
# Input table describing the new objects, and the extension used for the
# intermediate files written for them (PCAReady.<extension>.txt and
# classlist.<extension>.txt).
new_obj_csv = 'new_objects.csv'
extension = ''

# Save the PCAReady/classlist files for the new objects? Set to False for debugging.
save = False

# Main PCAReady and classlist files (including their location) that the new
# objects are concatenated onto.
ref_PCA = ''
ref_classlist = ''

# Extension of the concatenated PCAReady/classlist files
# (PCAReady.<ext>.txt), along with the folder these files are saved in.
ext = ''
final_save_folder = './PCA_ready'

# --- Load the table of new objects --------------------------------------------
# The table is parsed as whitespace-delimited. `sep=r'\s+'` is the documented
# equivalent of `delim_whitespace=True`, which pandas deprecated in 2.2.
new_objects = pd.read_csv(f'./{new_obj_csv}', sep = r'\s+')

## Pre-processing of the spectra to get them ready for PCA

In [19]:
def cut_spectrum(x, y, lo, high):
    '''
    Trim a spectrum to the wavelength window between ``lo`` and ``high``.

    The window edges are snapped to the samples of ``x`` that lie closest to
    ``lo`` and ``high``; both of those samples are kept.

    Parameters
    ----------
    x : array supporting element-wise arithmetic (e.g. numpy.ndarray)
        Wavelength values.
    y : array
        Values sampled at ``x``; sliced with the same indices as ``x``.
    lo, high : float
        Requested lower and upper wavelength limits.

    Returns
    -------
    tuple of numpy.ndarray
        The trimmed ``x`` and ``y``.
    '''
    first = np.argmin(np.abs(x - lo))
    last = np.argmin(np.abs(x - high))

    # +1 so that the sample nearest to `high` is part of the result
    window = slice(first, last + 1)

    return np.array(x[window]), np.array(y[window])


def process_spectrum(sn = '', sn_class = 'UNK', E = 1e-5, z = 0, t = 0, dst = 'new_spectra', f = '',
                     low_cut = 0, high_cut = 10000 ):
    '''
    Prepare one observed spectrum for the PCA pre-processing step.

    The first two columns of ``f`` are loaded, trimmed with ``cut_spectrum``,
    passed through ``dered`` and shifted by dividing the wavelengths by
    ``1 + z``. The result is written as a two-column text file to
    ``./<dst>/<sn_class>_<sn>_<t>_.txt`` (``t`` with two decimals), together
    with a plot saved under the same name plus ``.pdf``.

    Parameters
    ----------
    sn : str
        Object name, used in the output file name.
    sn_class : str
        Classification, used in the output file name.
    E : float
        Passed to ``dered`` as its fourth argument (E(B-V) per the original
        description of this function).
    z : float
        Redshift; wavelengths are divided by ``1 + z``.
    t : float
        Used in the output file name (presumably the epoch of the
        spectrum -- confirm against the input table).
    dst : str
        Output folder relative to the working directory; created if missing.
    f : str
        Path of the spectrum to read.
    low_cut, high_cut : float
        Wavelength limits handed to ``cut_spectrum``.

    Returns
    -------
    None
        Also when ``f`` does not exist, in which case nothing is written.
    '''
    # Guard clause: nothing to do without an input file
    if not os.path.isfile(f):
        print(f'{f} does not exist, skpping')
        return None

    wave, flux = np.loadtxt(f, unpack = True, usecols = (0, 1))
    wave, flux = cut_spectrum(wave, flux, low_cut, high_cut)

    # dered is applied before the wavelengths are divided by (1 + z);
    # 3.1 is handed to dered unchanged (presumably R_V -- confirm in functions.py)
    flux = dered(wave, flux, 3.1, E)
    wave = wave / (1 + z)

    plt.plot(wave, flux)
    print(os.path.basename(f))

    pairs = list(zip(wave, flux))
    savename = f'{sn_class}_{sn}_{t:.2f}_.txt'

    out_dir = f'./{dst}'
    if not os.path.isdir(out_dir):
        print(f'Creating ./{dst}')
        os.mkdir(out_dir)

    out_path = f'{out_dir}/{savename}'
    np.savetxt(out_path, pairs, fmt = "%s")

    # The plot sits next to the text file; close the figure so the next
    # spectrum starts from an empty one
    plt.savefig(f'{out_path}.pdf')
    plt.close()
        
             

# Pre-process every spectrum listed in the input table, one row at a time.
for _, entry in new_objects.iterrows():

    process_spectrum(
        sn = entry['SN'],
        sn_class = entry['classification'],
        z = float(entry['z']),
        t = float(entry['t']),
        E = float(entry['E']),
        f = entry['file'],
        low_cut = entry['low_cut'],
        high_cut = entry['high_cut'],
    )

SN2009dc_2009-04-16_00-00-00_TNG_DOLORES_None.ascii
SN2009dc_2009-04-17_00-00-00_NOT_ALFOSC_None.ascii
SN2009dc_2009-04-18_00-00-00_NOT_ALFOSC_None.ascii
SN2009dc_2009-04-22_00-00-00_NOT_ALFOSC_None.ascii
SN2009dc_2009-04-28_00-00-00_CA-2.2m_CAFOS_None.ascii
SN2009dc_2009-04-30_00-00-00_CA-2.2m_CAFOS_None.ascii
SN2009dc_2009-05-02_00-00-00_TNG_DOLORES_None.ascii
SN2009dc_2009-05-03_00-00-00_CA-2.2m_CAFOS_None.ascii
SN2009dc_2009-05-05_00-00-00_NOT_ALFOSC_None.ascii
SN2009dc_2009-05-07_00-00-00_NOT_ALFOSC_None.ascii
SN2009dc_2009-05-08_00-00-00_NOT_ALFOSC_None.ascii
SN2009dc_2009-05-13_00-00-00_TNG_DOLORES_None.ascii
SN2009dc_2009-05-18_00-00-00_ESO-NTT_EFOSC2-NTT_None.ascii
SN2009dc_2009-05-28_00-00-00_Ekar_AFOSC_None.ascii
SN2009dc_2009-06-01_00-00-00_CA-2.2m_CAFOS_None.ascii
SN2009dc_2009-06-20_00-00-00_TNG_DOLORES_None.ascii
SN2009dc_2009-06-27_00-00-00_CA-2.2m_CAFOS_None.ascii
SN2009dc_2009-07-15_00-00-00_ESO-NTT_EFOSC2-NTT_None.ascii
SN2009dc_2009-08-02_00-00-00_CA-2.2m_CAFOS_None

## Process the spectra for the PCA and save the classlist and PCAReady file

In [20]:
# Set folders
pca_folder = './PCA_ready/'
files = glob('./new_spectra/*.txt' )
pca_ready_save = pca_folder + 'PCAReady.%s.txt' %extension
classification_list = pca_folder + 'classlist.%s.txt' %extension

# Create the PCAReady folder if it doesn't exist
if not os.path.isdir(pca_folder):
    print(f'Creating {pca_folder}\n')
    os.mkdir(pca_folder)

main = []   # processed spectrum for every accepted (file, redshift) pair
lab = []    # matching '<z>_<file name>' labels, in the same order as `main`

for count, f in enumerate(files, start=1):

    base = os.path.basename(f)
    print('%s/%s Processing %s ' %(count,len(files),base))

    x0, y0 = np.loadtxt(f,unpack=True,usecols=(0,1))

    # Only spectra whose wavelength column never decreases are used. This used
    # to be a silent skip; say so, so a missing object can be traced.
    if not np.all(x0[1:] >= x0[:-1]):
        print(base, 'wavelengths are not in ascending order, skipping')
        continue

    # z_range, target_wavelength_range and ProcessSpectra come from functions.py
    for z in z_range:
        x_z = x0 * (1 + z)

        # The shifted spectrum must overlap the target range by more than
        # 50 wavelength units on both sides, otherwise this z is skipped
        covers_target = ((x_z[0] < target_wavelength_range[0] - 50)
                         and (x_z[-1] > target_wavelength_range[1] + 50))
        if not covers_target:
            continue

        # cut extraneous stuff beyond the red end of the target range
        lowcut = 0
        hicut = np.argmin(abs( x_z - (target_wavelength_range[1] + 50)) )

        x_z = x_z[lowcut:hicut]
        y_z = y0[lowcut:hicut]

        x_z, y = ProcessSpectra(x = x_z, y = y_z ,wave_bins=8)

        # The previous check looked at the first character of
        # str(min(y)). The builtin min() is order-dependent when NaNs are
        # present, so a NaN that was not the first element could slip through.
        # Test every value explicitly instead. A negative minimum was also
        # rejected by the old check (leading '-'), and still is, so this
        # change only rejects more spectra, never fewer.
        y_min = np.min(y)
        if np.all(np.isfinite(y)) and y_min >= 0:

            main.append(y)
            lab.append('%.3f_%s'%(z,base))
        else:
            print(base, 'NaN detected',str(y_min)[0], z)

        # if required uncomment to save the individual spectra
        #m=list(zip(x_z,y))
        #np.savetxt('/Users/Si/Documents/searn/SupernovaClassifier/processedspectra/%.3f_%s'%(z,base),m,fmt="%s")

if save:
    np.savetxt(pca_ready_save,main,fmt="%s")
    np.savetxt(classification_list,lab,"%s")

1/21 Processing Ia-SupCh_SN2009dc_126.20_.txt 
2/21 Processing Ia-SupCh_SN2009dc_35.40_.txt 
3/21 Processing Ia-SupCh_SN2009dc_7.30_.txt 
4/21 Processing Ia-SupCh_SN2009dc_11.40_.txt 
5/21 Processing Ia-SupCh_SN2009dc_61.30_.txt 
6/21 Processing Ia-SupCh_SN2009dc_54.40_.txt 
7/21 Processing Ia-SupCh_SN2009dc_97.20_.txt 
8/21 Processing Ia-SupCh_SN2009dc_10.40_.txt 
9/21 Processing Ia-SupCh_SN2009dc_5.50_.txt 
10/21 Processing Ia-SupCh_SN2009dc_-4.70_.txt 
11/21 Processing Ia-SupCh_SN2009dc_3.40_.txt 
12/21 Processing Ia-SupCh_SN2009dc_8.30_.txt 
13/21 Processing Ia-SupCh_SN2009dc_31.30_.txt 
14/21 Processing Ia-SupCh_SN2009dc_-8.50_.txt 
15/21 Processing Ia-SupCh_SN2009dc_16.50_.txt 
16/21 Processing Ia-SupCh_SN2009dc_21.50_.txt 
17/21 Processing Ia-SupCh_SN2009dc_80.30_.txt 
18/21 Processing Ia-SupCh_SN2009dc_-10.50_.txt 
19/21 Processing Ia-SupCh_SN2009dc_1.40_.txt 
20/21 Processing Ia-SupCh_SN2009dc_-9.60_.txt 
21/21 Processing Ia-SupCh_SN2009dc_164.30_.txt 


## Finally, concatenate the PCAReady and classlist files

In [40]:
def join_PCA(ref_PCAReady, ref_classlist, new_PCAReady, new_classlist, save_location = None, ext = None):
    '''
    Append the new spectra to the main PCAReady/classlist files.

    Rows of the new files whose label already occurs in the reference
    classlist are dropped. If anything is left, the reference and new arrays
    are concatenated and written to ``<save_location>/PCAReady.<ext>.txt`` and
    ``<save_location>/classlist.<ext>.txt``.

    Parameters
    ----------
    ref_PCAReady, ref_classlist : str
        Paths of the main PCAReady file (one spectrum per row) and its
        classlist (one label per row).
    new_PCAReady, new_classlist : str
        Paths of the PCAReady/classlist files of the new objects.
    save_location : str, optional
        Destination folder; created if missing. Defaults to the module-level
        ``final_save_folder``.
    ext : str, optional
        Extension used in the output file names. Defaults to the module-level
        ``ext``.

    Returns
    -------
    None
    '''
    # Resolve the defaults when the function is called. The original default
    # `save_location = save_location` referred to a name that does not exist.
    if save_location is None:
        save_location = final_save_folder
    if ext is None:
        # the parameter shadows the module-level `ext`, hence the lookup
        ext = globals().get('ext', '')

    # ndmin keeps single-row files as (1, n) / (1,) arrays so that the
    # row-wise delete and concatenate below work for any number of rows
    a1 = np.loadtxt(ref_PCAReady, ndmin = 2)
    c1 = np.loadtxt(ref_classlist, dtype = 'str', ndmin = 1)

    a2 = np.loadtxt(new_PCAReady, ndmin = 2)
    c2 = np.loadtxt(new_classlist, dtype = 'str', ndmin = 1)

    # We don't want duplicate items so we'll remove them.
    # Build the lookup set once rather than once per new label.
    known_labels = set(c1.tolist())
    duplicate_indices = [idx for idx, spec in enumerate(c2) if spec in known_labels]

    if duplicate_indices:
        print('Duplicate spectra found, removing\n')
        # axis = 0 removes whole rows; without it np.delete flattens the
        # 2-D spectra array and deletes single values instead
        a2 = np.delete(a2, duplicate_indices, axis = 0)
        c2 = np.delete(c2, duplicate_indices)

    # if there are new objects to add, then do so
    if len(c2) > 0:
        print('Concatenating arrays...\n')
        a3 = np.concatenate( (a1, a2) )
        c3 = np.concatenate( (c1, c2) )

        os.makedirs(save_location, exist_ok = True)

        print(f'Saving PCAReady.{ext}.txt and classlist.{ext}.txt' )
        np.savetxt(f'{save_location}/PCAReady.{ext}.txt', a3, fmt="%s")
        np.savetxt(f'{save_location}/classlist.{ext}.txt', c3, fmt="%s")

    else:
        print('No new objects to add.')
    
# Merge the files of the new objects into the main PCAReady/classlist files.
join_PCA(
    ref_PCAReady = ref_PCA,
    ref_classlist = ref_classlist,
    new_PCAReady = pca_ready_save,
    new_classlist = classification_list,
)

Duplicates found, removing

No new objects to add.
