Author: Devin D. Whitten
Date:   November 11, 2018
Institute: University of Notre Dame
Email: dwhitten@nd.edu

In [2]:
 ![GitHub_Logo](Images/SPHINX_logo_v3.png)

/bin/sh: -c: line 0: syntax error near unexpected token `Images/SPHINX_logo_v3.png'
/bin/sh: -c: line 0: `[GitHub_Logo](Images/SPHINX_logo_v3.png)'


Please refer to Whitten et al. (2018) and the README.md file for an explanation of the key functionality in SPHINX.
This notebook is a basic demonstration of the temperature and metallicity determination routines and of the use of the SPHINX core functions.

In [8]:
import pandas as pd
import numpy as np
import param as param
import sys, os
sys.path.append("interface")

import train_fns, net_functions, network_array, io_functions

## Temperature Determination Routines

In [9]:
### Load the target (science) and training catalogs as Dataset objects.
### Both are set up for the TEFF variable and receive the same param.params settings.

# Science catalog: file location comes from params['target_path'].
target = train_fns.Dataset(
    path=param.params['target_path'],
    variable='TEFF',
    params=param.params,
    mode="TARGET",
)

# Training catalog: file location comes from params['segue_path'].
training = train_fns.Dataset(
    path=param.params['segue_path'],
    variable="TEFF",
    params=param.params,
    mode="SEGUE",
)

... Reading database:   datasets/IDR_201803_testing_native_feh.csv
Initial Network Size of:  TARGET 4387
... Reading database:   datasets/SEGUE_calibrated_catalog_GOLD.csv.gz
Initial Network Size of:  SEGUE 302604


In [11]:
### Prepare the target catalog inputs: format the input columns, then build colors.

# Rename the input columns (the log output lists the replaced and new names).
target.format_names()

# Build the color columns from the renamed bands.
target.format_colors()

# Print the input statistics for the "colors" inputs.
target.get_input_stats(inputs="colors")

---------------------------------------------------------------------------------------------
...format_names()
Replacing:   ['MAG_6_gSDSS', 'MAG_6_rSDSS', 'MAG_6_iSDSS', 'MAG_6_J0395', 'MAG_6_J0410', 'MAG_6_J0430', 'MAG_6_J0515', 'MAG_6_J0660', 'MAG_6_J0861']
With:        ['gSDSS', 'rSDSS', 'iSDSS', 'F395', 'F410', 'F430', 'F515', 'F660', 'F861']
format_colors()
---------------------------------------------------------------------------------------------
TEFF  input statistics: 
	 gSDSS_rSDSS  :  0.349 0.101
	 gSDSS_iSDSS  :  0.473 0.138
	 gSDSS_F395  :  -0.528 0.285
	 gSDSS_F410  :  -0.278 -0.167
	 gSDSS_F430  :  -0.194 0.117
	 gSDSS_F515  :  0.118 -0.130
	 gSDSS_F660  :  0.367 0.129
	 gSDSS_F861  :  0.517 0.199
	 rSDSS_iSDSS  :  0.135 0.072
	 rSDSS_F395  :  -0.828 0.300
	 rSDSS_F410  :  -13.245 1.729
	 rSDSS_F430  :  -0.522 0.160
	 rSDSS_F515  :  -0.249 -0.152
	 rSDSS_F660  :  0.023 0.053
	 rSDSS_F861  :  0.168 0.128
	 iSDSS_F395  :  -0.986 0.368
	 iSDSS_F410  :  -0.711 0.194
	 iSDS

In [12]:
### Process the training catalog.
### NOTE: scale_frame="self" is passed here. To scale the training set based on
### the target set distributions instead, pass scale_frame=target.scale_frame.

training.process(
    scale_frame="self",
    threshold=75,
    SNR_limit=25,
    normal_columns=None,
    set_bounds=True,
    bin_number=20,
    bin_size=200,
    verbose=True,
    show_plot=True,
)



... Processing  TEFF  training set
remove_discrepant_variables():   75
I've not implemented this feature
SNR_threshold
Stars removed:   155809
---------------------------------------------------------------------------------------------
...format_names()
---------------------------------------------------------------------------------------------
faint_bright_limit()
custom columns:      Index(['Unnamed: 0', 'SPSPEC', 'SURVEY', 'RA', 'DEC', 'TEFF', 'TEFF_ERR',
       'TEFF_IRFM', 'FEH_ADOP', 'FEH_ADOP_ERR', 'gSDSS', 'rSDSS', 'iSDSS',
       'zMag', 'F395', 'F410', 'F430', 'F515', 'F660', 'F861', 'gMag_Sigma',
       'rMag_Sigma', 'iMag_Sigma', 'zMag_Sigma', 'F395Mag_Sigma',
       'F410Mag_Sigma', 'F430Mag_Sigma', 'F515Mag_Sigma', 'F660Mag_Sigma',
       'F861Mag_Sigma', 'FEH', 'FEH_ERR', 'CFE_COR', 'SNR', 'SPHINX_ID'],
      dtype='object')
	 minimum in: gSDSS 9.714089152244656
	 Current length after: gSDSS 100056
	 minimum in: rSDSS 12.2813568217552
	 Current length after: rSDSS 9959

In [13]:
### Hand the training set's scale_frame to the target, then scale the target
### photometry (presumably with that same frame -- confirm in train_fns).
target.set_scale_frame(training.scale_frame)
target.scale_photometry()

---------------------------------------------------------------------------------------------
...scale_photometry()


### Build Network Array

In [16]:
### Initialize the network array for TEFF from the processed training set,
### using "colors" as the input type and the array size from param.params.
Network_Array = network_array.Network_Array(
    training_set=training,
    interp_frame=training.interp_frame,
    target_variable="TEFF",
    scale_frame=training.scale_frame,
    param_file=param,
    input_type="colors",
    array_size=param.params['array_size'],
)

### Naming note from the author: this method should really be generate_inputs()
Network_Array.set_input_type()

### Naming note from the author: this method should really be generate_arrays()
### Combinations are asserted on F410 and rejected on F430 (see the log output).
Network_Array.generate_inputs(assert_band=["F410"], reject_band=['F430'])

Network input_type:   colors
['gSDSS_rSDSS', 'gSDSS_iSDSS', 'gSDSS_F395', 'gSDSS_F410', 'gSDSS_F430', 'gSDSS_F515', 'gSDSS_F660', 'gSDSS_F861', 'rSDSS_iSDSS', 'rSDSS_F395', 'rSDSS_F410', 'rSDSS_F430', 'rSDSS_F515', 'rSDSS_F660', 'rSDSS_F861', 'iSDSS_F395', 'iSDSS_F410', 'iSDSS_F430', 'iSDSS_F515', 'iSDSS_F660', 'iSDSS_F861', 'F395_F410', 'F395_F430', 'F395_F515', 'F395_F660', 'F395_F861', 'F410_F430', 'F410_F515', 'F410_F660', 'F410_F861', 'F430_F515', 'F430_F660', 'F430_F861', 'F515_F660', 'F515_F861', 'F660_F861']
...Generating input combinations
30260340  of given input type
... Generating TEFF network array
	pre-assert band:   30260340
... Asserting:  F410
	pre-assert band:   27152235
... Rejecting the following band:   F430
	 pre-assert colors length:    2904615
	pre-color rejection combinations:   2904615
2904615  total input combinations


### Train network array and evaluate performance

In [None]:
### Basic training function with iterations of outlier removal (iterations=2).
Network_Array.train(iterations=2)

### Evaluate the trained networks and write out their performance.
Network_Array.eval_performance()
Network_Array.write_network_performance()

### Pick the top networks (select=25) and exclude the rest.
Network_Array.skim_networks(select=25)


### Science Prediction and output

In [None]:
### Run the network array on the target (science) catalog.
Network_Array.prediction(target, flag_thing=False)

### Write the training results and produce the training plots.
### Each output step is called exactly once; repeating write_training_results()
### and merge_master() within the same cell only redoes the same work.
Network_Array.write_training_results()
Network_Array.training_plots()

### Merge the target results using the configured array size, then save.
### NOTE(review): merge_master() is assumed not to require a second call --
### confirm against train_fns.
target.merge_master(array_size=param.params['array_size'])
target.save()

## Metallicity Determination Routines

Basic demonstration of the metallicity determination routines. The workflow is essentially the same as in the temperature demonstration, but it is repeated here for completeness.

In [None]:
### Load the target (science) and training catalogs again, this time for FEH.
### NOTE: this rebinds `target` and `training` from the temperature section.

# Science catalog: file location comes from params['target_path'].
target = train_fns.Dataset(
    path=param.params['target_path'],
    variable='FEH',
    params=param.params,
    mode="TARGET",
)

# Training catalog: file location comes from params['segue_path'].
training = train_fns.Dataset(
    path=param.params['segue_path'],
    variable="FEH",
    params=param.params,
    mode="SEGUE",
)