# Performance Test of `sedkit`
Th purpose of this notebook is to test the performance of `sedkit` across the main sequence by comparing the results of this software against well characterized stars in the literature.

In [1]:
from sedkit import SED, Catalog, ModelGrid
from astropy import units as q
from astroquery.xmatch import XMatch
from multiprocessing.pool import ThreadPool
from bokeh.io import output_notebook
output_notebook()

bt = ModelGrid('BT-Settl', ['alpha', 'logg', 'teff', 'meta'], q.AA, q.erg/q.s/q.cm**2/q.AA, ref='2014IAUS..299..271A')
bt.load('/Users/jfilippazzo/Documents/Data/Models/bt-settl_400_10k/')

Could not import regions, which is required for some of the functionalities of this module.


In [11]:
# # Build SED that definitely has a MILES spectrum
# s = SED('Gaia DR2 538028216707715712')
# s.find_2MASS()
# s.find_WISE()
# s.find_Gaia(idx=0)
# # s.find_SDSS_spectra(surveys=['optical', 'apogee'], search_radius=None)
# s.parallax = None
# s.fit_modelgrid(bt, fit_to='phot')
# s.plot()

m = XMatch.query(cat1='vizier:J/MNRAS/371/703/catalog', cat2='vizier:I/345/gaia2', max_distance=5 * q.arcsec)
m.pprint(max_width=-1, max_lines=-1)

angDist    _RAJ2000    _DEJ2000  recno  FileName        Name         RAJ2000     DEJ2000   E(B-V)    SpType     II  Teff logg [Fe/H]    Lib    CaT AssocData        SimbadName          ra_epoch2000     dec_epoch2000   errHalfMaj errHalfMin errPosAng      source_id             ra       ra_error       dec       dec_error parallax parallax_error    pmra   pmra_error   pmdec   pmdec_error duplicated_source phot_g_mean_flux phot_g_mean_flux_error phot_g_mean_mag phot_bp_mean_flux phot_bp_mean_flux_error phot_bp_mean_mag phot_rp_mean_flux phot_rp_mean_flux_error phot_rp_mean_mag   bp_rp   radial_velocity radial_velocity_error rv_nb_transits teff_val a_g_val e_bp_min_rp_val radius_val  lum_val 
-------- ----------- ----------- ----- ---------- ---------------- ----------- ----------- ------ ------------ --- ----- ---- ------ --------- --- --------- ----------------------- ----------------- ----------------- ---------- ---------- --------- ------------------- --------------- -------- ----------

## Stars with spectra
For this test, I crossmatch Gaia DR2 with the SDSS APOGEE 2 survey to get a good distribution of stars with parallaxes, NIR spectra, and effective temperatures. 

In [2]:
# Crossmatch surveys... this takes about a minute to find 475k sources
all_sources = XMatch.query(cat1='vizier:III/284/allstars', cat2='vizier:II/246/out', max_distance=5 * q.arcsec)

In [3]:
# Take a random sample of 100 sources wth effective temperatures
n_sources = 50
cols = ['Gaia', 'Teff', 'e_Teff', 'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag']
sources = all_sources[all_sources['Teff'] > 100]

# Sort by Teff then use sample spread across Teff space
skip = int(len(sources) / n_sources)
sources = sources[::skip][cols]
sources.pprint(max_width=-1, max_lines=-1)

        Gaia          Teff    e_Teff  Jmag  e_Jmag  Hmag  e_Hmag  Kmag  e_Kmag
------------------- -------- ------- ------ ------ ------ ------ ------ ------
 538028216707715712 3723.226  61.795  8.597  0.039  7.667  0.029  7.314  0.018
 418561747167182848  4937.46 116.435  12.73  0.024 12.095   0.03 12.022  0.023
 535589676014595456 9090.802 463.677 11.008  0.023 10.928   0.03 10.936  0.023
2496885357858590976 5637.811  136.26 11.326  0.021 11.015  0.022 10.938  0.022
 435332907260617600 4919.329  96.176 11.594  0.019 10.944  0.028 10.774   0.02
 224018519133853696  4948.01 108.534 12.236  0.019 11.639  0.017 11.522  0.022
3311874403210528000 4372.207  95.342 12.432  0.021 11.663  0.024 11.483   0.02
 206075318302126336 5027.194  91.592 10.616   0.02 10.038  0.019  9.864  0.018
3417529189944950528 4735.421  86.098 11.565  0.021 10.742  0.022 10.505  0.017
3217259640935445376 5132.159 110.571 12.257  0.026 11.753  0.023 11.642  0.022
3430728891460006016  5279.57 132.515 13.089  0.021 1

In [8]:
# Function for multiprocessing
def sed_pipeline(row, model=bt):
    
    try:
        
        name = 'Gaia DR2 {}'.format(row[0])
        
        # Make the SED
        s = SED(name, verbose=False)

        # Add the 2MASS photometry
        s.add_photometry('2MASS.J', row[3], row[4])
        s.add_photometry('2MASS.H', row[5], row[6])
        s.add_photometry('2MASS.Ks', row[7], row[8])

        # Get photometry
        s.find_SDSS()
        s.find_WISE()

        # Get APOGEE spectrum
    #     s.find_SDSS_spectra(surveys=['optical', 'apogee'], search_radius=None)

        # Fit model grid
        s.fit_modelgrid(model, fit_to='phot', name='phot_fit')
        s.spectrum_from_modelgrid(bt, **{par: val for par, val in s.best_fit['phot_fit'].items() if par in bt.parameters})

        # Get Gaia parallax
        s.find_Gaia(include=['parallax', 'teff'])
        
        # Reject if bad parallax
        if (s.distance[0]/s.distance[1] < 5) or s.distance > 1000*q.pc:
            print("{}: Bad Parallax ".format(s.parallax))
            del s

        # Reject if no Teff
        elif s.Teff_Gaia < 200*q.K or s.Teff_Gaia > 10000*q.K:
            print("{}: Bad Teff ".format(s.Teff_Gaia))
            del s

        else:

            s.make_sed()

            return s
        
    except IOError:
        
        print("{}: Could not make SED via pipeline".format(name))

In [9]:
# Multiprocess sources table    
pool = ThreadPool(8)
all_seds = pool.map(sed_pipeline, sources.iterrows())
pool.close()
pool.join()

(<Quantity -2.61 solMass>, <Quantity 0.12 solMass>, <Quantity 0.13 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
(<Quantity -2.78 solMass>, <Quantity 0.13 solMass>, <Quantity 0.13 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
(<Quantity -0.0427 mas>, <Quantity 0.0446 mas>): parallax value is not in valid range [0.0 mas, inf mas].
(<Quantity -0.2 solMass>, <Quantity 0.02 solMass>, <Quantity 0.03 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
(<Quantity -0.0627 mas>, <Quantity 0.1099 mas>): parallax value is not in valid range [0.0 mas, inf mas].
(<Quantity 0.2729 mas>, <Quantity 0.0863 mas>): Bad Parallax 
(<Quantity 0.0004 mas>, <Quantity 0.0373 mas>): Bad Parallax 
(<Quantity -0.34 solMass>, <Quantity 0.03 solMass>, <Quantity 0.04 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
(<Quantity -0.21 solMass>, <Quantity 0.02 solMass>, <Quantity 0.02 solMass>): mass value is not in va

TableParseError: Failed to parse SIMBAD result! The raw response can be found in self.last_response, and the error in self.last_table_parse_error. The attempted parsed result is in self.last_parsed_result.
 Exception: No table found in VOTABLE file.

In [2]:
all_seds

NameError: name 'all_seds' is not defined

In [6]:
# Make a catalog
cat = Catalog('Gaia + APOGEE')

# Add the SEDs to the catalog
for s in all_seds:
    if s is not None:
        cat.add_SED(s)

# Add Gaia Teff column
cat.add_column('Teff_Gaia', list(sources['Teff'])*q.K, unc=list(sources['e_Teff'])*q.K)

# Make a scatter plot
# from bokeh.plotting import figure, show
# fig = figure()
# fig.circle(cat.results['Teff'], cat.results['Teff_Gaia'])
# show(fig)

ValueError: 51 != 0: Data is not the right size for this catalog.

In [80]:
cat.results['Teff'], cat.results['Teff_Gaia']
# cat.plot('Teff', 'Teff_Gaia')

KeyError: 'Teff_Gaia'

In [None]:
cat.results['name', 'Teff', 'Teff_Gaia']