# Performance Test of `sedkit`
Th purpose of this notebook is to test the performance of `sedkit` across the main sequence by comparing the results of this software against well characterized stars in the literature.

In [64]:
from sedkit import SED, Catalog, BTSettl
from astropy import units as q
from astroquery.xmatch import XMatch
from multiprocessing.pool import ThreadPool
from bokeh.io import output_notebook
output_notebook()

## Stars with spectra
For this test, I crossmatch Gaia DR2 with the SDSS APOGEE 2 survey to get a good distribution of stars with parallaxes, NIR spectra, and effective temperatures. 

In [65]:
# Crossmatch surveys... this takes about a minute to find 475k sources
all_sources = XMatch.query(cat1='vizier:III/284/allstars', cat2='vizier:II/246/out', max_distance=5 * q.arcsec)

(<Quantity 63781 K>, <Quantity 3729 K>): Teff value is not in valid range [0.0 K, 50000.0 K].
(<Quantity -1.19 solMass>, <Quantity 0.07 solMass>, <Quantity 0.08 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
(<Quantity -8.09 solMass>, <Quantity 0.43 solMass>, <Quantity 0.45 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].


In [66]:
# Take a random sample of 100 sources wth effective temperatures
n_sources = 50
cols = ['Gaia', 'Teff', 'e_Teff', 'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag', 'Kmag', 'e_Kmag']
sources = all_sources[all_sources['Teff'] > 100]

# Sort by Teff then use sample spread across Teff space
skip = int(len(sources) / n_sources)
sources = sources[::skip][cols]
sources.pprint(max_width=-1, max_lines=-1)

(<Quantity 55482 K>, <Quantity 50784 K>): Teff value is not in valid range [0.0 K, 50000.0 K].
        Gaia          Teff    e_Teff  Jmag  e_Jmag  Hmag  e_Hmag  Kmag  e_Kmag
------------------- -------- ------- ------ ------ ------ ------ ------ ------
 538028216707715712 3723.226  61.795  8.597  0.039  7.667  0.029  7.314  0.018
 418561747167182848  4937.46 116.435  12.73  0.024 12.095   0.03 12.022  0.023
 535589676014595456 9090.802 463.677 11.008  0.023 10.928   0.03 10.936  0.023
2496885357858590976 5637.811  136.26 11.326  0.021 11.015  0.022 10.938  0.022
 435332907260617600 4919.329  96.176 11.594  0.019 10.944  0.028 10.774   0.02
 224018519133853696  4948.01 108.534 12.236  0.019 11.639  0.017 11.522  0.022
3311874403210528000 4372.207  95.342 12.432  0.021 11.663  0.024 11.483   0.02
 206075318302126336 5027.194  91.592 10.616   0.02 10.038  0.019  9.864  0.018
3417529189944950528 4735.421  86.098 11.565  0.021 10.742  0.022 10.505  0.017
3217259640935445376 5132.159 110.571

In [67]:
# Example
bt = BTSettl()
row = sources[1]
s = SED('Gaia DR2 {}'.format(row[0]), verbose=True)
s.use_best_fit = True

# Add the 2MASS photometry
s.add_photometry('2MASS.J', row[3], row[4])
s.add_photometry('2MASS.H', row[5], row[6])
s.add_photometry('2MASS.Ks', row[7], row[8])

# Get SDSS photometry
s.find_SDSS()

# Get WISE photometry
s.find_WISE()

# Get APOGEE spectrum
s.find_SDSS_spectra(surveys=['optical', 'apogee'], search_radius=None)

# Get Gaia parallax
s.find_Gaia(include=['parallax', 'teff'])

# Fit model grid
s.fit_modelgrid(bt, fit_to='phot', name='phot_fit')

s.spectrum_from_modelgrid(bt, **{par: val for par, val in s.best_fit['phot_fit'].items() if par in bt.parameters})

s.plot()
s.results
print(s.Teff_Gaia)

[sedkit] SED initialized
[sedkit] Setting name to Gaia DR2 418561747167182848
[sedkit] 1 record for Gaia DR2 418561747167182848 found in Simbad.
[sedkit] Setting sky_coords to <SkyCoord (ICRS): (ra, dec) in deg
    (0.61747028, 55.43474361)>
[sedkit] There was a problem determining the interstellar reddening. Setting to 0. You can manually set this with the 'reddening' attribute.
[sedkit] Setting parallax to (<Quantity 0.5832 mas>, <Quantity 0.026 mas>) with reference '2018yCat.1345....0G'
[sedkit] Setting distance to (<Quantity 1716.57 pc>, <Quantity 74.35 pc>, <Quantity 76.17 pc>) with reference '2018yCat.1345....0G'
[sedkit] Setting interstellar reddening to 0.1599999964237213 with reference '2018JOSS....3..695M'
[sedkit] Setting 2MASS.J photometry to 12.626 (0.024) with reference 'None'
[sedkit] Setting 2MASS.H photometry to 12.043 (0.030) with reference 'None'
[sedkit] Setting 2MASS.Ks photometry to 11.996 (0.023) with reference 'None'
[sedkit] 0 records found in V/147 using targe

4501.14 K


In [74]:
# Function for multiprocessing
bt = BTSettl()
def sed_pipeline(row, model=bt):
    
    try:
        
        name = 'Gaia DR2 {}'.format(row[0])
        
        # Make the SED
        s = SED(name, verbose=False)

        # Add the 2MASS photometry
        s.add_photometry('2MASS.J', row[3], row[4])
        s.add_photometry('2MASS.H', row[5], row[6])
        s.add_photometry('2MASS.Ks', row[7], row[8])

        # Get photometry
        s.find_SDSS()
        s.find_WISE()

        # Get APOGEE spectrum
    #     s.find_SDSS_spectra(surveys=['optical', 'apogee'], search_radius=None)

        # Fit model grid
        s.fit_modelgrid(model, fit_to='phot', name='phot_fit')
        s.spectrum_from_modelgrid(bt, **{par: val for par, val in s.best_fit['phot_fit'].items() if par in bt.parameters})

        # Get Gaia parallax
        s.find_Gaia(include=['parallax', 'teff'])
        
        # Reject if bad parallax
        if (s.distance[0]/s.distance[1] < 5) or s.distance > 1000*q.pc:
            print("{}: Bad Parallax ".format(s.parallax))
            del s

        # Reject if no Teff
        elif s.Teff_Gaia < 200*q.K or s.Teff_Gaia > 10000*q.K:
            print("{}: Bad Teff ".format(s.Teff_Gaia))
            del s

        else:

            s.make_sed()

            return s
        
    except:
        
        print("{}: Could not make SED via pipeline".format(name))

In [75]:
ex = sed_pipeline(sources[0])
ex.plot()
print(ex.parallax)
print(ex.Teff_Gaia)
ex.results

Gaia DR2 538028216707715712: Could not make SED via pipeline


AttributeError: 'NoneType' object has no attribute 'plot'

In [76]:
# Multiprocess sources table    
pool = ThreadPool(8)
all_seds = pool.map(sed_pipeline, sources.iterrows())
pool.close()
pool.join()

(<Quantity -2.79 solMass>, <Quantity 0.13 solMass>, <Quantity 0.13 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
(<Quantity -2.61 solMass>, <Quantity 0.13 solMass>, <Quantity 0.13 solMass>): mass value is not in valid range [0.0 solMass, 226.0 solMass].
Gaia DR2 3311874403210528000: Could not make SED via pipeline
Gaia DR2 435332907260617600: Could not make SED via pipeline
Gaia DR2 535589676014595456: Could not make SED via pipeline
Gaia DR2 538028216707715712: Could not make SED via pipeline
(<Quantity -0.0427 mas>, <Quantity 0.0446 mas>): parallax value is not in valid range [0.0 mas, inf mas].
Gaia DR2 865352111038710016: Could not make SED via pipeline
(<Quantity -0.0627 mas>, <Quantity 0.1099 mas>): parallax value is not in valid range [0.0 mas, inf mas].
Gaia DR2 3126852056826460032: Could not make SED via pipeline
(<Quantity 0.2729 mas>, <Quantity 0.0863 mas>): Bad Parallax 
Gaia DR2 3417529189944950528: Could not make SED via pipeline
Gaia DR2 20607

In [83]:
# Make a catalog
cat = Catalog('Gaia + APOGEE')

# Add the SEDs to the catalog
for s in all_seds:
    if s is not None:
        cat.add_SED(s)

# Add Gaia Teff column
cat.add_column('Teff_Gaia', list(sources['Teff'])*q.K, unc=list(sources['e_Teff'])*q.K)

# Make a scatter plot
# from bokeh.plotting import figure, show
# fig = figure()
# fig.circle(cat.results['Teff'], cat.results['Teff_Gaia'])
# show(fig)

Teff
 K  
---- [] K


In [80]:
cat.results['Teff'], cat.results['Teff_Gaia']
# cat.plot('Teff', 'Teff_Gaia')

KeyError: 'Teff_Gaia'

In [None]:
cat.results['name', 'Teff', 'Teff_Gaia']