In [1]:
from astropy import units as u
from astropy import coordinates as coord
from astropy import coordinates as coord
from astropy import units as u
from astropy.table import Table
from time import process_time

import astroquery
from astroquery.simbad import Simbad
from astroquery.vo_conesearch import conf, conesearch, vos_catalog

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd

import splat

Adding 145 sources from C:\splat-master/resources/Data/Public/LRIS-RED/ to spectral database


# Customize SIMBAD

In [10]:
def my_simbad(path):
    """
    my_simbad(path)
    
    Create an customized SIMBAD instance with specified VO table fields. 
    
    Parameters
    ----------
    path: string
        A string, specifies the path to the excel file that stores the names of the VO table fields.
        
    Returns
    -------
    out:
        An customized astroquery.simbad.Simbad() instance.
    
    
    Comments:    
    -------
    You can customize the VO Table through modifying "Catalogs and columns.xlsx". Or, simply modifying the returned incident.
    
    """
    
    fields_to_add = pd.read_excel(path) # read in the fields we want from the excel sheet
    fields_to_add = fields_to_add.where(fields_to_add['output name'] != '[drop]').dropna(how='all') # drop the unwanted fields

    fields_to_add['input name'] = fields_to_add['input name'].apply(lambda x: x[:-2] + '(' + x[-1] + ')' if 'FLUX' in x else x) # reformat from "FLUX_U" to "FLUX(U)"
    fields_to_add['input name'] = fields_to_add['input name'].apply(lambda x: x.split('(')[0].lower() + '(' + x.split('(')[1]  if '(' in x else x) # reformat from "FLUX(U)" to "flux(U)" This way we preserve the captial and lower cases inside the parentheses
    fields_to_add['input name'] = fields_to_add['input name'].apply(lambda x: x if '(' in x else x.lower()) # reformat: convert all others to lower cases.

    fields_to_add['input name'] = fields_to_add['input name'].replace({'plx_value':'plx', 'mk_spectral_type':'mk', 'sp_type':'sptype'}) # reformat some other fields

    fields_to_add.drop(index=0, axis=1, inplace=True) # drop the row 'main_id' to avoid repeated fields. 'main_id' is already included as default.

    fields_to_add.reset_index(inplace=True) # reset index which was messed up when we drop the unwanted fields.
    fields_to_add.drop(columns='index', axis=0, inplace=True)

    fields_list = list(fields_to_add['input name']) # create a list to add to the search fields
    fields_list.append('otype')
    
    customSimbad = Simbad()
    
    for item in fields_list:
        try:
            customSimbad.add_votable_fields(item)
        except:
            continue
    
    print('"customSimbad" is ready')
    return customSimbad

In [8]:
customSimbad = my_simbad('./Catalogs and columns.xlsx')

"customSimbad" is ready


In [11]:
unwanted_otypes = ['X','QSO', 'Galaxy', 'Blue', 'Radio', 'WD*', 'SN', 'Candidate_RGB*', 'Planet', 'Planet?', 'GroupG', 'Unknown', 'RadioG', 'GinCl','BClG', 'Compact_Gr_G','ClG', 'LINER', 'Seyfert_1','AGN', 'EmG', 'GinGroup', 'V*', 
                   'Inexistent', 'Radio(mm)', 'S*', 'HB*', 'Seyfert_2', 'MolCld', 'DkNeb', 'gammaBurst', 'RGB*', 'UV', 'C*', 'YSO', 'GiC','IR>30um']

unwanted_spt = ['IV', 'III', 'II', 'G', 'F', 'A', 'B', 'O']

favorite_otypes = ['low-mass*', 'BYDra', 'brownD*', 'PM*', 'BY*', 'Fl*', 'Flare*', 'SB*', 'Ro*','LM*', 'RotV*','Candidate_brownD*','Candidate_Hsd','Er*']

bibcode_list = ['2013AJ....145..102L', '2007ApJ...669.1235L', '2020AJ....159...30H']

def o_filter(df,extra=[],olist=unwanted_otypes):
    """
    o_filter(df,extra=[],olist=unwanted_otypes)
    
    Filtering out entries with unwanted object types from the search result
    

    Parameters
    ----------
    df:
        A Pandas.DataFrame object that stores the search result from Simbad().query_region.
    
    extra:
        Append object types to be filtered.
    
    olist:
        A list that stores the unwanted object types in string form. Default set to "unwanted_otypes"
    
        
    Returns
    -------
    out:
        A Pandas.DataFrame that excludes the specified object types.
    """    
    if len(extra) ==0:
        result = df.drop(index = df[df['OTYPE'].str.decode('ascii').isin(olist)].index)
        return result
    else:
        olist.extend(extra)
        result = df.drop(index = df[df['OTYPE'].str.decode('ascii').isin(olist)].index)
        return result

def s_filter(df):
    """
    s_filter(df)
    
    Filter out unwanted spectral types from the search result.
    

    Parameters
    ----------
    df:
        A Pandas.DataFrame that stores the search result from Simbad().query_region.
        
    Returns
    -------
    out:
        A Pandas.DataFrame that excludes the spcified spectral types.
    
    Notes:
    -------
        The unwanted spectral types are predefined by "unwanted_spt".
    """    
    result = df.drop(index = df[df['SP_TYPE'].str.decode('ascii').apply(lambda x: 'y' if any(ele in x for ele in unwanted_spt) else '') == 'y'].index)
    return result

def take_favorite(df):
    """
    take_favorite(df)
    
    Take search result with specified object types listed in "favorite_otypes" and exclude the others.

    Parameters
    ----------
    df:
        A Pandas.DataFrame object that stores the search result from Simbad().query_region.
        
    Returns
    -------
    out:
        A Pandas.DataFrame that includes only the specified object types.
    
    Notes:
    -------
        The favorite object types are predefined by "favorite_otypes".
    """        
    if any(ele in list(df['OTYPE'].str.decode('ascii').unique()) for ele in favorite_otypes):
        result = df[df['OTYPE'].str.decode('ascii').isin(favorite_otypes)]
    else:
        result = df
    return result

def magnet_V(df):
    """
    magnet_V(df)
    
    Take search result with spectral type suffix "V" and exclude the others.
    

    Parameters
    ----------
    df:
        A Pandas.DataFrame object that stores the search result from Simbad().query_region.
        
    Returns
    -------
    out:
        A Pandas.DataFrame object.
    
    Notes:
    -------
        If none of the search results satisfies the requirement then the original results are returned.
    """        
    if any(('V' or 'M') in x for x in df['SP_TYPE'].str.decode('ascii')):
        df = df[[('V' or 'M') in x for x in df['SP_TYPE'].str.decode('ascii')]]
        return df
    else:
        return df

def compare_spt(df, index, diff=3):
    """
    magnet_V(df)
    
    Compare spectral types between ours and the one in SIMBAD. If smaller than indicated then we pick the source.
    

    Parameters
    ----------
    df:
        A Pandas.DataFrame object that stores the search result from Simbad().query_region.
        
    Returns
    -------
    out:
        A Pandas.DataFrame object.
    
    Notes:
    -------
        If none of the search results satisfies the requirement then the original results are returned.
    """   
    temp = df[df['SP_TYPE'].str.decode('ascii').apply(lambda x: abs(splat.typeToNum(x) - splat.typeToNum(kast['LSPN_SPT'][index]))) <= diff].copy()
    if len(temp) == 0:
        return df
    else:
        return temp

def filter_z(df):
    result = magnet_V(take_favorite(s_filter(o_filter(temp,['**']))))
    return result

def go_query(length=25):
    t1 = process_time()
    raw_result = customSimbad.query_region(kast['SKYCOORD'][0], radius = 2*u.arcmin).to_pandas()
    for i in range(1, length):
        try:
            temp = customSimbad.query_region(kast['SKYCOORD'][i], radius = 2*u.arcmin).to_pandas()
            temp = s_filter(o_filter(temp))
            temp = compare_spt(temp,i)
            temp = take_favorite(temp)
            temp = magnet_V(temp)
            raw_result = raw_result.append(temp.iloc[0])
        except:
            raw_result = raw_result.append(pd.Series(dtype='str'), ignore_index=True)
    t2 = process_time()
    raw_result.reset_index(inplace=True)
    raw_result.drop('index', axis=1, inplace=True)
    # For some reason SIMBAD returns some columns twice. Below I manually dropped those.
    raw_result = raw_result.drop(columns=['RA_PREC', 'DEC_PREC', 'COO_ERR_MAJA',
       'COO_ERR_MINA', 'COO_ERR_ANGLE', 'COO_QUAL', 'COO_WAVELENGTH','MK_ds', 'MK_mss','SP_QUAL','RA_2','DEC_2','COO_BIBCODE_2','SP_BIBCODE_2'], axis=0)
    raw_result['FLAG'] = ''
    print(str((t2-t1)/60) + ' mins')
    return raw_result

def single_query(x, r=2, extra_filter=[], extra_fav=[], verbose=False):
    x = splat.properCoordinates(x)
    try:
        temp = customSimbad.query_region(x, radius = r*u.arcmin).to_pandas()
        temp = take_favorite(s_filter(o_filter(temp, extra_filter)))
        temp = magnet_V(temp)
        raw_result = temp
    except:
        if verbose==True:
            print('no result found')
    
    raw_result.reset_index(inplace=True)
    raw_result.drop('index', axis=1, inplace=True)
    # For some reason SIMBAD returns some columns twice. Below I manually dropped those.
    raw_result = raw_result.drop(columns=['RA_PREC', 'DEC_PREC', 'COO_ERR_MAJA',
       'COO_ERR_MINA', 'COO_ERR_ANGLE', 'COO_QUAL', 'COO_WAVELENGTH','MK_ds', 'MK_mss','SP_QUAL','RA_2','DEC_2','COO_BIBCODE_2','SP_BIBCODE_2'], axis=0)
    return raw_result

def go_querybib(bibs):
    temp = []
    for i in bibs:
        temp.append(Simbad.query_bibobj(i).to_pandas())
    result = pd.concat(temp, ignore_index=True)
    return result

def decode(item):
    try:
        return item.decode('ascii')
    except:
        return item
    
def diff_spt(df, verbose=False):
    df['diff_mk'] = abs(raw['MK_Spectral_type'].str.decode('ascii').apply(lambda x: splat.typeToNum(x)) - kast['LSPN_SPT'].apply(lambda x: splat.typeToNum(x)))
    df['diff_simbad'] = abs(raw['SP_TYPE'].str.decode('ascii').apply(lambda x: splat.typeToNum(x)) - kast['LSPN_SPT'].apply(lambda x: splat.typeToNum(x)))
    df['DIFF_SPT'] = df[['diff_mk','diff_simbad']].min(axis=1)
    if verbose:
        return df
    else:
        df.drop(columns=['diff_mk','diff_simbad'], axis=0, inplace=True)
        return df

In [12]:
def rename_columns:
    columns_to_convert = []
    for i in raw.columns:
        if 'b' in str(raw.loc[0, i]):
            columns_to_convert.append(i)
        
    for i in columns_to_convert:
        raw[i] = raw[i].str.decode('ascii')

    ionames = pd.read_excel('./Catalogs and columns.xlsx')
    ionames = ionames[ionames['output name'] != '[drop]'].dropna(how='all') # obtain the input name vs output name
    new_names = ionames[['input name','output name']].set_index('input name')['output name'].to_dict() # turn the input vs output name into a dictionary for the next step
    raw.rename(columns=new_names, inplace=True)

NameError: name 'raw' is not defined