In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pyarrow.parquet as pq
import pandas as pd
import warnings
from astropy.coordinates import SkyCoord
from astropy import units as u
import gc

In [None]:
# Silence every Python warning for the rest of the kernel session. This was
# added to hide the warnings pandas emits as its API changes (this is legacy
# code adapted for expediency).
# NOTE(review): the filter is not restricted to a category or module, so it
# also hides unrelated warnings — consider narrowing it.
warnings.filterwarnings("ignore")

In [None]:
def read_pmsu(filename = '../ref/apj520168t3_mrt.txt',
              data=pd.DataFrame(),verbose=1,csvfile=None):
    """Parse the PMSU fixed-width table and append its rows to `data`.

    The first 55 lines of the file are skipped and blank lines are ignored.
    Every other line is sliced at fixed character offsets into one row with
    the keys catalog, catfile, source, ggoid, ra, dec, nmag, fmag, spectype,
    specsubtype and distance.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified. If it has a ``name``
        column, lines whose identifier appears in it are skipped.
    verbose, csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted.
    """
    # Rows built here store their identifier under 'source', so only a
    # caller-supplied 'name' column can ever trigger a skip.
    # NOTE(review): confirm whether the check was meant to use 'source'.
    known_names = (set(data['name'].tolist())
                   if 'name' in getattr(data, 'columns', ()) else set())
    with open(filename) as f:
        table = f.readlines()[55:]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        source = line[0:19].strip()
        if source in known_names:
            continue
        records.append({
            'catalog':'PMSU',
            'catfile':filename,
            'source':source,
            'ggoid':int(line[0:19]),
            'ra':float(line[34:45]),
            'dec':float(line[46:57]),
            'nmag':float(line[90:96]),
            'fmag':float(line[103:110]),
            'spectype':'M',
            'specsubtype':float(line[30:33]),
            'distance':float(line[214:218]),
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the PMSU table with the default arguments and display it.
read_pmsu()

In [None]:
def read_dr7(filename = '../ref/apj520168t2_mrt.txt',
             data=pd.DataFrame(),csvfile=None):
    """Parse the DR7 fixed-width table and append its rows to `data`.

    The first 63 lines of the file are skipped and blank lines are ignored.
    Every other line is sliced at fixed character offsets into one row with
    the keys catalog, catfile, source, ggoid, ra, dec, spectype, specsubtype
    and distance.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified. If it has a ``name``
        column, lines whose identifier appears in it are skipped (a
        'Skipping...' message is printed for each).
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted.
    """
    # Rows built here store their identifier under 'source', so only a
    # caller-supplied 'name' column can ever trigger a skip.
    # NOTE(review): confirm whether the check was meant to use 'source'.
    known_names = (set(data['name'].tolist())
                   if 'name' in getattr(data, 'columns', ()) else set())
    with open(filename) as f:
        table = f.readlines()[63:]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            # A blank (e.g. trailing) line would otherwise fail in int('').
            continue
        source = line[0:19].strip()
        if source in known_names:
            # The previous `print_inline` helper is not defined or imported in
            # the visible notebook, so plain print() is used here.
            print('Skipping...')
            continue
        records.append({
            'catalog':'DR7',
            'catfile':filename,
            'source':source,
            'ggoid':int(line[0:19]),
            'ra':float(line[41:52]),
            'dec':float(line[53:64]),
            'spectype':'M',
            'specsubtype':float(line[39:40]),
            'distance':float(line[239:244]),
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the DR7 table with the default arguments and display it.
read_dr7()

In [None]:
def read_lepinegaidos(filename1='../ref/aj403664t1_mrt.txt',
                      filename2='../ref/aj403664t2_mrt.txt',
                      data=pd.DataFrame(),csvfile=None):
    """Parse the two Lepine & Gaidos tables and append their rows to `data`.

    The tables are read in parallel: data line ``i`` of `filename1` (after 41
    skipped lines) is paired with data line ``i`` of `filename2` (after 30
    skipped lines). The identifier and position come from the first table; the
    spectral type and the value whose reciprocal is stored as 'distance' come
    from the second. Blank lines in the first table are ignored.

    Parameters
    ----------
    filename1, filename2 : str
        Paths of the two tables.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified. If it has a ``name``
        column, lines whose identifier appears in it are skipped.
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted.
    IndexError
        If the second table has fewer data lines than the first, or a
        spectral-type field is empty.
    ZeroDivisionError
        If the value inverted for 'distance' is zero.
    """
    # Rows built here store their identifier under 'source', so only a
    # caller-supplied 'name' column can ever trigger a skip.
    # NOTE(review): confirm whether the check was meant to use 'source'.
    known_names = (set(data['name'].tolist())
                   if 'name' in getattr(data, 'columns', ()) else set())
    with open(filename1) as f1:
        table1 = f1.readlines()[41:]
    with open(filename2) as f2:
        table2 = f2.readlines()[30:]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for i, row1 in enumerate(table1):
        if row1.strip()=='':
            continue
        source = row1[0:16].strip()
        if source in known_names:
            continue
        # Looked up only for rows that are kept, and by position so the two
        # tables stay aligned even when a line of the first one is skipped.
        row2 = table2[i]
        type_field = row2[109:112].strip()
        records.append({
            'catalog':'LepineGaidos',
            'catfile':filename1,
            'source':source,
            'ra':float(row1[55:65]),
            'dec':float(row1[67:77]),
            'spectype':type_field[0].upper(),
            'specsubtype':float(type_field[1:]),
            'distance':1./float(row2[94:101]),
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse both Lepine & Gaidos tables with the default arguments and display the result.
read_lepinegaidos()

In [None]:
def read_shkolnik2010(filename = '../ref/apj374973t1_ascii.txt',
                      data=pd.DataFrame(),csvfile=None):
    """Parse the Shkolnik 2010 tab-separated table and append its rows to `data`.

    Only file lines 5 through 34 (zero-based) are read and blank lines are
    ignored. The first three tab-separated fields supply source, ra and dec;
    spectype, specsubtype and distance are stored as None.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified. If it has a ``name``
        column, lines whose first field appears in it are skipped.
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If ra or dec cannot be converted to float.
    IndexError
        If a non-blank line has fewer than three fields.
    """
    # Rows built here store their identifier under 'source', so only a
    # caller-supplied 'name' column can ever trigger a skip.
    # NOTE(review): confirm whether the check was meant to use 'source'.
    known_names = (set(data['name'].tolist())
                   if 'name' in getattr(data, 'columns', ()) else set())
    with open(filename) as f:
        table = f.readlines()[5:35]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        entries = line.split('\t')
        if entries[0] in known_names:
            continue
        records.append({
            'catalog':'Shkolnik2010',
            'catfile':filename,
            'source':entries[0],
            'ra':float(entries[1]),
            'dec':float(entries[2]),
            'spectype':None,
            'specsubtype':None,
            'distance':None,
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the Shkolnik 2010 table with the default arguments and display it.
read_shkolnik2010()

In [None]:
def read_shkolnik2014(filename = '../ref/aj499985t1_ascii.txt',
                      data=pd.DataFrame(),csvfile=None):
    """Parse the Shkolnik 2014 tab-separated table and append its rows to `data`.

    Only file lines 6 through 232 (zero-based) are read; blank lines and lines
    without any tab are ignored. Fields 0-2 supply source, ra and dec, field 3
    the spectral type and field 5 the distance.

    The first character of field 3 is stored as 'spectype'. The remainder is
    split on ``'-' + that character`` and the pieces are averaged (as float16)
    to give 'specsubtype', so a range such as ``M3-M4`` becomes 3.5.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified. If it has a ``name``
        column, lines whose first field appears in it are skipped.
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted.
    IndexError
        If a kept line has fewer than six fields or an empty type field.
    """
    # Rows built here store their identifier under 'source', so only a
    # caller-supplied 'name' column can ever trigger a skip.
    # NOTE(review): confirm whether the check was meant to use 'source'.
    known_names = (set(data['name'].tolist())
                   if 'name' in getattr(data, 'columns', ()) else set())
    with open(filename) as f:
        table = f.readlines()[6:233]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        entries = line.split('\t')
        if len(entries)==1:
            continue
        if entries[0] in known_names:
            continue
        type_field = entries[3]
        type_letter = type_field[0]
        subtype_parts = type_field[1:].split('-{s}'.format(s=type_letter))
        records.append({
            'catalog':'Shkolnik2014',
            'catfile':filename,
            'source':entries[0],
            'ra':float(entries[1]),
            'dec':float(entries[2]),
            'spectype':type_letter,
            'specsubtype':np.array(subtype_parts,dtype='float16').mean(),
            'distance':float(entries[5]),
        })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the Shkolnik 2014 table with the default arguments and display it.
read_shkolnik2014()

In [None]:
def read_miles2017(filename = '../ref/1705.03583.txt',
                   data=pd.DataFrame(),csvfile=None):
    """Parse the Miles & Shkolnik 2017 '&'-separated table and append its rows to `data`.

    Only file lines 55 through 425 (zero-based) are read and blank lines are
    ignored. Fields 0-2 supply source, ra and dec, field 3 the spectral type
    and field 6 the distance. The first character of the stripped type field
    is stored as 'spectype'; the remainder is stored as a float
    'specsubtype', or None when that remainder is exactly ':'.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified. If it has a ``name``
        column, lines whose identifier appears in it are skipped (a
        'Skipping ...' message is printed for each).
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted.
    IndexError
        If a kept line has fewer than seven fields or an empty type field.
    """
    # Rows built here store their identifier under 'source', so only a
    # caller-supplied 'name' column can ever trigger a skip.
    # NOTE(review): confirm whether the check was meant to use 'source'.
    known_names = (set(data['name'].tolist())
                   if 'name' in getattr(data, 'columns', ()) else set())
    with open(filename) as f:
        table = f.readlines()[55:426]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        entries = line.split('&')
        source = entries[0].strip()
        if source in known_names:
            # The previous `print_inline` helper is not defined or imported in
            # the visible notebook, so plain print() is used here.
            print('Skipping {n}...'.format(n=entries[0]))
            continue # Note: This (new) catalog should actually take priority!
        type_field = entries[3].strip()
        subtype_text = type_field[1:]
        # 'specsubtype' stays last so the column order is unchanged.
        records.append({
            'catalog':'MilesShkolnik2017',
            'catfile':filename,
            'source':source,
            'ra':float(entries[1]),
            'dec':float(entries[2]),
            'spectype':type_field[0],
            'distance':float(entries[6]),
            'specsubtype':None if subtype_text==':' else float(subtype_text),
        })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the Miles & Shkolnik 2017 table with the default arguments and display it.
read_miles2017()

In [None]:
def read_guvv(filename = '../ref/datafile2.txt',
                   data=pd.DataFrame(),csvfile=None):
    """Parse the GUVV fixed-width table and append its rows to `data`.

    The first 40 lines of the file are skipped and blank lines are ignored.
    Each row's position is not read from coordinate columns: it is rebuilt
    from the characters of the GALEX ID (columns 0-17) and converted to
    degrees with astropy's SkyCoord, interpreting the first three pieces as an
    hour angle and the last three as degrees.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified.
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows (keys catalog, catfile, GALEX ID,
        SDSS ID, ra, dec, nmag), indexed from 0. `data` itself is returned
        when no row was parsed.

    Raises
    ------
    ValueError
        If the nmag field cannot be converted or the ID cannot be parsed as a
        coordinate.
    """
    with open(filename) as f:
        table = f.readlines()[40:]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        p = line[0:18].strip()
        # NOTE(review): only two characters (p[14:16]) are taken for the last
        # piece, so a fractional part in the ID would not be used — confirm
        # this is intended.
        pstring = f"{p[1:3]} {p[3:5]} {p[5:9]} {p[9:12]} {p[12:14]} {p[14:16]}"
        c = SkyCoord(pstring,unit=(u.hourangle,u.deg))
        records.append({
            'catalog':'GUVV',
            'catfile':filename,
            'GALEX ID':p, # source position as Jhhmmss.s+/-ddmmss.s (the table definition wrongly states Jhhmmss.ss+/-ddmmss.s)
            'SDSS ID':line[20:38].strip(),
            'ra':c.ra.deg,
            'dec':c.dec.deg,
            'nmag':float(line[89:94]), # maximum
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the GUVV table with the default arguments and display it.
read_guvv()

In [None]:
def read_guvv2(filename = '../ref/aj274302_mrt2.txt',
                   data=pd.DataFrame(),csvfile=None):
    """Parse the GUVV2 fixed-width table and append its rows to `data`.

    The first 35 lines of the file are skipped and blank lines are ignored.
    Each row's position is not read from coordinate columns: it is rebuilt
    from the characters of the GALEX ID (columns 0-17) and converted to
    degrees with astropy's SkyCoord, interpreting the first three pieces as an
    hour angle and the last three as degrees.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified.
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows (keys catalog, catfile, GALEX ID,
        SDSS ID, ra, dec, nmag), indexed from 0. `data` itself is returned
        when no row was parsed.

    Raises
    ------
    ValueError
        If the nmag field cannot be converted or the ID cannot be parsed as a
        coordinate.
    """
    with open(filename) as f:
        table = f.readlines()[35:]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        p = line[0:18].strip()
        # NOTE(review): only two characters (p[14:16]) are taken for the last
        # piece, so a fractional part in the ID would not be used — confirm
        # this is intended.
        pstring = f"{p[1:3]} {p[3:5]} {p[5:9]} {p[9:12]} {p[12:14]} {p[14:16]}"
        c = SkyCoord(pstring,unit=(u.hourangle,u.deg))
        records.append({
            'catalog':'GUVV2',
            'catfile':filename,
            'GALEX ID':p, # source position as Jhhmmss.s+/-ddmmss.s (the table definition wrongly states Jhhmmss.ss+/-ddmmss.s)
            'SDSS ID':line[20:38].strip(),
            'ra':c.ra.deg,
            'dec':c.dec.deg,
            'nmag':float(line[103:108]), # maximum
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the GUVV2 table with the default arguments and display it.
read_guvv2()

In [None]:
def read_gtds(filename = '../ref/apj462590t4_mrt.txt',
                   data=pd.DataFrame(),csvfile=None):
    """Parse the GTDS fixed-width table and append its rows to `data`.

    The first 60 lines of the file are skipped and blank lines are ignored.
    Every other line is sliced at fixed character offsets into one row with
    the keys catalog, catfile, GTDS ID, ra, dec, nmag and classification.

    Parameters
    ----------
    filename : str
        Path of the table to read.
    data : pandas.DataFrame
        Existing rows to extend; it is never modified.
    csvfile
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        `data` followed by the parsed rows, indexed from 0. `data` itself is
        returned when no row was parsed.

    Raises
    ------
    ValueError
        If a numeric field cannot be converted.
    """
    with open(filename) as f:
        table = f.readlines()[60:]
    # DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
    # every call), so rows are collected first and the frame is built once.
    records = []
    for line in table:
        if line.strip()=='':
            continue
        records.append({
            'catalog':'GTDS',
            'catfile':filename,
            'GTDS ID':line[1:16].strip(),
            'ra':float(line[17:25]),
            'dec':float(line[27:34]),
            'nmag':float(line[37:42]), # minimum
            'classification':line[92:104].strip(),
            })
    if not records:
        return data
    new_rows = pd.DataFrame(records)
    if data.shape == (0, 0):
        # Nothing to merge with; avoids concatenating a column-less frame.
        return new_rows
    return pd.concat([data, new_rows], ignore_index=True, sort=False)

# Smoke test: parse the GTDS table with the default arguments and display it.
read_gtds()

In [None]:
def ingest():
    """Combine the output of every catalog reader into one DataFrame.

    Each reader is called with its default arguments, in the order listed
    below, and the resulting frames are stacked. Columns that a catalog does
    not provide are filled with missing values, columns keep their order of
    first appearance, and the result is indexed from 0.

    Returns
    -------
    pandas.DataFrame
        All catalog rows, in reader order.
    """
    # Compile a list of known M Dwarfs
    readers = (
        read_miles2017,
        read_dr7,
        read_pmsu,
        read_shkolnik2014,
        read_lepinegaidos,
        read_shkolnik2010,
        read_guvv,
        read_guvv2,
        read_gtds,
    )
    # DataFrame.append was removed in pandas 2.0; one concat replaces the
    # chain of appends and avoids re-copying the growing frame at every step.
    frames = [reader() for reader in readers]
    return pd.concat(frames, ignore_index=True, sort=False)

In [None]:
# Build the combined catalog table (timed with the %time magic), then show its row count.
%time table = ingest()
len(table)

In [None]:
def angularSeparation(ra1, dec1, ra2, dec2):
    """Return the great-circle separation between two sky positions.

    Uses the haversine formula. All four inputs are in degrees and may be
    scalars or array-likes that broadcast against each other; the result is
    in degrees with the broadcast shape.
    """
    r1, d1, r2, d2 = (np.radians(v) for v in (ra1, dec1, ra2, dec2))
    a = np.sin((d2-d1)/2.)**2.+np.cos(d1)*np.cos(d2)*np.sin((r2-r1)/2.)**2.
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # nearly antipodal points), which would make arcsin return NaN.
    a = np.clip(a, 0., 1.)
    return np.degrees(2*np.arcsin(np.sqrt(a)))

In [None]:
# Image header table; loaded here but not referenced again in the visible notebook.
header_data = pd.read_csv('../ref/mislike_image_header_table.csv')
# Path of the parquet source catalog that is passed to get_source().
catalog_filename = '../ref/catalog_nd_daostarfinder.parquet'
# Handle on the same parquet file; not referenced again in the visible notebook.
catalog_file = pq.ParquetFile(catalog_filename)

In [None]:
def get_source(row,catalog_filename):
    """Print a make_gfcat.py command for the nearest catalog source per eclipse.

    `row` is an ``(index, Series)`` pair as produced by ``DataFrame.iterrows``;
    the Series must provide 'ra', 'dec' and 'catalog'. The parquet file at
    `catalog_filename` is queried for rows within +/-0.01 of that ra and dec
    whose 'aperture_sum_edge_n_12_8' and 'aperture_sum_mask_n_12_8' are both
    0.0. For every distinct 'eclipse' among the matches, the row with the
    smallest angular separation from the target is selected and one line
    ``python make_gfcat.py <eclipse> <obj_id> # <catalog>: <ra>, <dec>`` is
    printed.

    Returns None. Nothing is printed when the query matches no rows.
    """
    ra,dec = row[1][['ra','dec']].values
    half_width = 0.01
    ra_center = float(ra)
    dec_center = float(dec)
    box_filters = [
        ('ra','>=',ra_center-half_width),
        ('ra','<=',ra_center+half_width),
        ('dec','>=',dec_center-half_width),
        ('dec','<=',dec_center+half_width),
        ('aperture_sum_edge_n_12_8','=',0.0),
        ('aperture_sum_mask_n_12_8','=',0.0),
    ]
    wanted_columns = ['eclipse', 'ra', 'dec', 'obj_id']
    nearby = pq.read_table(catalog_filename,
                           filters=box_filters,
                           columns=wanted_columns).to_pandas()
    if len(nearby) == 0:
        return
    for eclipse in nearby['eclipse'].unique():
        in_eclipse = nearby.loc[nearby['eclipse']==eclipse]
        try:
            separations = angularSeparation(ra,dec,
                                            in_eclipse['ra'].values,
                                            in_eclipse['dec'].values)
            nearest = np.argmin(separations)
        except ValueError:
            # argmin of an empty selection: nothing to report for this eclipse.
            continue
        eclipse_number = int(eclipse)
        obj_id = int(in_eclipse.iloc[nearest]['obj_id'])
        print('python make_gfcat.py',
              eclipse_number,obj_id,
              f"# {row[1]['catalog']}: {ra}, {dec}")

    gc.collect()

def generate_target_list(table):
    """Run get_source() on every row of `table`, in row order.

    Each ``(index, Series)`` pair from ``table.iterrows()`` is passed to
    get_source() together with the module-level `catalog_filename`; a garbage
    collection is forced after each row. Returns None.
    """
    row_pairs = table.iterrows()
    for index_and_row in row_pairs:
        # get_source expects the pair exactly as iterrows yields it.
        get_source(index_and_row,catalog_filename)
        gc.collect()
            
# Run the catalog search for every row of the combined table (timed with the %time magic).
%time generate_target_list(table)