### Run upon export from spreadsheet

In [21]:
import os

from astroquery.mast import Catalogs
import numpy as np
import pandas as pd


tces_file = '/mnt/tess/labels/tois.csv'
ext_data_file = '/mnt/tess/labels/ext_mast_data.csv'


tce_table = pd.read_csv(tces_file, header=0, low_memory=False)
tce_table['tic_id'] = tce_table['TIC']
tce_table['Duration'] = tce_table['Transit Duration Value']
tce_table['Period'] = tce_table['Orbital Period Value']
tce_table['RA'] = tce_table['TIC Right Ascension']
tce_table['Sectors'] = tce_table['Sectors'].apply(lambda v: len(v.split(' ')))
tce_table['Transit_Depth'] = tce_table['Transit Depth Value']
tce_table['Dec'] = tce_table['TIC Declination']
tce_table['teff'] = tce_table['Effective Temperature Value']
tce_table['SN'] = tce_table['Signal-to-noise']
tce_table['Qingress'] = 0.0
tce_table['Tmag'] = tce_table['TMag Value']
tce_table['logg'] = tce_table['Surface Gravity Value']
tce_table['Epoc'] = tce_table['Epoch Value']
tce_table['star_rad'] = tce_table['Star Radius Value']
# G = 6.67e-8 in cgs
tce_table['star_mass'] = ((10 ** tce_table['logg']) * (tce_table['star_rad'] ** 2)) / 6.67e-8
tce_table = tce_table.set_index('tic_id')
# tce_table = tce_table.drop(columns=['Unnamed: 0'])

tce_table['Duration'] /= 24.0

tce_table

# # Drop some common invalid examples.
# # Orbits falling inside the star
# tce_table = tce_table[~tce_table.Ilabel]
# # Excessively large durations
# tce_table = tce_table[tce_table.Duration < 0.9 * tce_table.Period]

joined_table = tce_table

# ext_table = pd.read_csv(ext_data_file, header=0, low_memory=False).set_index('tic_id')
# joined_table = joined_table.join(ext_table, on='tic_id', how='left')

# joined_table = joined_table[
#     joined_table['objType'].isnull()
#     | (joined_table['objType'] == 'STAR')
# ]

joined_table = joined_table.reset_index()[[
    'tic_id', 'RA', 'Dec', 'Tmag', 'Epoc', 'Period', 'Duration',
    'Transit_Depth', 'Sectors', 'star_rad', 'star_mass', 'teff',
    'logg', 'SN', 'Qingress'
]]


disps = ['E', 'J', 'N', 'S', 'B']

for d in disps:
    joined_table[f'disp_{d}'] = 0

joined_table = joined_table.set_index('tic_id')
print(f'Total entries: {len(joined_table)}')

joined_table.to_csv('/mnt/tess/astronet/tces-toi.csv')

# python astronet/preprocess/generate_input_records.py --input_tce_csv_file=/mnt/tess/astronet/tces-toi.csv --tess_data_dir=/mnt/tess/lc --output_dir=/mnt/tess/astronet/tfrecords-toi --num_shards=1


Total entries: 2542


In [22]:
tce_table

Unnamed: 0_level_0,Source Pipeline,TIC,Full TOI ID,TOI Disposition,EXOFOP Disposition,TIC Right Ascension,TIC Declination,TMag Value,TMag Uncertainty,VMag Value,VMag Uncertainty,Epoch Value,Epoch Error,Orbital Period Value,Orbital Period Error,Transit Duration Value,Transit Duration Error,Transit Depth Value,Transit Depth Error,Sectors,Public Comment,Surface Gravity Value,Surface Gravity Uncertainty,Signal ID,Star Radius Value,Star Radius Error,Planet Radius Value,Planet Radius Error,Planet Equilibrium Temperature (K) Value,Effective Temperature Value,Effective Temperature Uncertainty,Effective Stellar Flux Value,Signal-to-noise,Centroid Offset,TFOP Master,TFOP SG1a,TFOP SG1b,TFOP SG2,TFOP SG3,TFOP SG4,TFOP SG5,Alerted,Updated,Duration,Period,RA,Transit_Depth,Dec,teff,SN,Qingress,Tmag,logg,Epoc,star_rad,star_mass
tic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1
176860064,spoc,176860064,1457.01,PC,APC,354.836680,45.719897,7.1140,0.006000,7.617,0.023,1766.359365,0.001300,6.375556,0.000700,2.784942,0.204000,2638.408383,40.55020,1,No information on the star; V-shaped,,,1,,,,,1545.000000,6393.0,280.600,947.389000,35.813244,,3.0,4.0,3.0,3.0,4.0,4.0,4.0,2019-12-05 16:42:06+0000,2019-12-05 16:42:06+0000,0.116039,6.375556,354.836680,2638.408383,45.719897,6393.0,35.813244,0.0,7.1140,,1766.359365,,
236887394,spoc,236887394,1465.01,KP,KP,303.381740,65.162081,11.8478,0.006100,12.692,0.103,1766.006734,0.000080,1.420023,0.000001,1.623014,0.009436,23593.258233,89.22266,4,Qatar-1 b,4.57248,0.082846,1,0.768108,0.045256,12.293303,0.726365,1251.952037,4910.0,108.354,580.592960,280.148770,False,5.0,5.0,5.0,5.0,5.0,5.0,5.0,2019-12-05 16:42:06+0000,2020-09-11 14:14:29+0000,0.067626,1.420023,303.381740,23593.258233,65.162081,4910.0,280.148770,0.0,11.8478,4.57248,1766.006734,0.768108,3.305208e+11
427654774,spoc,427654774,1474.01,PC,FP,349.287339,70.190021,14.1431,0.007508,16.502,0.252,1765.042474,0.003388,5.222621,0.001436,4.693629,0.613360,15867.992018,2007.71600,2,3-sigma centroid offset towards another star.,4.95337,0.008643,1,0.275946,0.008483,3.721602,0.381201,329.650849,3453.0,157.000,2.790858,8.069622,,2.0,4.0,2.0,2.0,4.0,4.0,4.0,2019-12-05 16:42:06+0000,2019-12-05 16:42:06+0000,0.195568,5.222621,349.287339,15867.992018,70.190021,3453.0,8.069622,0.0,14.1431,4.95337,1765.042474,0.275946,1.025398e+11
237086564,spoc,237086564,1466.01,PC,PC,307.395886,61.550433,11.0278,0.006000,12.189,0.069,1766.253229,0.001272,1.871892,0.000022,1.644789,0.260366,1031.773654,75.49385,2,potential L1 candidate,4.50790,0.110712,1,0.749646,0.071322,2.366370,1.968143,997.392351,4201.0,125.587,233.875718,12.584560,False,1.0,4.0,1.0,1.0,4.0,4.0,4.0,2019-12-05 16:42:06+0000,2020-09-23 11:19:07+0000,0.068533,1.871892,307.395886,1031.773654,61.550433,4201.0,12.584560,0.0,11.0278,4.50790,1766.253229,0.749646,2.713230e+11
240968774,spoc,240968774,1467.01,PC,PC,19.113929,49.233780,10.5977,0.007301,12.293,0.017,1766.983728,0.002812,5.973384,0.001413,1.605698,0.753235,1373.411277,166.74307,1,,4.74552,0.007707,1,0.490737,0.014515,1.827003,2.407974,525.913556,3834.0,157.000,18.079169,8.569228,,3.0,4.0,3.0,3.0,4.0,4.0,4.0,2019-12-05 16:42:06+0000,2019-12-05 16:42:06+0000,0.066904,5.973384,19.113929,1373.411277,49.233780,3834.0,8.569228,0.0,10.5977,4.74552,1766.983728,0.490737,2.009517e+11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126572546,qlp,126572546,2527.01,PC,,136.653389,-50.480484,11.1670,0.032000,,,1564.942827,0.003241,2.302134,0.000373,3.776000,0.310000,1140.000000,1.05888,2,possibly synchronized with variability; CTOI f...,3.54000,0.110000,1,3.450000,0.390000,11.951500,1.389960,2278.000000,6919.0,258.900,4476.820000,16.000000,False,3.0,4.0,3.0,3.0,4.0,4.0,4.0,2021-03-02 22:57:47+0000,2021-03-02 17:51:17+0000,0.157333,2.302134,136.653389,1140.000000,-50.480484,6919.0,16.000000,0.0,11.1670,3.54000,1564.942827,3.450000,6.187459e+11
380836882,qlp,380836882,2526.01,PC,,270.134854,-65.613847,11.0590,0.006000,,,1676.866770,0.001360,8.011524,0.000923,5.170000,0.444000,7810.000000,3.34366,1,odd-even likely from detrending transit at beg...,4.25000,0.080000,1,1.290000,0.070000,11.589300,0.651645,1222.000000,5923.0,139.100,370.573000,57.000000,False,3.0,4.0,3.0,3.0,4.0,4.0,4.0,2021-03-02 22:57:47+0000,2021-03-02 17:50:45+0000,0.215417,8.011524,270.134854,7810.000000,-65.613847,5923.0,57.000000,0.0,11.0590,4.25000,1676.866770,1.290000,4.436634e+11
149601126,qlp,149601126,2525.01,PC,,86.850783,-60.521408,13.4010,0.006000,,,2174.013303,0.006910,23.349908,0.000289,4.523000,0.577000,6090.000000,6.99742,16,potential multi; CTOI from Planet Hunters,4.51000,,1,0.820000,,6.636840,,556.000000,4944.0,122.000,15.958200,15.000000,False,3.0,4.0,3.0,3.0,4.0,4.0,4.0,2021-03-02 22:57:47+0000,2021-03-02 17:37:55+0000,0.188458,23.349908,86.850783,6090.000000,-60.521408,4944.0,15.000000,0.0,13.4010,4.51000,2174.013303,0.820000,3.262135e+11
149601126,qlp,149601126,2525.02,PC,,86.850783,-60.521408,13.4010,0.006000,,,2123.257222,0.006354,49.242380,0.000541,6.930000,1.202000,14290.000000,1601.01000,16,potential multi; CTOI from Planet Hunters,4.51000,,2,0.820000,,10.342100,,603.000000,4944.0,122.000,21.981400,1000.000000,False,3.0,4.0,3.0,3.0,4.0,4.0,4.0,2021-03-02 22:57:47+0000,2021-03-02 17:37:38+0000,0.288750,49.242380,86.850783,14290.000000,-60.521408,4944.0,1000.000000,0.0,13.4010,4.51000,2123.257222,0.820000,3.262135e+11


In [18]:
pd.set_option('display.max_columns', None)
joined_table.sample(5)

Unnamed: 0_level_0,RA,Dec,Tmag,Epoc,Period,Duration,Transit_Depth,Sectors,star_rad,star_mass,teff,logg,SN,Qingress,disp_E,disp_J,disp_N,disp_S,disp_B
tic_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
248391319,19.626777,-0.056509,12.2478,2116.583003,0.798947,0.0484,2303.819305,1,0.864137,,4785.0,4.45252,9.777322,0.0,0,0,0,0,0
13023738,347.084793,-29.59032,12.252,2104.87232,8.0635,0.1415,5180.0,2,1.71,,5657.0,3.97,22.0,0.0,0,0,0,0,0
219778329,263.851951,64.380452,8.1529,1742.874702,177.219004,0.232082,295.150196,12,1.15025,,5809.0,4.33351,8.084739,0.0,0,0,0,0,0
61988212,178.074472,-47.583402,14.2,1569.455416,0.555255,0.067585,23416.400387,1,1.06168,,5591.0,4.38111,16.535254,0.0,0,0,0,0,0
346929661,0.977583,59.334778,10.198,1770.16483,12.013,0.170875,820.0,1,1.23,,6178.0,4.33,12.0,0.0,0,0,0,0,0


### Run once

In [None]:
def load_tces_old():
    tceold = pd.read_csv('/mnt/tess/astronet/tces.csv', header=0).set_index('tic_id')

    # Only keep the max sectors read.
    maxsect = tceold.groupby('tic_id')['Sectors'].max()
    tceold = tceold.join(maxsect, on='tic_id', how='right', rsuffix='_max')
    tceold = tceold[tceold.Sectors == tceold.Sectors_max]

    # Then keep the max row ID.
    maxrowid = tceold.groupby('tic_id')['row_id'].max()
    tceold = tceold.join(maxrowid, on='tic_id', how='right', rsuffix='_max')
    tceold = tceold[tceold.row_id == tceold.row_id_max]

    return tceold

def generate_tce_bls_instar():
    tcenew = pd.read_csv('/mnt/tess/labels/tce_bls_instar.csv', header=0).set_index('tic_id')
    tceold = load_tces_old()
    tcenorth = pd.read_csv('/mnt/tess/labels/tce_north_instar.csv', header=0).set_index('tic_id')

    # Copy from old data where it's missing from the new.
    alltce = tcenew.join(tceold, how='outer', on='tic_id', rsuffix='_old')
    alltce = alltce.set_index('tic_id')

    alltce = alltce.drop(columns=['row_id'])

    def fillna(df, col_name):
        df.loc[df[col_name].isna(), col_name] = df.loc[df[col_name].isna(), col_name + '_old']

    fillna(alltce, 'toi_id')
    fillna(alltce, 'Disposition')
    fillna(alltce, 'RA')
    fillna(alltce, 'Dec')
    fillna(alltce, 'Tmag')
    fillna(alltce, 'Epoc')
    fillna(alltce, 'Period')
    fillna(alltce, 'Duration')
    fillna(alltce, 'Transit_Depth')
    fillna(alltce, 'Sectors')
    fillna(alltce, 'camera')
    fillna(alltce, 'ccd')
    fillna(alltce, 'star_rad')
    fillna(alltce, 'star_mass')
    fillna(alltce, 'teff')
    fillna(alltce, 'logg')
    fillna(alltce, 'SN')
    fillna(alltce, 'Qingress')

    alltce = alltce.drop(columns=[c for c in alltce.columns if c.endswith('_old')])
    
    alltce = alltce.append(tcenorth)
    
    alltce['Ilabel'] = alltce['Ilabel'].fillna(False)

    alltce.to_csv('/mnt/tess/labels/tce_bls_instar+old.csv')