In [1]:
# imports
import numpy as np
import pickle
from tqdm import tqdm
import hickle
import csv

In [2]:
# function for loading all quantities of interest from the injection-recovery experiments
# note that the pickle files are too big for GitHub, but they can be regenerated using the 'exoplanet_catalog_fits.py' files
def _unpack_simulation_record(results):
    """Flatten one pickled record into the 36 per-row values, in return-column order.

    Every value is extracted before anything is handed back, so a malformed
    record raises here without leaving a half-filled row in the output lists.
    """
    stellar_params, astrom_data, true_params, recov_params, no_pl_chi2, one_pl_chi2, MCMC_data = results
    source_id, ra, dec, parallax, pmra, pmdec, mass, G_mag, BP, RP, ruwe = stellar_params
    recov_planet_mass, recov_period, recov_Tp, recov_ecc, recov_omega, recov_inc, recov_w = recov_params
    true_planet_mass, true_period, true_Tp, true_ecc, true_omega, true_inc, true_w = true_params

    # each MCMC_data row is read as [_, lower, median, upper];
    # rows 0-4 are distance, period, planet mass, eccentricity, inclination
    lower_dist, median_dist, upper_dist = MCMC_data[0][1], MCMC_data[0][2], MCMC_data[0][3]
    lower_period, median_period, upper_period = MCMC_data[1][1], MCMC_data[1][2], MCMC_data[1][3]
    lower_mp, median_mp, upper_mp = MCMC_data[2][1], MCMC_data[2][2], MCMC_data[2][3]
    lower_ecc, median_ecc, upper_ecc = MCMC_data[3][1], MCMC_data[3][2], MCMC_data[3][3]
    lower_inc, median_inc, upper_inc = MCMC_data[4][1], MCMC_data[4][2], MCMC_data[4][3]

    return (
        # true stellar properties (distance is 1000/parallax; presumably mas -> pc, confirm against the fitting script)
        ra, dec, mass, G_mag, 1000/parallax,
        # best-fit planet parameters (mass scaled by 1047.57; the catalog labels these columns [M_J])
        1047.57*recov_planet_mass, recov_period, recov_Tp, recov_ecc, recov_omega, recov_inc, recov_w,
        # true planet parameters (same mass scaling)
        1047.57*true_planet_mass, true_period, true_Tp, true_ecc, true_omega, true_inc, true_w,
        # chi2 of the no-planet and one-planet fits
        no_pl_chi2, one_pl_chi2,
        # MCMC summaries, each as (median, lower, upper); masses are NOT rescaled here
        median_dist, lower_dist, upper_dist,
        median_period, lower_period, upper_period,
        median_mp, lower_mp, upper_mp,
        median_inc, lower_inc, upper_inc,
        median_ecc, lower_ecc, upper_ecc,
    )


def load_simulation_results(filename, num_nodes=768, loop_max=1000):
    """Load every recorded injection-recovery result for one experiment.

    Reads "Data/<filename>/file<k>.pkl" for k in range(num_nodes). Each file is
    a stream of consecutively pickled records; at most `loop_max` records are
    read from each file.

    Parameters
    ----------
    filename : str
        Name of the experiment directory inside "Data/".
    num_nodes : int
        Number of per-node result files to look for.
    loop_max : int
        Maximum number of records read from any single file.

    Returns
    -------
    list of 36 numpy arrays, all of the same length, in this order:
        ras, decs, stellar_masses, G_mags, dists,
        recov_planet_masses, recov_periods, recov_Tps, recov_eccs, recov_omegas, recov_incs, recov_ws,
        true_planet_masses, true_periods, true_Tps, true_eccs, true_omegas, true_incs, true_ws,
        no_pl_chi2s, one_pl_chi2s,
        median_dists, lower_dists, upper_dists,
        median_periods, lower_periods, upper_periods,
        median_masses, lower_masses, upper_masses,
        median_incs, lower_incs, upper_incs,
        median_eccs, lower_eccs, upper_eccs

    Notes
    -----
    Missing files and malformed records are skipped with a warning instead of
    being silently ignored. A record is appended only after it has been fully
    unpacked, so the returned arrays always stay aligned row by row.
    """
    import warnings  # local import: only needed for the skip notices below

    # one list per returned quantity, in return order
    columns = [[] for _ in range(36)]

    # loop through number of nodes, collecting all quantities
    for run_num in tqdm(range(num_nodes)):
        path = "Data/" + filename + "/file" + str(run_num) + ".pkl"
        try:
            f = open(path, "rb")
        except FileNotFoundError:
            warnings.warn("results file not found, skipping: " + path)
            continue

        with f:  # guarantees the handle is closed even if something below raises
            for _ in range(loop_max):
                try:
                    # NOTE: pickle can execute arbitrary code while loading;
                    # only read result files that you generated yourself
                    results = pickle.load(f)
                except EOFError:
                    # normal end of the record stream
                    break
                except Exception as e:
                    # after a failed load the stream position is unreliable, so stop reading this file
                    warnings.warn("unreadable record in " + path + " (" + repr(e) + "); skipping the rest of this file")
                    break

                try:
                    row = _unpack_simulation_record(results)
                except (ArithmeticError, LookupError, TypeError, ValueError) as e:
                    warnings.warn("malformed record in " + path + " (" + repr(e) + "); record skipped")
                    continue

                # append only complete rows so every column keeps the same length
                for column, value in zip(columns, row):
                    column.append(value)

    # convert all lists to numpy arrays and return them
    return [np.array(column) for column in columns]

# Creating DR4 catalog

In [3]:
# read every recorded DR4 solution; the names below follow the loader's return order
(DR4_ras, DR4_decs, DR4_stellar_masses, DR4_G_mags, DR4_dists,
 DR4_recov_planet_masses, DR4_recov_periods, DR4_recov_Tps, DR4_recov_eccs,
 DR4_recov_omegas, DR4_recov_incs, DR4_recov_ws,
 DR4_true_planet_masses, DR4_true_periods, DR4_true_Tps, DR4_true_eccs,
 DR4_true_omegas, DR4_true_incs, DR4_true_ws,
 DR4_no_pl_chi2s, DR4_one_pl_chi2s,
 DR4_median_dists, DR4_lower_dists, DR4_upper_dists,
 DR4_median_periods, DR4_lower_periods, DR4_upper_periods,
 DR4_median_masses, DR4_lower_masses, DR4_upper_masses,
 DR4_median_incs, DR4_lower_incs, DR4_upper_incs,
 DR4_median_eccs, DR4_lower_eccs, DR4_upper_eccs) = load_simulation_results("Gaia_real_stars_recover_DR4")

100%|██████████████████████████████████████████████████████████| 768/768 [00:01<00:00, 670.48it/s]


In [4]:
# number of recorded DR4 solutions before any cuts are applied
DR4_dists.shape[0]

44046

In [5]:
# unit factors and cut values used in this cell
RAD_TO_DEG = 180.0/np.pi    # applied to the angles that end up in the [deg] catalog columns
TO_MJUP = 1047.57           # same factor the loader applies to the best-fit/true planet masses ([M_J] columns)
MAX_ECC = 0.989             # fits that ran away to e ~ 1 are treated as failed
MAX_PERIOD = 365.25*20.0    # fits with P >~ 20 yr (in days) are treated as failed
MAX_PLANET_MASS = 13.0      # upper limit on the best-fit planet mass
MAX_PERIOD_RATIO = 1.5      # upper limit on (upper period bound)/(lower period bound)

# drop the few solutions where the fitting procedure failed
mask_bad_solutions = (DR4_recov_eccs < MAX_ECC) & (DR4_recov_periods < MAX_PERIOD)

# nominal detection criteria (only cases with Delta chi2 > 50 were recorded, so that cut is already applied)
mask_nominal = (DR4_recov_planet_masses < MAX_PLANET_MASS) & (DR4_upper_periods/DR4_lower_periods < MAX_PERIOD_RATIO)
mask = mask_bad_solutions & mask_nominal

# stellar properties
DR4_masked_ras, DR4_masked_decs, DR4_masked_dists = DR4_ras[mask], DR4_decs[mask], DR4_dists[mask]
DR4_masked_stellar_masses, DR4_masked_G_mags = DR4_stellar_masses[mask], DR4_G_mags[mask]

# true planet parameters (angles converted to degrees)
DR4_masked_true_planet_masses = DR4_true_planet_masses[mask]
DR4_masked_true_periods = DR4_true_periods[mask]
DR4_masked_true_Tps = DR4_true_Tps[mask]
DR4_masked_true_eccs = DR4_true_eccs[mask]
DR4_masked_true_omegas = RAD_TO_DEG*DR4_true_omegas[mask]  # "big omega"
DR4_masked_true_incs = RAD_TO_DEG*DR4_true_incs[mask]
DR4_masked_true_ws = RAD_TO_DEG*DR4_true_ws[mask]  # "little omega"

# best-fit planet parameters (angles converted to degrees)
DR4_masked_recov_planet_masses = DR4_recov_planet_masses[mask]
DR4_masked_recov_periods = DR4_recov_periods[mask]
DR4_masked_recov_Tps = DR4_recov_Tps[mask]
DR4_masked_recov_eccs = DR4_recov_eccs[mask]
DR4_masked_recov_omegas = RAD_TO_DEG*DR4_recov_omegas[mask]  # "big omega"
DR4_masked_recov_incs = RAD_TO_DEG*DR4_recov_incs[mask]
DR4_masked_recov_ws = RAD_TO_DEG*DR4_recov_ws[mask]  # "little omega"

# chi2 improvement of the one-planet fit over the no-planet fit
DR4_masked_delta_chi2s = DR4_no_pl_chi2s[mask] - DR4_one_pl_chi2s[mask]

# MCMC summaries: median / lower / upper for each quantity
DR4_masked_median_dists, DR4_masked_lower_dists, DR4_masked_upper_dists = DR4_median_dists[mask], DR4_lower_dists[mask], DR4_upper_dists[mask]
DR4_masked_median_periods, DR4_masked_lower_periods, DR4_masked_upper_periods = DR4_median_periods[mask], DR4_lower_periods[mask], DR4_upper_periods[mask]
DR4_masked_median_masses = TO_MJUP*DR4_median_masses[mask]
DR4_masked_lower_masses = TO_MJUP*DR4_lower_masses[mask]
DR4_masked_upper_masses = TO_MJUP*DR4_upper_masses[mask]
DR4_masked_median_incs = RAD_TO_DEG*DR4_median_incs[mask]
DR4_masked_lower_incs = RAD_TO_DEG*DR4_lower_incs[mask]
DR4_masked_upper_incs = RAD_TO_DEG*DR4_upper_incs[mask]
DR4_masked_median_eccs, DR4_masked_lower_eccs, DR4_masked_upper_eccs = DR4_median_eccs[mask], DR4_lower_eccs[mask], DR4_upper_eccs[mask]

In [6]:
# number of DR4 solutions that survive the cuts
DR4_masked_dists.shape[0]

7545

In [7]:
# load source ids, which came from a query (something went wrong with the source IDs saved in the pickle files)
DR4_masked_source_ids = hickle.load('Data/corrected_DR4_source_ids.pkl')

# the IDs are matched to the masked arrays purely by position when the catalog rows are built,
# so a length mismatch would mislabel rows (or crash part-way through); fail loudly here instead
assert len(DR4_masked_source_ids) == len(DR4_masked_dists), "corrected DR4 source IDs do not line up with the masked DR4 solutions"
len(DR4_masked_source_ids)

7545

In [8]:
# choose csv header names
# (raw strings for the LaTeX-style names: '\o', '\D' and '\c' are not valid escape sequences,
#  so the plain literals trigger warnings on recent Python versions; the text written is unchanged)
csv_header = ['Gaia source ID', 'True distance [pc]', 'True RA [deg]', 'True Dec [deg]', r'Stellar mass [M_\odot]', 'G-band mag',
              'True planet mass [M_J]', 'True period [days]', 'True inclination [deg]', 'True eccentricity', 'True omega [deg]', 'True Omega [deg]', 'True T_peri [days]',
              'Best-fit planet mass [M_J]', 'Best-fit period [days]', 'Best-fit inclination [deg]', 'Best-fit eccentricity', 'Best-fit omega [deg]', 'Best-fit Omega [deg]', 'Best-fit T_peri [days]',
              'MCMC distance 16th [pc]', 'MCMC distance 50th [pc]', 'MCMC distance 84th [pc]', 'MCMC period 16th [days]', 'MCMC period 50th [days]', 'MCMC period 84th [days]',
              'MCMC planet mass 16th [M_J]', 'MCMC planet mass 50th [M_J]', 'MCMC planet mass 84th [M_J]', 'MCMC eccentricity 16th', 'MCMC eccentricity 50th', 'MCMC eccentricity 84th',
              'MCMC inclination 16th [deg]', 'MCMC inclination 50th [deg]', 'MCMC inclination 84th [deg]', r'\Delta \chi^2']

In [9]:
# assemble the catalog rows: header first, then one row per surviving solution
num_places = 5

# columns that follow (source ID, distance, RA, Dec) in each row, in header order; all of them are rounded
DR4_rounded_columns = (
    DR4_masked_stellar_masses, DR4_masked_G_mags,
    DR4_masked_true_planet_masses, DR4_masked_true_periods, DR4_masked_true_incs, DR4_masked_true_eccs,
    DR4_masked_true_ws, DR4_masked_true_omegas, DR4_masked_true_Tps,
    DR4_masked_recov_planet_masses, DR4_masked_recov_periods, DR4_masked_recov_incs, DR4_masked_recov_eccs,
    DR4_masked_recov_ws, DR4_masked_recov_omegas, DR4_masked_recov_Tps,
    DR4_masked_lower_dists, DR4_masked_median_dists, DR4_masked_upper_dists,
    DR4_masked_lower_periods, DR4_masked_median_periods, DR4_masked_upper_periods,
    DR4_masked_lower_masses, DR4_masked_median_masses, DR4_masked_upper_masses,
    DR4_masked_lower_eccs, DR4_masked_median_eccs, DR4_masked_upper_eccs,
    DR4_masked_lower_incs, DR4_masked_median_incs, DR4_masked_upper_incs,
    DR4_masked_delta_chi2s,
)

data = [csv_header]
for row_idx in range(len(DR4_masked_dists)):
    # RA and Dec are written at full precision; everything else numeric is rounded
    row = [DR4_masked_source_ids[row_idx], round(DR4_masked_dists[row_idx], num_places),
           DR4_masked_ras[row_idx], DR4_masked_decs[row_idx]]
    row.extend(round(column[row_idx], num_places) for column in DR4_rounded_columns)
    data.append(row)

In [10]:
# write the header and all DR4 rows to disk
with open('DR4_mock_exoplanet_catalog.csv', 'w', newline='') as out_file:
    csv.writer(out_file).writerows(data)

# Creating DR5 catalog

In [11]:
# read every recorded DR5 solution; the names below follow the loader's return order
(DR5_ras, DR5_decs, DR5_stellar_masses, DR5_G_mags, DR5_dists,
 DR5_recov_planet_masses, DR5_recov_periods, DR5_recov_Tps, DR5_recov_eccs,
 DR5_recov_omegas, DR5_recov_incs, DR5_recov_ws,
 DR5_true_planet_masses, DR5_true_periods, DR5_true_Tps, DR5_true_eccs,
 DR5_true_omegas, DR5_true_incs, DR5_true_ws,
 DR5_no_pl_chi2s, DR5_one_pl_chi2s,
 DR5_median_dists, DR5_lower_dists, DR5_upper_dists,
 DR5_median_periods, DR5_lower_periods, DR5_upper_periods,
 DR5_median_masses, DR5_lower_masses, DR5_upper_masses,
 DR5_median_incs, DR5_lower_incs, DR5_upper_incs,
 DR5_median_eccs, DR5_lower_eccs, DR5_upper_eccs) = load_simulation_results("Gaia_real_stars_recover_DR5")

100%|██████████████████████████████████████████████████████████| 768/768 [00:04<00:00, 157.85it/s]


In [12]:
# number of recorded DR5 solutions before any cuts are applied
DR5_dists.shape[0]

379181

In [13]:
# unit factors and cut values used in this cell
RAD_TO_DEG = 180.0/np.pi    # applied to the angles that end up in the [deg] catalog columns
TO_MJUP = 1047.57           # same factor the loader applies to the best-fit/true planet masses ([M_J] columns)
MAX_ECC = 0.989             # fits that ran away to e ~ 1 are treated as failed
MAX_PERIOD = 365.25*20.0    # fits with P >~ 20 yr (in days) are treated as failed
MAX_PLANET_MASS = 13.0      # upper limit on the best-fit planet mass
MAX_PERIOD_RATIO = 1.5      # upper limit on (upper period bound)/(lower period bound)

# drop the few solutions where the fitting procedure failed
mask_bad_solutions = (DR5_recov_eccs < MAX_ECC) & (DR5_recov_periods < MAX_PERIOD)

# nominal detection criteria (only cases with Delta chi2 > 50 were recorded, so that cut is already applied)
mask_nominal = (DR5_recov_planet_masses < MAX_PLANET_MASS) & (DR5_upper_periods/DR5_lower_periods < MAX_PERIOD_RATIO)
mask = mask_bad_solutions & mask_nominal

# stellar properties
DR5_masked_ras, DR5_masked_decs, DR5_masked_dists = DR5_ras[mask], DR5_decs[mask], DR5_dists[mask]
DR5_masked_stellar_masses, DR5_masked_G_mags = DR5_stellar_masses[mask], DR5_G_mags[mask]

# true planet parameters (angles converted to degrees)
DR5_masked_true_planet_masses = DR5_true_planet_masses[mask]
DR5_masked_true_periods = DR5_true_periods[mask]
DR5_masked_true_Tps = DR5_true_Tps[mask]
DR5_masked_true_eccs = DR5_true_eccs[mask]
DR5_masked_true_omegas = RAD_TO_DEG*DR5_true_omegas[mask]  # "big omega"
DR5_masked_true_incs = RAD_TO_DEG*DR5_true_incs[mask]
DR5_masked_true_ws = RAD_TO_DEG*DR5_true_ws[mask]  # "little omega"

# best-fit planet parameters (angles converted to degrees)
DR5_masked_recov_planet_masses = DR5_recov_planet_masses[mask]
DR5_masked_recov_periods = DR5_recov_periods[mask]
DR5_masked_recov_Tps = DR5_recov_Tps[mask]
DR5_masked_recov_eccs = DR5_recov_eccs[mask]
DR5_masked_recov_omegas = RAD_TO_DEG*DR5_recov_omegas[mask]  # "big omega"
DR5_masked_recov_incs = RAD_TO_DEG*DR5_recov_incs[mask]
DR5_masked_recov_ws = RAD_TO_DEG*DR5_recov_ws[mask]  # "little omega"

# chi2 improvement of the one-planet fit over the no-planet fit
DR5_masked_delta_chi2s = DR5_no_pl_chi2s[mask] - DR5_one_pl_chi2s[mask]

# MCMC summaries: median / lower / upper for each quantity
DR5_masked_median_dists, DR5_masked_lower_dists, DR5_masked_upper_dists = DR5_median_dists[mask], DR5_lower_dists[mask], DR5_upper_dists[mask]
DR5_masked_median_periods, DR5_masked_lower_periods, DR5_masked_upper_periods = DR5_median_periods[mask], DR5_lower_periods[mask], DR5_upper_periods[mask]
DR5_masked_median_masses = TO_MJUP*DR5_median_masses[mask]
DR5_masked_lower_masses = TO_MJUP*DR5_lower_masses[mask]
DR5_masked_upper_masses = TO_MJUP*DR5_upper_masses[mask]
DR5_masked_median_incs = RAD_TO_DEG*DR5_median_incs[mask]
DR5_masked_lower_incs = RAD_TO_DEG*DR5_lower_incs[mask]
DR5_masked_upper_incs = RAD_TO_DEG*DR5_upper_incs[mask]
DR5_masked_median_eccs, DR5_masked_lower_eccs, DR5_masked_upper_eccs = DR5_median_eccs[mask], DR5_lower_eccs[mask], DR5_upper_eccs[mask]

In [14]:
# number of DR5 solutions that survive the cuts
DR5_masked_dists.shape[0]

117305

In [15]:
# load source ids, which came from a query (something went wrong with the source IDs saved in the pickle files)
DR5_masked_source_ids = hickle.load('Data/corrected_DR5_source_ids.pkl')

# the IDs are matched to the masked arrays purely by position when the catalog rows are built,
# so a length mismatch would mislabel rows (or crash part-way through); fail loudly here instead
assert len(DR5_masked_source_ids) == len(DR5_masked_dists), "corrected DR5 source IDs do not line up with the masked DR5 solutions"
len(DR5_masked_source_ids)

117305

In [16]:
# choose csv header names
# (raw strings for the LaTeX-style names: '\o', '\D' and '\c' are not valid escape sequences,
#  so the plain literals trigger warnings on recent Python versions; the text written is unchanged)
csv_header = ['Gaia source ID', 'True distance [pc]', 'True RA [deg]', 'True Dec [deg]', r'Stellar mass [M_\odot]', 'G-band mag',
              'True planet mass [M_J]', 'True period [days]', 'True inclination [deg]', 'True eccentricity', 'True omega [deg]', 'True Omega [deg]', 'True T_peri [days]',
              'Best-fit planet mass [M_J]', 'Best-fit period [days]', 'Best-fit inclination [deg]', 'Best-fit eccentricity', 'Best-fit omega [deg]', 'Best-fit Omega [deg]', 'Best-fit T_peri [days]',
              'MCMC distance 16th [pc]', 'MCMC distance 50th [pc]', 'MCMC distance 84th [pc]', 'MCMC period 16th [days]', 'MCMC period 50th [days]', 'MCMC period 84th [days]',
              'MCMC planet mass 16th [M_J]', 'MCMC planet mass 50th [M_J]', 'MCMC planet mass 84th [M_J]', 'MCMC eccentricity 16th', 'MCMC eccentricity 50th', 'MCMC eccentricity 84th',
              'MCMC inclination 16th [deg]', 'MCMC inclination 50th [deg]', 'MCMC inclination 84th [deg]', r'\Delta \chi^2']

In [17]:
# assemble the catalog rows: header first, then one row per surviving solution
num_places = 5

# columns that follow (source ID, distance, RA, Dec) in each row, in header order; all of them are rounded
DR5_rounded_columns = (
    DR5_masked_stellar_masses, DR5_masked_G_mags,
    DR5_masked_true_planet_masses, DR5_masked_true_periods, DR5_masked_true_incs, DR5_masked_true_eccs,
    DR5_masked_true_ws, DR5_masked_true_omegas, DR5_masked_true_Tps,
    DR5_masked_recov_planet_masses, DR5_masked_recov_periods, DR5_masked_recov_incs, DR5_masked_recov_eccs,
    DR5_masked_recov_ws, DR5_masked_recov_omegas, DR5_masked_recov_Tps,
    DR5_masked_lower_dists, DR5_masked_median_dists, DR5_masked_upper_dists,
    DR5_masked_lower_periods, DR5_masked_median_periods, DR5_masked_upper_periods,
    DR5_masked_lower_masses, DR5_masked_median_masses, DR5_masked_upper_masses,
    DR5_masked_lower_eccs, DR5_masked_median_eccs, DR5_masked_upper_eccs,
    DR5_masked_lower_incs, DR5_masked_median_incs, DR5_masked_upper_incs,
    DR5_masked_delta_chi2s,
)

data = [csv_header]
for row_idx in range(len(DR5_masked_dists)):
    # RA and Dec are written at full precision; everything else numeric is rounded
    row = [DR5_masked_source_ids[row_idx], round(DR5_masked_dists[row_idx], num_places),
           DR5_masked_ras[row_idx], DR5_masked_decs[row_idx]]
    row.extend(round(column[row_idx], num_places) for column in DR5_rounded_columns)
    data.append(row)

In [18]:
# write the header and all DR5 rows to disk
with open('DR5_mock_exoplanet_catalog.csv', 'w', newline='') as out_file:
    csv.writer(out_file).writerows(data)