In [None]:
import numpy as np
import pandas as pd

from os import path

In [None]:
# Data directory:
# The leading "~" is expanded to the user's home directory here, so the path
# also works with functions that do not expand "~" themselves (e.g. open()).
data_dir = path.expanduser("~/Scarlett/OneDrive - Liverpool John Moores University/SEPHI_data/")
#data_dir = path.expanduser("~/OneDrive/SEPHI_data/")

# Number of lines at the top of the NASA EA csv that are skipped when reading it
# (passed to pd.read_csv as skiprows).
header_length = 116

In [None]:
# Read the NASA_EA data, keeping only the columns needed and only the planets
# whose host star has a Gaia identifier.
# Column notes:
#   pl_orbper  = orbital period [days]
#   pl_orbsmax = orbit semi-major axis [au]
exoplanets = (
    pd.read_csv(
        path.join(data_dir, "NASA_EA_2022_02_09.csv"),
        skiprows=header_length,  # skip the first `header_length` lines of the file
        usecols=[
            # Planet / system identification
            "pl_name", "hostname", "gaia_id", "sy_snum", "sy_pnum", "discoverymethod",
            # Planet properties
            "pl_orbper", "pl_orbsmax", "pl_rade", "pl_bmasse", "pl_dens",
            "pl_orbeccen", "pl_eqt", "pl_orbincl",
            # Stellar properties
            "st_teff", "st_tefferr1", "st_tefferr2",
            "st_rad", "st_raderr1", "st_raderr2",
            "st_mass", "st_met",
            "st_lum", "st_lumerr1", "st_lumerr2",
            "st_logg",
            "st_age", "st_ageerr1", "st_ageerr2",
            # System properties
            "sy_dist", "sy_plx", "sy_gaiamag",
        ],
    )
    # Get rid of exoplanets whose stars haven't been observed by Gaia:
    .dropna(subset=["gaia_id"])
    # The gaia_id in exoplanets is called the 'designation' in Gaia tables, so rename it.
    # The designation is: Gaia + space + DR + no. + space + Gaia source_id
    # e.g. Gaia DR2 3946945413106333696
    .rename(columns={"gaia_id": "designation"})
)

# Add a new column called 'source_id' which is listed in Gaia tables.
# The source_id is the number in the designation, e.g. 3946945413106333696
# (everything after the last space), stored as a 64-bit integer.
exoplanets["source_id"] = exoplanets["designation"].str.rsplit(" ", n=1, expand=True)[1].astype("int64")

# Remove the space in 'hostname' (e.g. 11 Com -> 11Com):
#exoplanets["hostname"] = exoplanets["hostname"].str.replace(" ", "")

print("No. rows in NASA EA: ", exoplanets.shape[0])

In [None]:
# Create a csv containing only the 'designation' column (no index column):
# (useful when using the designations to search for stars in Gaia)
exoplanets[["designation"]].to_csv(path.join(data_dir, "designations.csv"), index=False)

In [None]:
# TODO: convert luminosities from log(solar) to solar luminosity?
# TODO: (Done in sephi_processing atm) Where possible, calculate teff, L, and R for stars without them listed
# TODO: Rename columns
# TODO: Save exoplanets df to a csv when happy with it