In [1]:
import pandas as pd
import numpy as np

from os import path

In [2]:
# Data directories:
CKS_dir = "~/OneDrive/SEPHI_data/CKS/"
data_date = "2022_04_28"

In [3]:
# CKS column names are specified in https://sites.astro.caltech.edu/~howard/cks/column-definitions.txt
# The file used contains both the stellar (CKS) and planet (Kepler) properties
# CKS data downloaded from 'Prameters' section here: https://california-planet-search.github.io/cks-website/

# The indexes are in CKS in column 0
index_column = 0

# These columns are used to identify the stars and their planets
identifiers = ["id_starname", "id_koicand", "id_kepler_name", "koi_disposition"]
#koi_disposition       Exoplanet Archive Disposition
#id_koi not actually in the csv

# Columns from Q19:
Q16 = ["koi_period", "koi_period_err1", "koi_period_err2", "koi_impact", "koi_impact_err1", "koi_impact_err2", "koi_prad", "koi_prad_err1", "koi_prad_err2", "koi_sma", "koi_sma_err1", "koi_sma_err2", "koi_slogg", "koi_slogg_err1", "koi_slogg_err2", "koi_smet", "koi_smet_err1", "koi_smet_err2", "koi_srad", "koi_srad_err1", "koi_srad_err2", "koi_smass", "koi_smass_err1", "koi_smass_err2", "koi_sage", "koi_sage_err1", "koi_sage_err2"]
# NB: the orbital period only directly appears in Q16
#koi_period            Orbital Period [days] 
#koi_impact            Impact Parameter [float]
#koi_ror               Planet-Star Radius Ratio [float]
#koi_srho              Fitted Stellar Density [g/cm**3]
#koi_prad              Planetary Radius [Earth radii]
#koi_sma               Orbit Semi-Major Axis [AU]
#koi_teq               Equilibrium Temperature [K]
#koi_insol             Insolation Flux [Earth flux]
#koi_dor               Planet-Star Distance over Star Radius [float]
#koi_model_snr         Transit Signal-to-Noise
#koi_count             Number of Planets
#koi_num_transits      Number of Transits
#koi_steff             Stellar Effective Temperature [K]
#koi_slogg             Stellar Surface Gravity [log10(cm/s**2)]
#koi_smet              Stellar Metallicity [dex]
#koi_srad              Stellar Radius [Solar radii

# Columns from CKS-I:
CKSI = ["cks_steff", "cks_steff_err1", "cks_steff_err2"]
#cks_fpsys      CKS False positive designation for star/system
#cks_fp                CKS False positive designation for candidate
#cks_slogg             CKS Stellar Surface Gravity [log10(cm/s**2)]
#cks_fpsys not in csv
# "cks_slogg", "cks_slogg_err1", "cks_slogg_err2", "cks_smet", "cks_smet_err1", "cks_smet_err2"

# Columns from CKS-II:
CKSII = ["iso_steff", "iso_steff_err1", "iso_steff_err2", "iso_srad", "iso_srad_err1", "iso_srad_err2", "iso_smass", "iso_smass_err1", "iso_smass_err2", "iso_sage", "iso_sage_err1", "iso_sage_err2", "iso_prad", "iso_prad_err1", "iso_prad_err2", "iso_sma", "iso_sma_err1", "iso_sma_err2"]
#iso_steff             CKS+Isochrone-constrained Effective Temperature [K]
#iso_slogg             CKS+Isochrone-constrained Stellar Surface Gravity [log10(cm/s**2)]
#iso_insol             CKS+Isochrone-constrained Incident flux [Earth units]
#iso_teq               CKS+Isochrone-constrained Planet equilibrium temperature (bond albedo = 0.3)
# , "iso_slogg", "iso_slogg_err1", "iso_slogg_err2", "iso_smet", "iso_smet_err1", "iso_smet_err2", 
#, "iso_teq", "iso_teq_err1", "iso_teq_err2", "iso_insol", "iso_insol_err1", "iso_insol_err2"

# Choosing the data sets/columns to be used:
columns = identifiers # The column names I will read include the identifiers and 
columns.extend(CKSI) # CKSI and
columns.extend(CKSII) # CKSII
#print(columns)

# Read the CKS file:
CKS_data = pd.read_csv(path.join(CKS_dir, f"cks_physical_merged.csv"),  usecols=columns)#, index_col=index_column)
# usecols should always be identifiers.extend(<data-sample>) so that the stars are identifiable
# TODO: does the CKS database get updated, i.e. when a planet is moved to confirmed??

In [6]:
# Do no drop unconfirmed objects, as these may have been confirmed since
# Only confirmed planets have a Kepler name, but that may not be up to date!

# Rename CKS column headings to make them easily identifiable:
# koi -> Q16 - except the koi_disposition which is the planet's (NEA) confirmed/not confirmed status
# cks -> CKSI
# iso -> CKSII
columns_new = [i.replace("koi_disposition", "status") for i in columns]
columns_new = [i.replace("koi_", "Q16_") for i in columns_new]
columns_new = [i.replace("_period", "_pperiod") for i in columns_new]
columns_new = [i.replace("_impact", "_pimpact") for i in columns_new]
columns_new = [i.replace("cks_", "CKSI_") for i in columns_new]
columns_new = [i.replace("iso_", "CKSII_") for i in columns_new]
#print(columns_new)

cols_dict = {columns[i]: columns_new[i] for i in range(len(columns))}
CKS_data.rename( columns=cols_dict, inplace=True )
CKS_data.rename( columns={"id_starname":"st_koi_name", "id_koicand":"pl_koi_name", "id_kepler_name":"pl_kepler_name"}, inplace=True )
CKS_length = CKS_data["st_koi_name"].size
print(CKS_data)

#TODO: you may also want to change the stellar parameter indicator from 's' to 'st' to align with NEA 
# TODO: and change "koi_sma" = Orbit Semi-Major Axis [AU] to something without 's' in

     st_koi_name pl_koi_name pl_kepler_name          status  CKSI_steff  \
0         K00001   K00001.01     Kepler-1 b       CONFIRMED      5818.8   
1         K00002   K00002.01     Kepler-2 b       CONFIRMED      6448.7   
2         K00003   K00003.01     Kepler-3 b       CONFIRMED      4864.3   
3         K00006   K00006.01            NaN  FALSE POSITIVE      6348.1   
4         K00007   K00007.01     Kepler-4 b       CONFIRMED      5826.9   
...          ...         ...            ...             ...         ...   
2020      K05929   K05929.01            NaN       CANDIDATE      5503.7   
2021      K05932   K05932.01            NaN       CANDIDATE      5430.6   
2022      K05949   K05949.01            NaN       CANDIDATE      5790.6   
2023      K05953   K05953.01            NaN       CANDIDATE      5094.2   
2024      K06102   K06102.01            NaN       CANDIDATE      6322.9   

      CKSI_steff_err1  CKSI_steff_err2  CKSII_steff  CKSII_steff_err1  \
0                60.0     

In [7]:
new_cols = [i.replace("_parallax", "_pl_parallax") for i in columns]
new_cols = [i.replace("koi_sma", "Q16_pl_sma") for i in new_cols]
new_cols = [i.replace("_period", "_pl_period") for i in new_cols]
new_cols = [i.replace("_impact", "_pl_impact") for i in new_cols]
new_cols = [i.replace("koi_model_snr", "Q16_SNR") for i in new_cols]
new_cols = [i.replace("koi_smass", "CKSI_st_mass") for i in new_cols]
new_cols = [i.replace("iso_smass", "CKSII_st_mass") for i in new_cols]

new_cols = [i.replace("_err", "err") for i in new_cols]
new_cols = [i.replace("koi_", "Q16_") for i in new_cols]
new_cols = [i.replace("cks_", "CKSI_") for i in new_cols]
new_cols = [i.replace("iso_", "CKSII_") for i in new_cols]

#new_cols = [i.replace("_p", "_pl") for i in new_cols]
#new_cols = [i.replace("_s", "_st") for i in new_cols]
print(new_cols)

#TODO: halpp
# I could make a dictionary and get it to find column headings and their respective new heading?

cols_dict = {columns[i]: new_cols[i] for i in range(len(columns))}
print(cols_dict)
CKS_data.rename( columns=cols_dict, inplace=True )

tricky_cols = {"id_starname":"st_koi_name",
              "id_koicand":"pl_koi_name",
              "id_kepler_name":"pl_kepler_name"}

CKS_data.rename( columns=tricky_cols, inplace=True )
CKS_length = CKS_data["st_koi_name"].size
print(CKS_data)

['id_starname', 'id_koicand', 'id_kepler_name', 'Q16_disposition', 'CKSI_steff', 'CKSI_stefferr1', 'CKSI_stefferr2', 'CKSII_steff', 'CKSII_stefferr1', 'CKSII_stefferr2', 'CKSII_srad', 'CKSII_sraderr1', 'CKSII_sraderr2', 'CKSII_st_mass', 'CKSII_st_masserr1', 'CKSII_st_masserr2', 'CKSII_sage', 'CKSII_sageerr1', 'CKSII_sageerr2', 'CKSII_prad', 'CKSII_praderr1', 'CKSII_praderr2', 'CKSII_sma', 'CKSII_smaerr1', 'CKSII_smaerr2']
{'id_starname': 'id_starname', 'id_koicand': 'id_koicand', 'id_kepler_name': 'id_kepler_name', 'koi_disposition': 'Q16_disposition', 'cks_steff': 'CKSI_steff', 'cks_steff_err1': 'CKSI_stefferr1', 'cks_steff_err2': 'CKSI_stefferr2', 'iso_steff': 'CKSII_steff', 'iso_steff_err1': 'CKSII_stefferr1', 'iso_steff_err2': 'CKSII_stefferr2', 'iso_srad': 'CKSII_srad', 'iso_srad_err1': 'CKSII_sraderr1', 'iso_srad_err2': 'CKSII_sraderr2', 'iso_smass': 'CKSII_st_mass', 'iso_smass_err1': 'CKSII_st_masserr1', 'iso_smass_err2': 'CKSII_st_masserr2', 'iso_sage': 'CKSII_sage', 'iso_sage_

In [21]:
# Checking whether confirmed planets alone have Kepler planet names
# Current values are CANDIDATE, FALSE POSITIVE, NOT DISPOSITIONED or CONFIRMED.
# A not dispositioned value corresponds to objects for which the disposition tests have not yet been completed. A false positive has failed at least one of the tests described 
#in Batalha et al. (2012). A planetary candidate has passed all prior tests conducted to identify false positives, although this does not a priori mean that all possible tests have 
#been conducted. 

c1 = np.where(CKS_data["status"] == "CONFIRMED")
print(c1[0].size, "confirmed Kepler planets in CKS.")

c2 = np.where(CKS_data["status"] == "NOT DISPOSITIONED")
print(c2[0].size, "not dispositioned Kepler planets in CKS.")

print(c1[0].size + c2[0].size, "confirmed + not dispositioned Kepler planets in CKS.")

c3 = np.where(CKS_data["status"] == "FALSE POSITIVE")
print(c3[0].size, "false positive Kepler planets in CKS.")


c4 = np.where( (CKS_data["status"] == "FALSE POSITIVE") & CKS_data["pl_kepler_name"].notnull() )
print(c4[0].size)
# No false positives have kepler names

c5 = np.where( (CKS_data["status"] == "NOT DISPOSITIONED") & CKS_data["pl_kepler_name"].notnull() )
print(c5[0].size)
# No not dispositioned planets (tests not yet completed) have kepler names

c6 = np.where((CKS_data["status"] == "CANDIDATE") & CKS_data["pl_kepler_name"].notnull() )
print(c5[0].size)
# No candidates (yet to be confirmed) have kepler names

c7 = np.where( (CKS_data["status"] == "CONFIRMED") & CKS_data["pl_kepler_name"].isna() )
print(c7[0].size, "confirmed Kepler planets without Kepler names in CKS.")
# All confirmed planets have kepler names

c8 = np.where(CKS_data["pl_koi_name"].isna())
print("Planets in CKS with no pl_koi_name:", c8[0].size)
# all planets in CKS have koi names/numbers

1298 confirmed Kepler planets in CKS.
464 not dispositioned Kepler planets in CKS.
1762 confirmed + not dispositioned Kepler planets in CKS.
53 false positive Kepler planets in CKS.
0
0
0
0 confirmed Kepler planets without Kepler names in CKS.
Planets in CKS with no pl_koi_name: 0


In [None]:
# TODO: save CKS_processed (new column headings)
