In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#filename = r"C:\Exoplanet\PS_2026.01.15_09.23.48.csv"
filename = "C:\Exoplanet\composite_exo_data-unfiltered.csv"
df = pd.read_csv(filename, comment='#', low_memory=False) # comment="#" skips NASA metadata lines
# low_memory=False avoids mixed dtype warnings
#print(df.columns)

df_filtered = df[
    (
        df["pl_rade"].between(1.5, 4) &
        df["pl_bmasse"].notna() &
        df["st_teff"].notna() &
        df["st_rad"].notna() &
        df["pl_orbsmax"].notna() &
        df["sy_jmag"].notna() &
        df["pl_trandur"].notna() &
        df["pl_bmasseerr1"].notna() &
        df["pl_bmasseerr2"].notna() &
        df["sy_snum"].between(1,1)                         
         
    )
].copy()  # Filter the table to select sub-Neptunes and removes rows with missing values
# .copy() avoids pandas "SettingWithCopyWarning"

Rp = df_filtered["pl_rade"].values  # planet radius
Mp = df_filtered["pl_bmasse"].values # planet mass
starsN= df_filtered["sy_snum"]  #number of stars
Tstar = df_filtered["st_teff"].values       # Eq temp of star in K
Rstar = df_filtered["st_rad"].values        # temp of star in R_sun
a = df_filtered["pl_orbsmax"].values        # Orbital semimajor axis in AU
Jmag = df_filtered["sy_jmag"].values
transit = df_filtered["pl_trandur"].values
upperunc=df_filtered["pl_bmasseerr1"].values
lowerunc=df_filtered["pl_bmasseerr2"].values 
Rstar_AU = Rstar * 0.00465047  # Conversion of radius of star into AU

Teq = Tstar * np.sqrt(Rstar_AU / (2 * a)) * (1-0.3)**(1/4)  #Equilibrium temperature

df_filtered["Teq_calc"] = Teq
Teq = df_filtered["Teq_calc"].values

def scale_factor(Rp): #code which gives scale factor values depending on radius
    if Rp < 1.5:
        return 0.19
    elif Rp < 2.75:
        return 1.26
    elif Rp < 4.0:
        return 1.28
    else:
        return 1.15

S = np.array([scale_factor(r) for r in Rp])  #scale factors for each planet

TSM = S * (Rp**3 * Teq) / (Mp * Rstar**2) * 10**(-Jmag/5) # transmission spectroscopy metric formula

df_filtered["TSM"] = TSM  #stores TSM values in the filtered table

df_sorted = df_filtered.sort_values("TSM", ascending=False)  # ranks planets by TSM values with highest (best) first

ranked = df_sorted[["pl_name", "pl_rade", "pl_bmasse", "Teq_calc", "TSM" , "pl_trandur"]]

print(ranked.head(20)) #leftmost column shows position in the dataframe for each row

print("Number of filtered planets is",len(df_filtered))

top20 = df_sorted.head(20)
total_duration_hours = top20["pl_trandur"].sum()
print("Total transit duration of top 20 TSM planets:", total_duration_hours, "hours")



observationtime=(total_duration_hours)*2
print("Total observing time if each transit is measured once is",observationtime)



          pl_name   pl_rade  pl_bmasse     Teq_calc         TSM  pl_trandur
213     GJ 1214 b  2.733000       8.41   518.405137  380.584732    0.869660
4856    L 98-59 d  1.627000       1.64   380.657903  327.812477    0.840000
54       AU Mic b  3.956770      20.12   549.267345  319.920731    3.492700
958   HD 219134 b  1.602000       4.74   928.562830  268.059893    0.945000
6062     pi Men c  2.018900       3.63  1094.236069  242.520049    2.952024
664   HD 136352 c  2.916000      11.24   619.417654  214.891116    3.251000
861   HD 191939 d  2.995000       2.80   495.301520  208.188326    5.360000
242     GJ 3090 b  2.130000       3.34   633.297366  201.591013    1.281000
959   HD 219134 c  1.511000       4.36   715.396134  188.393035    1.660000
1020  HD 260655 c  1.533000       3.09   509.994434  179.835293    0.980000
5552    TOI-431 d  3.290000       9.90   584.245431  174.149824    3.319578
5644    TOI-544 b  2.018000       2.89   967.592727  170.825496    1.210000
1391   HIP 9

In [3]:
max_tsm = df_sorted["TSM"].max()

# Calculate relative number of transits
df_sorted["N_transits"] = max_tsm / df_sorted["TSM"]

# Round to nearest integer and enforce minimum of 1 transit
df_sorted["N_transits"] = df_sorted["N_transits"].round().astype(int)
df_sorted["N_transits"] = df_sorted["N_transits"].clip(lower=1)

# View results
df_sorted[["pl_name", "TSM", "N_transits"]].head(20)

df_sorted["total_transit_duration"] = (
    df_sorted["N_transits"] * 2.0 * df_sorted["pl_trandur"]
)


top20 = df_sorted.head(20)

total_duration_top20 = (top20["total_transit_duration"].sum())*2

print(f"Total observing time for top 20 TSM planets accounting for number of transits required: {total_duration_top20:.2f} hours")

display(df_sorted[["pl_name", "TSM", "N_transits", "total_transit_duration"]].head(20))


Total observing time for top 20 TSM planets accounting for number of transits required: 416.70 hours


Unnamed: 0,pl_name,TSM,N_transits,total_transit_duration
213,GJ 1214 b,380.584732,1,1.73932
4856,L 98-59 d,327.812477,1,1.68
54,AU Mic b,319.920731,1,6.9854
958,HD 219134 b,268.059893,1,1.89
6062,pi Men c,242.520049,2,11.808096
664,HD 136352 c,214.891116,2,13.004
861,HD 191939 d,208.188326,2,21.44
242,GJ 3090 b,201.591013,2,5.124
959,HD 219134 c,188.393035,2,6.64
1020,HD 260655 c,179.835293,2,3.92
