In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

#filename = r"C:\Exoplanet\PS_2026.01.15_09.23.48.csv"
filename = "C:\Exoplanet\composite_exo_data-unfiltered.csv"
df = pd.read_csv(filename, comment='#', low_memory=False) # comment="#" skips NASA metadata lines
# low_memory=False avoids mixed dtype warnings
#print(df.columns)

df_filtered = df[
    (
        # -----------------------------
        # Planet size selection
        # -----------------------------
        df["pl_rade"].between(1.5, 4) &

        # -----------------------------
        # Orbital + mass measurements
        # -----------------------------
        df["pl_bmasse"].notna() &
        df["pl_orbsmax"].notna() &

        # -----------------------------
        # Stellar properties
        # -----------------------------
        df["st_teff"].notna() &
        df["st_rad"].notna() &
        df["st_mass"].notna() &
        df["st_met"].notna() &

        # -----------------------------
        # Observing properties
        # -----------------------------
        df["sy_jmag"].notna() &
        df["pl_trandur"].notna() &

        # -----------------------------
        # Mass uncertainties (true detections)
        # -----------------------------
        df["pl_bmasseerr1"].notna() &
        df["pl_bmasseerr2"].notna() &
        (df["pl_bmasseerr1"] != 0) &
        (df["pl_bmasseerr2"] != 0) &
        (df["pl_bmasselim"] == 0) &

        # -----------------------------
        # Radius uncertainties (true detections)
        # -----------------------------
        df["pl_radeerr1"].notna() &
        df["pl_radeerr2"].notna() &
        (df["pl_radeerr1"] != 0) &
        (df["pl_radeerr2"] != 0) &
        (df["pl_radelim"] == 0) &

        # -----------------------------
        # System architecture
        # -----------------------------
        df["sy_snum"].between(1, 1) &

        # -----------------------------
        # Reliability
        # -----------------------------
        df["pl_controv_flag"].notna()
    )
].copy()


Rp = df_filtered["pl_rade"].values
Mp = df_filtered["pl_bmasse"].values
starsN = df_filtered["sy_snum"]
Tstar = df_filtered["st_teff"].values
Rstar = df_filtered["st_rad"].values
a = df_filtered["pl_orbsmax"].values
Jmag = df_filtered["sy_jmag"].values
transit = df_filtered["pl_trandur"].values
upperunc = df_filtered["pl_bmasseerr1"].values
lowerunc = df_filtered["pl_bmasseerr2"].values

Rstar_AU = Rstar * 0.00465047  # Conversion of radius of star into AU

Teq = Tstar * np.sqrt(Rstar_AU / (2 * a)) * (1-0.3)**(1/4)  #Equilibrium temperature

df_filtered["Teq_calc"] = Teq
Teq = df_filtered["Teq_calc"].values

def scale_factor(Rp): #code which gives scale factor values depending on radius
    if Rp < 1.5:
        return 0.19
    elif Rp < 2.75:
        return 1.26
    elif Rp < 4.0:
        return 1.28
    else:
        return 1.15

S = np.array([scale_factor(r) for r in Rp])  #scale factors for each planet

TSM = S * (Rp**3 * Teq) / (Mp * Rstar**2) * 10**(-Jmag/5) # transmission spectroscopy metric formula

df_filtered["TSM"] = TSM  #stores TSM values in the filtered table

df_sorted = df_filtered.sort_values("TSM", ascending=False)  # ranks planets by TSM values with highest (best) first

ranked = df_sorted[["pl_name", "pl_rade", "pl_bmasse", "Teq_calc", "TSM" , "pl_trandur"]]

#print(ranked.head(20)) #leftmost column shows position in the dataframe for each row

print("Number of filtered planets is",len(df_filtered))

#top20 = df_sorted.head(20)
#total_duration_hours = top20["pl_trandur"].sum()
#print("Total transit duration of top 20 TSM planets:", total_duration_hours, "hours")



#observationtime=(total_duration_hours)*2
#print("Total observing time if each transit is measured once is",observationtime)



Number of filtered planets is 405


In [2]:
max_tsm = df_sorted["TSM"].max()

# Calculate relative number of transits
df_sorted["N_transits"] = max_tsm / df_sorted["TSM"]

# Round to nearest integer and enforce minimum of 1 transit
df_sorted["N_transits"] = df_sorted["N_transits"].round().astype(int)
df_sorted["N_transits"] = df_sorted["N_transits"].clip(lower=1)

# View results
df_sorted[["pl_name", "TSM", "N_transits"]].head(20)

df_sorted["total_transit_duration"] = (
    df_sorted["N_transits"] * 2.0 * df_sorted["pl_trandur"]
)


top20 = df_sorted.head(20)

total_duration_top20 = (top20["total_transit_duration"].sum())*2

print(f"Total observing time for top 20 TSM planets accounting for number of transits required: {total_duration_top20:.2f} hours")



Total observing time for top 20 TSM planets accounting for number of transits required: 427.99 hours


In [3]:
#Ranking by TSM per hour

# Observing efficiency
df_sorted["TSM_per_hour"] = (
    df_sorted["TSM"] / df_sorted["total_transit_duration"]
)

# Rank planets by efficiency
df_eff = df_sorted.sort_values("TSM_per_hour", ascending=False)

TIME_BUDGET = 200.0  # hours

selected = []
time_used = 0.0
tsm_gained = 0.0

for _, row in df_eff.iterrows():
    duration = row["total_transit_duration"]
    
    if time_used + duration <= TIME_BUDGET:
        selected.append(row)
        time_used += duration
        tsm_gained += row["TSM"]
    else:
        break

df_selected = pd.DataFrame(selected)

display(df_selected[[
    "pl_name",
    "TSM",
    "pl_trandur",
    "N_transits",
    "total_transit_duration",
    "TSM_per_hour"
]])


print(f"Number of targets selected: {len(df_selected)}")
print(f"Total observing time used: {time_used:.2f} hours")
print(f"Total TSM gained: {tsm_gained:.2f}")
#display(df_selected)

Unnamed: 0,pl_name,TSM,pl_trandur,N_transits,total_transit_duration,TSM_per_hour
213,GJ 1214 b,380.584732,0.86966,1,1.73932,218.812371
4856,L 98-59 d,327.812477,0.84,1,1.68,195.126475
958,HD 219134 b,268.059893,0.945,1,1.89,141.830631
1020,HD 260655 c,179.835293,0.98,2,3.92,45.87635
54,AU Mic b,319.920731,3.4927,1,6.9854,45.798484
242,GJ 3090 b,201.591013,1.281,2,5.124,39.342508
5644,TOI-544 b,170.825496,1.21,2,4.84,35.294524
959,HD 219134 c,188.393035,1.66,2,6.64,28.372445
5350,TOI-2015 b,136.622908,0.9876,3,5.9256,23.056384
6062,pi Men c,242.520049,2.952024,2,11.808096,20.538455


Number of targets selected: 23
Total observing time used: 194.42 hours
Total TSM gained: 4228.65


In [4]:
# Accounting for temperature bins as well as TSM per hour

bins = [0, 500, 1000, 3000]  # K
labels = ["Cool", "Warm", "Hot"]

df_sorted["Teq_bin"] = pd.cut(df_sorted["Teq_calc"], bins=bins, labels=labels)

df_sorted["Teq_bin"].value_counts()

TIME_BUDGET = 200.0  # total hours
bin_fraction = {"Cool": 0.33, "Warm": 0.33, "Hot": 0.34}  # fraction of total time

selected = []
time_used_total = 0.0
tsm_gained_total = 0.0

for bin_name, fraction in bin_fraction.items():
    # subset
    df_bin = df_sorted[df_sorted["Teq_bin"] == bin_name].sort_values("TSM_per_hour", ascending=False)
    
    # allocate fraction of total time
    budget_bin = TIME_BUDGET * fraction
    
    time_used_bin = 0.0
    
    for _, row in df_bin.iterrows():
        duration = row["total_transit_duration"]
        if time_used_bin + duration <= budget_bin:
            selected.append(row)
            time_used_bin += duration
            time_used_total += duration
            tsm_gained_total += row["TSM"]
        else:
            break

df_selected = pd.DataFrame(selected)

display(df_selected[[
    "pl_name",
    "Teq_calc",
    "Teq_bin",
    "TSM",
    "pl_trandur",
    "N_transits",
    "total_transit_duration",
    "TSM_per_hour"
]])


print(f"Number of targets selected: {len(df_selected)}")
print(f"Total observing time used: {time_used_total:.2f} hours")
print(f"Total TSM gained: {tsm_gained_total:.2f}")

Unnamed: 0,pl_name,Teq_calc,Teq_bin,TSM,pl_trandur,N_transits,total_transit_duration,TSM_per_hour
4856,L 98-59 d,380.657903,Cool,327.812477,0.84,1,1.68,195.126475
5350,TOI-2015 b,485.850989,Cool,136.622908,0.9876,3,5.9256,23.056384
4872,LP 791-18 c,323.680202,Cool,122.570699,1.167,3,7.002,17.505098
5466,TOI-270 c,448.078774,Cool,120.790783,1.682,3,10.092,11.968964
5561,TOI-4438 b,398.371978,Cool,123.703307,2.021,3,12.126,10.201493
861,HD 191939 d,495.30152,Cool,208.188326,5.36,2,21.44,9.710276
213,GJ 1214 b,518.405137,Warm,380.584732,0.86966,1,1.73932,218.812371
958,HD 219134 b,928.56283,Warm,268.059893,0.945,1,1.89,141.830631
1020,HD 260655 c,509.994434,Warm,179.835293,0.98,2,3.92,45.87635
54,AU Mic b,549.267345,Warm,319.920731,3.4927,1,6.9854,45.798484


Number of targets selected: 21
Total observing time used: 180.64 hours
Total TSM gained: 3852.39


In [5]:
# Early list Temperature bin allocation without accounting for TSM per hour or Time budget, instead taking highest TSM for each bin

top_per_bin = []
total_observing_time = 0.0  # hours

bins = ["Cool", "Warm", "Hot"]

for bin_name in bins:
    # subset planets in this temperature bin
    df_bin = df_sorted[df_sorted["Teq_bin"] == bin_name]
    
    # sort by TSM descending
    df_bin_sorted = df_bin.sort_values("TSM", ascending=False)
    
    # take top 5 planets
    top5 = df_bin_sorted.head(5).copy()  # copy to avoid SettingWithCopyWarning
    
    # calculate observing time per planet: 2 * transit duration * number of transits
    top5["observing_time"] = 2.0 * top5["pl_trandur"] * top5["N_transits"]
    
    # sum observing time for this bin
    total_observing_time += top5["observing_time"].sum()
    
    # append to list
    top_per_bin.append(top5)

# combine all bins into a single DataFrame
df_top5 = pd.concat(top_per_bin).reset_index(drop=True)

# display table
display(df_top5[[
    "pl_name", "Teq_calc", "Teq_bin", "TSM", "pl_trandur", "N_transits", "observing_time"
]])

print(f"Total observing time required for top 5 planets per bin: {total_observing_time:.2f} hours")


Unnamed: 0,pl_name,Teq_calc,Teq_bin,TSM,pl_trandur,N_transits,observing_time
0,L 98-59 d,380.657903,Cool,327.812477,0.84,1,1.68
1,HD 191939 d,495.30152,Cool,208.188326,5.36,2,21.44
2,TOI-2443 b,473.048885,Cool,145.334184,4.548,3,27.288
3,TOI-2015 b,485.850989,Cool,136.622908,0.9876,3,5.9256
4,AU Mic c,420.374388,Cool,130.808393,4.236,3,25.416
5,GJ 1214 b,518.405137,Warm,380.584732,0.86966,1,1.73932
6,AU Mic b,549.267345,Warm,319.920731,3.4927,1,6.9854
7,HD 219134 b,928.56283,Warm,268.059893,0.945,1,1.89
8,HD 136352 c,619.417654,Warm,214.891116,3.251,2,13.004
9,GJ 3090 b,633.297366,Warm,201.591013,1.281,2,5.124


Total observing time required for top 5 planets per bin: 221.19 hours
