In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Location of the exoplanet catalogue CSV.
# Raw string on purpose: in a normal string literal "\E" and "\c" are invalid
# escape sequences (SyntaxWarning on Python 3.12+), and a folder name starting
# with e.g. "n" or "t" would silently turn into a newline/tab.
filename = r"C:\Exoplanet\composite_exo_data-unfiltered.csv"

# comment='#' skips the NASA metadata lines at the top of the file;
# low_memory=False avoids mixed-dtype warnings.
df = pd.read_csv(filename, comment='#', low_memory=False)

# Columns that must be populated for a planet to stay in the sample.
required_columns = [
    "pl_bmasse",
    "st_teff",
    "st_rad",
    "pl_orbsmax",
    "sy_jmag",
    "pl_trandur",
    "pl_bmasseerr1",
    "pl_bmasseerr2",
]

# Sub-Neptune selection, built from three row masks:
#   * planet radius within [1.5, 4] (between() is inclusive at both ends),
#   * no missing value in any required column,
#   * exactly one star in the system.
is_sub_neptune = df["pl_rade"].between(1.5, 4)
has_required_data = df[required_columns].notna().all(axis=1)
is_single_star = df["sy_snum"] == 1

# .copy() yields an independent frame, so adding columns to it later does not
# raise pandas' SettingWithCopyWarning.
df_filtered = df[is_sub_neptune & has_required_data & is_single_star].copy()

# Physical constants / assumptions used for the equilibrium temperature.
R_SUN_IN_AU = 0.00465047   # one solar radius expressed in AU
ALBEDO = 0.3               # assumed planetary albedo

# Plain NumPy views of the columns used in the calculations.
Rp = df_filtered["pl_rade"].values            # planet radius
Mp = df_filtered["pl_bmasse"].values          # planet mass
starsN = df_filtered["sy_snum"]               # number of stars in the system (kept as a Series)
Tstar = df_filtered["st_teff"].values         # stellar temperature in K
Rstar = df_filtered["st_rad"].values          # stellar radius in R_sun
a = df_filtered["pl_orbsmax"].values          # orbital semi-major axis in AU
Jmag = df_filtered["sy_jmag"].values          # system J magnitude
transit = df_filtered["pl_trandur"].values    # transit duration
upperunc = df_filtered["pl_bmasseerr1"].values  # planet-mass upper uncertainty
lowerunc = df_filtered["pl_bmasseerr2"].values  # planet-mass lower uncertainty

# Stellar radius converted from R_sun to AU so it shares units with `a`.
Rstar_AU = Rstar * R_SUN_IN_AU

# Equilibrium temperature: Teq = T* * sqrt(R* / (2 a)) * (1 - albedo)^(1/4).
# NOTE(review): the TSM definition in Kempton et al. (2018) appears to assume
# zero albedo for Teq - confirm that albedo = 0.3 is intended here.
df_filtered["Teq_calc"] = Tstar * np.sqrt(Rstar_AU / (2 * a)) * (1 - ALBEDO) ** 0.25

# Read the stored column back so `Teq` and the table always agree.
Teq = df_filtered["Teq_calc"].values

def scale_factor(Rp):
    """Return the scale factor associated with a planet radius.

    The factor is piecewise constant in ``Rp``:

    * ``Rp < 1.5``           -> 0.19
    * ``1.5 <= Rp < 2.75``   -> 1.26
    * ``2.75 <= Rp < 4.0``   -> 1.28
    * anything else          -> 1.15

    Parameters
    ----------
    Rp : scalar
        Planet radius (a single number, not an array).

    Returns
    -------
    float
        The scale factor for the bracket containing ``Rp``. Values that
        compare ``False`` against every bound (e.g. NaN) fall through to 1.15.
    """
    # (exclusive upper bound, factor), in ascending order of radius.
    brackets = (
        (1.5, 0.19),
        (2.75, 1.26),
        (4.0, 1.28),
    )
    for upper_bound, factor in brackets:
        if Rp < upper_bound:
            return factor
    return 1.15

# Scale factor for each planet, chosen from its radius bracket.
S = np.array([scale_factor(r) for r in Rp])

# Transmission spectroscopy metric:
#   TSM = S * Rp^3 * Teq / (Mp * Rstar^2) * 10^(-Jmag / 5)
TSM = S * (Rp**3 * Teq) / (Mp * Rstar**2) * 10**(-Jmag/5)

# Store the TSM values in the filtered table.
df_filtered["TSM"] = TSM

max_tsm = df_filtered["TSM"].max()

# Relative number of transits: ratio of the best TSM in the sample to the
# planet's own TSM, rounded to the nearest integer, with a floor of 1 transit.
# NOTE(review): if TSM is meant as an S/N proxy and S/N grows as
# sqrt(number of transits), matching the best target would need
# (max_tsm / TSM)**2 transits - confirm the linear scaling is intended.
df_filtered["N_transits"] = (
    (max_tsm / df_filtered["TSM"]).round().astype(int).clip(lower=1)
)

# Observing time charged per transit, as a multiple of the transit duration
# (presumably to include out-of-transit baseline - confirm).
OBS_TIME_PER_TRANSIT_FACTOR = 2.0

df_filtered["total_transit_duration"] = (
    df_filtered["N_transits"] * OBS_TIME_PER_TRANSIT_FACTOR * df_filtered["pl_trandur"]
)

# View results. This must be the last expression in the cell: Jupyter only
# renders the value of the final expression, so a bare expression placed
# before further statements is silently discarded.
df_filtered[["pl_name", "TSM", "N_transits"]].head(20)



In [4]:
# General code for finding the observing time required by a given target list.

planet_list = [
    "GJ 1214 b",
    "HD 136352 c",
    "TOI-561 c"
]

# Select the requested planets from the filtered dataframe.
df_targets = df_filtered[df_filtered["pl_name"].isin(planet_list)]

# Requested planets that did not survive the sample filter (or are not in the
# catalogue at all) would otherwise be dropped from the total without notice.
found = set(df_targets["pl_name"])
missing = set(planet_list) - found

if missing:
    print("These planets were not found in the filtered sample:")
    # Sets have no defined order; sort so the report is reproducible.
    for m in sorted(missing):
        print(m)

print("\nObserving time per planet:")
print(df_targets[[
    "pl_name",
    "pl_trandur",
    "N_transits",
    "total_transit_duration"
]])

# Sum over the targets that were found; missing planets contribute nothing.
total_time = df_targets["total_transit_duration"].sum()

print(f"\nTotal observing time required: {total_time:.2f} hours")




Observing time per planet:
          pl_name  pl_trandur  N_transits  total_transit_duration
213     GJ 1214 b     0.86966           1                 1.73932
664   HD 136352 c     3.25100           2                13.00400
5654    TOI-561 c     3.73900           4                29.91200

Total observing time required: 44.66 hours
