# In [None]:  (Jupyter cell marker left over from the notebook export)
#WELCOME TO THE EXOPLANET HUNT
#created by 
#Hızır Kaan ERKAN
#Fatma YALÇIN
#Sefa GAKÇI
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime

# helpers 

def get_float(msg):
    """Prompt until the user enters a finite number and return it as a float.

    A decimal comma is accepted, so "3,5" is read as 3.5. Inputs that
    ``float`` can parse but that are not finite ("nan", "inf", "-infinity")
    are rejected and re-prompted: a NaN compares false against every
    threshold in the scoring helpers and would silently take their fallback
    branch.

    Args:
        msg: Prompt text passed to ``input``.

    Returns:
        The parsed finite ``float``.

    Raises:
        EOFError: Propagated from ``input`` when stdin is exhausted.
    """
    import math  # local import: the file header does not import math

    while True:
        text = input(msg).strip().replace(",", ".")
        try:
            value = float(text)
        except ValueError:
            value = None
        if value is not None and math.isfinite(value):
            return value
        print("Enter a number.")

def to_float(x):
    """Convert *x* to a float, or return NaN when that is not possible.

    The value is stringified first and every comma is replaced by a dot, so
    a decimal comma ("3,5") parses as 3.5. Anything that still fails to
    parse (text, ``None``, "1,234.5") yields ``np.nan`` instead of raising.
    """
    try:
        text = str(x)
        normalized = text.replace(",", ".")
        result = float(normalized)
    except Exception:
        # Deliberately broad: any failure while converting means "no value".
        result = np.nan
    return result

def parse_sample(raw: str):
    """Turn the user's sample-size answer into an int, or None for "all rows".

    ``None``, an empty answer and the keywords "all", "full", "max" and "*"
    (case-insensitive) mean "use every row". Underscores and commas are
    dropped before parsing, so "1_000" and "1,000" both give 1000. An answer
    that is not an integer prints a notice and also falls back to None.
    """
    if raw is None:
        return None

    answer = raw.strip().lower()
    if answer in ("", "all", "full", "max", "*"):
        return None

    digits = answer.replace("_", "").replace(",", "")
    try:
        size = int(digits)
    except ValueError:
        print("Invalid sample size, using all rows.")
        return None
    return size

def pause():
    """Wait for the user to press Enter before continuing.

    End-of-input on stdin is ignored, so the call returns immediately
    instead of raising when there is nothing left to read.
    """
    prompt = "\nPress Enter to continue..."
    try:
        input(prompt)
    except EOFError:
        # Nothing to wait for when stdin is closed or exhausted.
        return

def safe_save(df: pd.DataFrame, ref: Path, name: str) -> Path | None:
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    fn = f"{name}_{ts}.csv"
    for p in (ref.parent / fn, Path.home() / "Desktop" / fn, Path.cwd() / fn):
        try:
            df.to_csv(p, index=False, encoding="utf-8")
            return p
        except Exception:
            continue
    return None

def find_path(s: str) -> Path | None:
    s = s.strip().strip('"').strip("'")
    for p in (Path(s), Path.cwd() / s, Path("C:/") / s, Path.home() / "Desktop" / s):
        try:
            if p.exists():
                return p
        except Exception:
            pass
    return None

# score

def f_tmag(x):
    """Factor for the magnitude input: 1.0 when *x* < 12, otherwise 0.5.

    A NaN compares false and therefore also yields 0.5.
    """
    if x < 12:
        return 1.0
    return 0.5
def f_depth(x):
    """Factor for the transit-depth input: 1.0 when *x* > 500, otherwise 0.3.

    A NaN compares false and therefore also yields 0.3.
    """
    if x > 500:
        return 1.0
    return 0.3
def f_period(x):
    """Factor for the orbital-period input: 1.0 when *x* < 30, otherwise 0.4.

    A NaN compares false and therefore also yields 0.4.
    """
    if x < 30:
        return 1.0
    return 0.4
def f_duration(x):
    """Factor for the transit-duration input: 1.0 when *x* < 10, otherwise 0.6.

    A NaN compares false and therefore also yields 0.6.
    """
    if x < 10:
        return 1.0
    return 0.6

def score(period, duration, depth, tmag):
    """Combine the four factor functions into a weighted score.

    Each input is mapped to a factor by its ``f_*`` helper, and the factors
    are weighted 0.58 (tmag), 0.27 (depth), 0.08 (period) and 0.07
    (duration). The weighted sum is multiplied by 100 before it is returned.
    """
    w_tmag, w_depth, w_period, w_duration = 0.58, 0.27, 0.08, 0.07

    # Terms are accumulated in a fixed order so the floating-point result
    # is reproducible.
    total = w_tmag * f_tmag(tmag)
    total = total + w_depth * f_depth(depth)
    total = total + w_period * f_period(period)
    total = total + w_duration * f_duration(duration)
    return 100.0 * total

def label(s, mid=46.0, high=80.0):
    """Map a score to one of the labels "CP", "PC" or "APC".

    Args:
        s: Score to classify.
        mid: Lowest score that still earns "PC".
        high: Lowest score that earns "CP".

    Returns:
        "CP" when ``s >= high``, "PC" when ``s >= mid``, otherwise "APC"
        (which is also the result for NaN, since both comparisons are false).
    """
    if s >= high:
        verdict = "CP"
    elif s >= mid:
        verdict = "PC"
    else:
        verdict = "APC"
    return verdict

# Column-name aliases for each logical role. Within a role the aliases are
# listed in priority order: the column lookup tries them first to last.
CANDIDATE = {
    "id": [
        "toi", "tic", "pl_name", "id", "object", "name", "candidate",
    ],
    "orbper": [
        "orbper", "period", "pl_orbper", "orbital_period", "per",
    ],
    "trandur": [
        "trandur", "pl_trandur", "pl_trandurh", "duration",
        "transit_duration", "t_dur", "tdur",
    ],
    "trandept": [
        "trandept", "pl_trandep", "depth", "transit_depth", "ppm",
        "deph", "tr_depth", "depth_ppm",
    ],
    "tmag": [
        "tmag", "st_tmag", "tic_tmag", "mag", "t_mag", "stellar_tmag",
    ],
}

def _pick(df, keys):
    lm = {c.lower(): c for c in df.columns}
    for k in keys:
        if k.lower() in lm:
            return lm[k.lower()]
    for k in keys:
        for lc, orig in lm.items():
            if k.lower() in lc:
                return orig
    return None

def read_table(p: Path) -> pd.DataFrame:
    """Load a table from an Excel workbook or a delimited text file.

    Files ending in ``.xlsx``, ``.xlsm`` or ``.xls`` (case-insensitive) are
    read with ``pandas.read_excel``; previously only ``.xlsx`` was
    recognised and the other workbook formats were fed to the CSV parser.
    Everything else is read as UTF-8 text with the delimiter sniffed
    automatically and lines starting with ``#`` treated as comments.

    Args:
        p: Path of the file to read.

    Returns:
        The loaded table.

    Raises:
        Whatever the pandas reader raises for a missing, unreadable or
        malformed file (including a missing Excel engine).
    """
    excel_suffixes = {".xlsx", ".xlsm", ".xls"}
    if p.suffix.lower() in excel_suffixes:
        return pd.read_excel(p)
    return pd.read_csv(p, sep=None, engine="python", comment="#", encoding="utf-8")

def find_cols(df, manual=None, interactive=True):
    """Resolve which column of *df* plays each role listed in ``CANDIDATE``.

    For every role, a usable entry in *manual* wins; otherwise the column is
    guessed with ``_pick``. The roles "orbper", "trandur", "trandept" and
    "tmag" are required ("id" is optional). When a required role is still
    unresolved and *interactive* is true, the available columns are listed
    and the user is asked for each missing one.

    Typed answers are matched exactly first and then case-insensitively,
    and an answer that matches nothing is reported rather than silently
    dropped. Column labels are stringified for display so non-string labels
    do not break the listing.

    Args:
        df: Table whose columns are inspected.
        manual: Optional ``{role: column}`` overrides; entries naming a
            column that is not in *df* are ignored.
        interactive: Whether to prompt for unresolved required roles.

    Returns:
        ``{role: column or None}`` for every role in ``CANDIDATE``.

    Raises:
        ValueError: If a required role is still unresolved at the end.
    """
    manual = manual or {}
    needed = ["orbper", "trandur", "trandept", "tmag"]

    def unresolved(mapping):
        # Required roles with no column, or with one that is not in df.
        return [
            role for role in needed
            if mapping.get(role) is None or mapping[role] not in df.columns
        ]

    m = {}
    for role, keys in CANDIDATE.items():
        if role in manual and manual[role] in df.columns:
            m[role] = manual[role]
        else:
            m[role] = _pick(df, keys)

    missing = unresolved(m)
    if missing and interactive:
        print("\nColumns:", ", ".join(str(c) for c in df.columns[:80]))
        by_lower = {str(c).strip().lower(): c for c in df.columns}
        for role in missing:
            answer = input(f"Column for '{role}' (empty=skip): ").strip()
            if not answer:
                continue
            if answer in df.columns:
                m[role] = answer
            elif answer.lower() in by_lower:
                # Accept a name that differs only by case or outer spaces.
                m[role] = by_lower[answer.lower()]
            else:
                print(f"No column named '{answer}'.")
        missing = unresolved(m)

    if missing:
        raise ValueError("Missing columns: " + ", ".join(missing))
    return m

# scoring+summary

def score_df(df, cols, keep_id=True):
    """Score every row of *df* and return one result row per input row.

    Args:
        df: Input table.
        cols: ``{role: column}`` mapping with the keys "orbper", "trandur",
            "trandept" and "tmag"; "id" is optional.
        keep_id: When true and ``cols["id"]`` names a column of *df*, that
            column's value identifies each row; otherwise the row index is
            used.

    Returns:
        A DataFrame with the columns "id", "score" (rounded to one decimal)
        and "label". The columns are present even when *df* has no rows, so
        code that reads ``result["score"]`` does not fail on an empty table.
    """
    id_col = cols.get("id")
    # Loop-invariant: decide once whether an id column is available.
    use_id = keep_id and id_col in df.columns

    rows = []
    for i, r in df.iterrows():
        s = score(
            to_float(r[cols["orbper"]]),
            to_float(r[cols["trandur"]]),
            to_float(r[cols["trandept"]]),
            to_float(r[cols["tmag"]]),
        )
        rows.append({
            "id": r[id_col] if use_id else i,
            "score": round(s, 1),
            "label": label(s),
        })
    return pd.DataFrame(rows, columns=["id", "score", "label"])

def summary(df_s, thr=80.0):
    """Summarise a scored table.

    Args:
        df_s: Table with a numeric "score" column and a "label" column.
        thr: Score threshold used for the pass rate.

    Returns:
        A dict with "n" (row count), "mean", "median", "std" (population
        standard deviation, ``ddof=0``), "pass@80" (percentage of rows with
        ``score >= thr``) and "counts" (rows per label). All statistics are
        rounded to two decimals. The pass-rate key is always "pass@80",
        whatever value *thr* has.
    """
    scores = df_s["score"]
    pass_rate = float((scores >= thr).mean()) * 100.0

    stats = {
        "n": len(df_s),
        "mean": round(float(scores.mean()), 2),
        "median": round(float(scores.median()), 2),
        "std": round(float(scores.std(ddof=0)), 2),
        "pass@80": round(pass_rate, 2),
        "counts": df_s["label"].value_counts().to_dict(),
    }
    return stats

def quick_summary(path_or_name, n=None, seed=42):
    """Score a table file and print its summary and ten best rows.

    Args:
        path_or_name: Path or file name, resolved with ``find_path``.
        n: Number of rows to sample; ``None``, a non-positive value or a
            value of at least the table length means "use every row".
        seed: Random state for the sampling.

    Returns:
        ``(scores, summary_dict, column_mapping, resolved_path)``.

    Raises:
        FileNotFoundError: If the file cannot be located.
    """
    p = find_path(str(path_or_name))
    if not p:
        raise FileNotFoundError("File not found: " + str(path_or_name))

    table = read_table(p)
    cols = find_cols(table)

    use_everything = n is None or n <= 0 or n >= len(table)
    if use_everything:
        subset = table
    else:
        subset = table.sample(n=n, random_state=seed)

    sc = score_df(subset, cols)
    sm = summary(sc)

    print("\nSummary")
    print(f"N={sm['n']} | mean={sm['mean']} | median={sm['median']} | std={sm['std']} | pass@80={sm['pass@80']}%")
    print("Counts:", sm["counts"])
    print("\nTop 10")
    best_first = sc.sort_values("score", ascending=False)
    print(best_first.head(10))
    return sc, sm, cols, p

def full_export(path_or_name, n=None, seed=42):
    """Score a table file, save the scores as CSV and print a summary.

    The scored rows are written through ``safe_save`` with the name prefix
    "results" before the summary and the ten best rows are printed.

    Args:
        path_or_name: Path or file name, resolved with ``find_path``.
        n: Number of rows to sample; ``None``, a non-positive value or a
            value of at least the table length means "use every row".
        seed: Random state for the sampling.

    Returns:
        ``(scores, summary_dict, column_mapping, resolved_path, saved_path)``
        where ``saved_path`` is ``None`` when saving failed.

    Raises:
        FileNotFoundError: If the file cannot be located.
    """
    p = find_path(str(path_or_name))
    if not p:
        raise FileNotFoundError("File not found: " + str(path_or_name))

    table = read_table(p)
    cols = find_cols(table)

    use_everything = n is None or n <= 0 or n >= len(table)
    if use_everything:
        subset = table
    else:
        subset = table.sample(n=n, random_state=seed)

    sc = score_df(subset, cols)
    sm = summary(sc)

    sv = safe_save(sc, p, "results")
    print(f"Saved: {sv}" if sv else "Save failed.")

    print("\nSummary")
    print(f"N={sm['n']} | mean={sm['mean']} | median={sm['median']} | std={sm['std']} | pass@80={sm['pass@80']}%")
    print("Counts:", sm["counts"])
    print("\nTop 10")
    best_first = sc.sort_values("score", ascending=False)
    print(best_first.head(10))
    return sc, sm, cols, p, sv

#  manual single row

def single_row():
    """Ask for one candidate's four measurements and print its score and label.

    The values are requested in the order period, duration, depth and
    magnitude, which is the argument order of ``score``.
    """
    prompts = (
        "Orbital period (days): ",
        "Transit duration (hours): ",
        "Transit depth (ppm): ",
        "TESS magnitude: ",
    )
    values = [get_float(prompt) for prompt in prompts]
    s = score(*values)
    print(f"\nScore: {s:.1f} | Label: {label(s)}\n")

#  menu

def main():
    """Run the interactive menu until the user exits.

    Choices:
        1 - score a single, manually entered row;
        2 - summarise a table file;
        3 - summarise a table file and export the scores;
        0 - exit.

    Any exception raised while handling a choice (including end-of-input
    while answering its prompts) is printed as ``Error: ...`` and the menu
    is shown again. End-of-input at the menu prompt itself is treated like
    choice 0, so a closed or exhausted stdin ends the program cleanly
    instead of with an uncaught ``EOFError``.
    """
    while True:
        print("\n1) Single row")
        print("2) CSV summary")
        print("3) CSV full export")
        print("0) Exit")
        try:
            ch = input("Choice: ").strip()
        except EOFError:
            # No more input: finish the prompt line and leave the menu.
            print()
            ch = "0"

        if ch == "0":
            print("Done.")
            break

        if ch == "1":
            try:
                single_row()
            except Exception as e:
                print("Error:", e)
            finally:
                pause()
        elif ch in ("2", "3"):
            try:
                f = input("CSV path/name: ")
                nraw = input("Sample size (int, empty=all): ")
                n = parse_sample(nraw)
                # Both choices share the prompts and differ only in the runner.
                runner = quick_summary if ch == "2" else full_export
                runner(f, n=n)
            except Exception as e:
                print("Error:", e)
            finally:
                pause()
        else:
            print("Invalid.")

if __name__ == "__main__":
    # Start the interactive menu when executed directly. Ctrl+C anywhere
    # inside the menu ends the program with a short message instead of a
    # traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("\nCancelled.")


# Captured notebook output (the menu printed when the cell was run):
# 1) Single row
# 2) CSV summary
# 3) CSV full export
# 0) Exit
