In [None]:
# Keep GPS satellites only, then average VTEC per minute index and per hour index

!pip install xlsxwriter

import re
import numpy as np
import pandas as pd
from google.colab import files

# RINEX-style one-letter system codes for constellations other than GPS ("G"):
# GLONASS, Galileo, BeiDou, QZSS, IRNSS/NavIC and SBAS.
_NON_GPS_PREFIXES = frozenset({"R", "E", "C", "J", "I", "S"})

# The hour summary always lists hour indices 1..24.
_HOURS_PER_SHEET = 24


def _satellite_ids_to_numeric(raw_ids):
    """Convert a satellite-id column to numbers, rejecting non-GPS string IDs.

    Values that are already numeric (or numeric strings) are used as-is.
    Other strings such as "G01" or "GPS12" contribute their first run of
    digits.  Strings whose leading letters are a non-GPS system code
    (e.g. "R05", "E12") become NaN, so the numeric range filter applied by
    the caller cannot mistake them for GPS satellites.

    Args:
        raw_ids: pandas Series holding the raw satellite identifiers.

    Returns:
        Series aligned with ``raw_ids``; NaN where no usable id was found.
    """
    numeric = pd.to_numeric(raw_ids, errors="coerce")
    unparsed = numeric.isna()
    if unparsed.any():
        text = raw_ids.loc[unparsed].astype(str).str.strip().str.upper()
        digits = pd.to_numeric(
            text.str.extract(r"(\d+)", expand=False), errors="coerce"
        )
        prefix = text.str.extract(r"^([A-Z]+)", expand=False)
        # Drop ids that explicitly name another constellation.
        numeric.loc[unparsed] = digits.mask(prefix.isin(_NON_GPS_PREFIXES))
    return numeric


# ===== 1. Upload Excel File =====
uploaded = files.upload()
if not uploaded:
    raise SystemExit("No file uploaded.")
# Only the first uploaded file is processed.
infile = next(iter(uploaded))

# ===== 2. Process Input Workbook =====
outname = infile.rsplit(".", 1)[0] + "_processed.xlsx"

# Context managers make sure both workbooks are closed even when a sheet
# fails validation part-way through.
with pd.ExcelFile(infile) as xls, pd.ExcelWriter(outname, engine="xlsxwriter") as writer:
    for sheet in xls.sheet_names:
        print(f"Processing: {sheet}")
        df = pd.read_excel(xls, sheet_name=sheet)

        # --- Ensure min 4 columns ---
        if df.shape[1] < 4:
            raise ValueError(f"Sheet '{sheet}' must have at least 4 columns: gps_week/SOW/satellite_id/vtec")

        # --- Keep first 4 columns (identified by position, not by header) ---
        df = df.iloc[:, :4].copy()
        df.columns = ["gps_week", "sow", "satellite_id", "vtec"]

        # --- Clean types ---
        # Non-numeric vtec values become NaN and are skipped by the means below.
        df["vtec"] = pd.to_numeric(df["vtec"], errors="coerce")
        df["satellite_id_num"] = _satellite_ids_to_numeric(df["satellite_id"])

        # --- GPS only: IDs 1..37 ---
        df = df[df["satellite_id_num"].between(1, 37)].copy()
        df["SOW_num"] = pd.to_numeric(df["sow"], errors="coerce")

        if df.empty:
            # Output only headers for empty sheet; the "" entries are the
            # spacer columns E and H of the normal layout.
            blank = pd.DataFrame(columns=[
                "gps_week", "sow", "satellite_id", "vtec",
                "", "minute_index", "minute_avg_vtec",
                "", "hour_index", "hour_avg_vtec"
            ])
            blank.to_excel(writer, sheet_name=sheet, index=False)
            print("  (no GPS rows found on this sheet)")
            continue

        # --- Minute index: 1-based rank of each distinct SOW value ---
        # NOTE(review): consecutive ranks are treated as consecutive minutes,
        # which presumably assumes one epoch per minute without gaps - confirm.
        # gps_week is not part of the key, so equal SOW values from different
        # weeks share one index.
        sows_sorted = np.sort(df["SOW_num"].dropna().unique())
        minute_map = {val: idx + 1 for idx, val in enumerate(sows_sorted)}
        df["minute_index"] = df["SOW_num"].map(minute_map)

        # --- Minute summary: mean vtec over all GPS satellites per index ---
        minute_summary = (
            df.dropna(subset=["minute_index"])
              .groupby("minute_index", as_index=False)["vtec"].mean()
              .rename(columns={"vtec": "minute_avg_vtec"})
        )

        # --- Attach to data ---
        # These per-row columns are not exported below; they only enrich the
        # in-memory ``df``.
        df = df.merge(minute_summary, on="minute_index", how="left")

        # --- Hour indexing and summary: blocks of 60 minute indices ---
        df["hour_index"] = ((df["minute_index"] - 1) // 60) + 1
        minute_summary["hour_index"] = ((minute_summary["minute_index"] - 1) // 60) + 1

        # The reindex below keeps hours 1..24 only; say so instead of
        # dropping later hours silently.
        max_hour = minute_summary["hour_index"].max()
        if max_hour > _HOURS_PER_SHEET:
            print(
                f"  warning: data spans {int(max_hour)} hour indices; "
                f"hour summary lists only 1-{_HOURS_PER_SHEET}"
            )

        hour_summary = (
            minute_summary.groupby("hour_index")["minute_avg_vtec"]
                          .mean()
                          .reindex(range(1, _HOURS_PER_SHEET + 1))
                          .reset_index()
                          .rename(columns={"minute_avg_vtec": "hour_avg_vtec"})
        )

        # --- Write blocks to output sheet ---
        # Columns: A-D original, F-G minute summary, I-J hour summary
        df_out = df[["gps_week", "sow", "satellite_id", "vtec"]].copy()
        df_out.to_excel(writer, sheet_name=sheet, startrow=0, startcol=0, index=False)
        # Minute summary
        minute_summary[["minute_index", "minute_avg_vtec"]].to_excel(
            writer, sheet_name=sheet, startrow=0, startcol=5, index=False
        )
        # Hour summary
        hour_summary[["hour_index", "hour_avg_vtec"]].to_excel(
            writer, sheet_name=sheet, startrow=0, startcol=8, index=False
        )

        # --- Optional formatting for column widths ---
        ws = writer.sheets[sheet]
        ws.set_column(0, 3, 12)  # A-D
        ws.set_column(5, 6, 16)  # F-G
        ws.set_column(8, 9, 14)  # I-J

# ===== 3. Download Result =====
# The workbook is fully written once the ``with`` block has exited.
files.download(outname)
# The check mark is written as an escape so it survives re-encoding of this file.
print(f"\u2705 Done. Saved as {outname}")


