In [None]:
# Filter out non-GPS constellations, then compute per-minute and per-hour mean VTEC
import re
import numpy as np
import pandas as pd
from google.colab import files

# 1) Upload any Excel file
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of an opaque IndexError when the
    # upload result is empty (e.g. the dialog was dismissed).
    raise ValueError("No file was uploaded; nothing to process.")

# Only the first uploaded file is processed; any additional files are ignored.
infile = next(iter(uploaded))

# Open the workbook once so the sheet names can be enumerated below.
xls = pd.ExcelFile(infile)
# Name of the workbook that receives one processed sheet per input sheet.
outname = "processed_gps_output.xlsx"

# Names given to the first four input columns (Excel columns A-D).
INPUT_COLUMNS = ["gps_week", "sow", "satellite_id", "vtec"]

# Header of every output sheet. The letter prefixes mirror the Excel column
# letters; E and H are blank spacer columns on purpose.
OUTPUT_COLUMNS = [
    "A_gps_week", "B_sow", "C_satellite_id", "D_vtec",
    "E_blank", "F_minute_index", "G_minute_avg_vtec",
    "H_blank", "I_hour_index", "J_hour_avg_vtec",
]

GPS_ID_RANGE = (1, 37)   # inclusive range of numeric IDs kept as GPS
ROWS_PER_MINUTE = 37     # consecutive rows grouped into one minute bucket
MINUTES_PER_HOUR = 60    # minute buckets grouped into one hour bucket

# A bare number or a number with a "G"/"GPS" prefix ("7", "G01", "gps 12").
# Anchored on purpose: IDs carrying any other prefix (e.g. "R01", "E12") must
# NOT be reduced to their digits, or they would pass the GPS range filter.
GPS_ID_PATTERN = r"^\s*(?:GPS|G)?\s*(\d+)\s*$"


def _gps_id_numbers(raw_ids):
    """Convert raw satellite IDs to numbers.

    Values that are already numeric are kept as they are. String values are
    accepted only when they match ``GPS_ID_PATTERN``; everything else becomes
    NaN, so the range filter applied by the caller drops it.
    """
    numeric = pd.to_numeric(raw_ids, errors="coerce")
    unparsed = numeric.isna()
    if unparsed.any():
        digits = (
            raw_ids[unparsed]
            .astype(str)
            .str.extract(GPS_ID_PATTERN, flags=re.IGNORECASE, expand=False)
        )
        numeric.loc[unparsed] = pd.to_numeric(digits, errors="coerce")
    return numeric


def _write_header_only(writer, sheet):
    """Write a sheet that contains just the output header row."""
    pd.DataFrame(columns=OUTPUT_COLUMNS).to_excel(
        writer, sheet_name=sheet, index=False
    )


with pd.ExcelWriter(outname, engine="openpyxl") as writer:
    for sheet in xls.sheet_names:
        print(f"Processing: {sheet}")
        # Read from the already-open workbook rather than re-opening the file
        # for every sheet.
        raw = pd.read_excel(xls, sheet_name=sheet)

        # A blank or too-narrow sheet cannot be mapped onto the four expected
        # columns; emit the header only instead of aborting the whole run.
        if raw.shape[1] < len(INPUT_COLUMNS):
            _write_header_only(writer, sheet)
            print(f"  (fewer than {len(INPUT_COLUMNS)} columns on this sheet; skipped)")
            continue

        # --- Keep only A-D and rename ---
        df = raw.iloc[:, :len(INPUT_COLUMNS)].copy()
        df.columns = INPUT_COLUMNS

        # --- Clean types ---
        # vtec to numeric (unparseable values become NaN and are skipped by mean)
        df["vtec"] = pd.to_numeric(df["vtec"], errors="coerce")
        df["satellite_id_num"] = _gps_id_numbers(df["satellite_id"])

        # --- GPS only ---
        df = df[df["satellite_id_num"].between(*GPS_ID_RANGE)].reset_index(drop=True)

        if df.empty:
            # write headers only if a sheet has no GPS rows
            _write_header_only(writer, sheet)
            print("  (no GPS rows found on this sheet)")
            continue

        # --- Minute index: every ROWS_PER_MINUTE rows = 1 minute (1,2,3,...) ---
        # NOTE(review): this is purely positional. It assumes the remaining
        # rows are in time order and that every minute contributes exactly
        # ROWS_PER_MINUTE rows after filtering; verify against the input data.
        df["minute_index"] = (np.arange(len(df)) // ROWS_PER_MINUTE) + 1

        # Minute averages (NaNs skipped by default)
        minute_avg = df.groupby("minute_index", as_index=False)["vtec"].mean()
        minute_avg.rename(columns={"vtec": "minute_avg_vtec"}, inplace=True)

        # Attach minute averages to each row
        df = df.merge(minute_avg, on="minute_index", how="left")

        # --- Hour index: MINUTES_PER_HOUR minute buckets = 1 hour (1,2,...) ---
        df["hour_index"] = ((df["minute_index"] - 1) // MINUTES_PER_HOUR) + 1

        # Hour averages are the mean of the minute averages (not of raw rows),
        # so every minute bucket carries equal weight.
        hour_avg = minute_avg.copy()
        hour_avg["hour_index"] = ((hour_avg["minute_index"] - 1) // MINUTES_PER_HOUR) + 1
        hour_avg = hour_avg.groupby("hour_index", as_index=False)["minute_avg_vtec"].mean()
        hour_avg.rename(columns={"minute_avg_vtec": "hour_avg_vtec"}, inplace=True)

        # Attach hour averages to each row
        df = df.merge(hour_avg, on="hour_index", how="left")

        # Optional: round for readability
        df["minute_avg_vtec"] = df["minute_avg_vtec"].round(5)
        df["hour_avg_vtec"] = df["hour_avg_vtec"].round(5)

        # --- Arrange columns to match letters (E & H blank on purpose) ---
        df_out = pd.DataFrame({
            "A_gps_week": df["gps_week"],
            "B_sow": df["sow"],
            "C_satellite_id": df["satellite_id"],
            "D_vtec": df["vtec"],
            "E_blank": "",
            "F_minute_index": df["minute_index"],
            "G_minute_avg_vtec": df["minute_avg_vtec"],
            "H_blank": "",
            "I_hour_index": df["hour_index"],
            "J_hour_avg_vtec": df["hour_avg_vtec"],
        })

        df_out.to_excel(writer, sheet_name=sheet, index=False)

# Report completion (the check-mark emoji was previously stored as mojibake),
# then trigger the Colab download of the result workbook.
print(f"✅ Done. Saved as {outname}")
files.download(outname)

