In [2]:
import xarray as xr
import pandas as pd

In [3]:
ds=xr.open_dataset("2902202_Sprof.nc")
print(list(ds.variables))

['DATA_TYPE', 'FORMAT_VERSION', 'HANDBOOK_VERSION', 'REFERENCE_DATE_TIME', 'DATE_CREATION', 'DATE_UPDATE', 'PLATFORM_NUMBER', 'PROJECT_NAME', 'PI_NAME', 'STATION_PARAMETERS', 'CYCLE_NUMBER', 'DIRECTION', 'DATA_CENTRE', 'PARAMETER_DATA_MODE', 'PLATFORM_TYPE', 'FLOAT_SERIAL_NO', 'FIRMWARE_VERSION', 'WMO_INST_TYPE', 'JULD', 'JULD_QC', 'JULD_LOCATION', 'LATITUDE', 'LONGITUDE', 'POSITION_QC', 'POSITIONING_SYSTEM', 'CONFIG_MISSION_NUMBER', 'PARAMETER', 'SCIENTIFIC_CALIB_EQUATION', 'SCIENTIFIC_CALIB_COEFFICIENT', 'SCIENTIFIC_CALIB_COMMENT', 'SCIENTIFIC_CALIB_DATE', 'PROFILE_PRES_QC', 'PROFILE_TEMP_QC', 'PROFILE_PSAL_QC', 'PROFILE_DOXY_QC', 'PROFILE_CHLA_QC', 'PROFILE_BBP700_QC', 'PRES', 'PRES_QC', 'PRES_ADJUSTED', 'PRES_ADJUSTED_QC', 'PRES_ADJUSTED_ERROR', 'TEMP', 'TEMP_QC', 'TEMP_dPRES', 'TEMP_ADJUSTED', 'TEMP_ADJUSTED_QC', 'TEMP_ADJUSTED_ERROR', 'PSAL', 'PSAL_QC', 'PSAL_dPRES', 'PSAL_ADJUSTED', 'PSAL_ADJUSTED_QC', 'PSAL_ADJUSTED_ERROR', 'DOXY', 'DOXY_QC', 'DOXY_dPRES', 'DOXY_ADJUSTED', 'DOX

In [4]:
time = ds["JULD"].values       
lat = ds["LATITUDE"].values
lon = ds["LONGITUDE"].values
pres = ds["PRES"].values        # Pressure
temp = ds["TEMP"].values        # Temperature
psal = ds["PSAL"].values        # Salinity

doxy = ds.get("DOXY", None)     # Oxygen
chla = ds.get("CHLA", None)     # Chlorophyll
bbp  = ds.get("BBP700", None)   # Backscatter at 700 nm

In [None]:
profiles = []
for i in range(ds.dims["N_PROF"]):
    profile_data = {
        "depth": ds["PRES"].isel(N_PROF=i).values,
        "temp": ds["TEMP"].isel(N_PROF=i).values,
        "salinity": ds["PSAL"].isel(N_PROF=i).values,
        "oxygen": ds["DOXY"].isel(N_PROF=i).values,
        "chlorophyll": ds["CHLA"].isel(N_PROF=i).values,
    }
    profiles.append(pd.DataFrame(profile_data))

print(profiles[0].head())


  for i in range(ds.dims["N_PROF"]):


   depth       temp   salinity      oxygen  chlorophyll
0   4.30  26.846001  36.587002         NaN          NaN
1   5.23  26.831230  36.588097  198.237595       0.2088
2   6.00  26.819000  36.589001         NaN          NaN
3   8.00  26.799999  36.591000         NaN          NaN
4  10.00  26.788000  36.591999         NaN          NaN


In [7]:
all_profiles = []
for i in range(ds.sizes["N_PROF"]):
    profile_data = {
        "profile": i,
        "depth": ds["PRES"].isel(N_PROF=i).values,
        "temp": ds["TEMP"].isel(N_PROF=i).values,
        "salinity": ds["PSAL"].isel(N_PROF=i).values,
        "oxygen": ds["DOXY"].isel(N_PROF=i).values,
        "chlorophyll": ds["CHLA"].isel(N_PROF=i).values,
    }
    df = pd.DataFrame(profile_data)
    all_profiles.append(df)

merged = pd.concat(all_profiles, ignore_index=True)
merged.to_csv("bgc_sample.csv", index=False)

In [8]:
df = pd.read_csv("bgc_sample.csv")

df = df[df["depth"].notna()]
df = df[df["depth"] >= 0]

df = df.groupby("profile").apply(lambda g: g.interpolate(limit_direction="both")).reset_index(drop=True)

df["temp"] = df["temp"].clip(-2, 40)            # °C
df["salinity"] = df["salinity"].clip(0, 42)     # PSU
df["oxygen"] = df["oxygen"].clip(lower=0)       # µmol/kg, no negatives
df["chlorophyll"] = df["chlorophyll"].clip(lower=0)  # mg/m³, no negatives

df.to_csv("bgc_sample_cleaned.csv", index=False)

  df = df.groupby("profile").apply(lambda g: g.interpolate(limit_direction="both")).reset_index(drop=True)
