# ${\textbf{G0: From Antoine GROUAZEL's meta SWOT-S1 coloc files to co-located files available in DATARMOR:}}$
# ${\textbf{Generation of df\_coloc dataframe}}$  

In this notebook, we go through the SWOT–Sentinel-1 co-location ("coloc") meta files produced by Antoine GROUAZEL, which are listed with:  
`coloc_nc_files_doy_i = glob.glob(f"/home/datawork-cersat-public/project/mpc-sentinel1/analysis/s1_data_analysis/coloc/swot_s1/meta_coloc/v3/IW/{year}/{month}/{day}/*")`  
We then check whether each IW product flagged as co-located with a SWOT track has already been processed to Level 2 (L2), so that its $H_s$ and other wave parameters can be extracted.  
This information is summarised in a pandas DataFrame named `df_coloc` and saved as `df_colocs_swot_S1.csv` for further use. 

In [1]:
import xarray as xr
import shapefile as shp 
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from shapely.geometry import box
from shapely.wkt import loads
import scipy 

import glob
from tqdm import tqdm
import pandas as pd

In [2]:
# Root folders read by this notebook.

# Sentinel-1 IW products already processed to L2 (one sub-folder per year).
sar_file_root = "/home/datawork-cersat-public/cache/project/sarwave/data/products/experiments/slc/iw/l2"

# SWOT L2 WindWave netCDF files.
swot_root = "/home/datawork-WW3/PROJECT/SWOT/WindWave"

# SWOT / Sentinel-1 co-location meta files (v3, IW), laid out as <year>/<month>/<day>/.
coloc_root = "/home/datawork-cersat-public/project/mpc-sentinel1/analysis/s1_data_analysis/coloc/swot_s1/meta_coloc/v3/IW"

In [3]:
from datetime import datetime, timedelta

def day_of_year_to_month_day(day_of_year, year):
    """Convert a 1-based day of year into zero-padded month and day strings.

    Parameters
    ----------
    day_of_year : int
        Day number within ``year`` (1 is 1 January).
    year : int
        Calendar year used to resolve the date (leap years are honoured).

    Returns
    -------
    tuple of (str, str)
        ``(month, day)``, each formatted on two digits, e.g. ``("05", "09")``.

    Notes
    -----
    No range check is done: a ``day_of_year`` beyond the length of ``year``
    rolls over into the following year.
    """
    target = datetime(year, 1, 1) + timedelta(days=day_of_year - 1)
    return f"{target.month:02d}", f"{target.day:02d}"

# Example usage
# Quick check of the helper: day 129 of 2025.
month, day = day_of_year_to_month_day(day_of_year=129, year=2025)
print(f"Month: {month}, Day: {day}")

Month: 05, Day: 09


# Joining colocation with already processed IW files (outputs csv file with everything)

In [4]:
# Build the SWOT / Sentinel-1 IW co-location table.
#
# For each year and each day-of-year folder of processed SAR products:
#   1. open the co-location meta files of that calendar day;
#   2. resolve the SWOT L2 WindWave file with the same cycle/track as the
#      SWOT L3 file named in the meta file (meta files with no match are skipped);
#   3. for each co-located SAFE product, list what is found under its processed
#      SAFE folder for that day.
#
# Results:
#   df_coloc     - 3 rows per co-located SAFE product with columns
#                  year / day_of_year / swot_file / iw_file. `iw_file` holds the
#                  3 matched paths when exactly 3 are found, NaN otherwise.
#   df_swot_file - one row per usable co-location meta file (resolved SWOT L2 path).
coloc_columns = ["year", "day_of_year", "swot_file", "iw_file"]
n_iw_outputs = 3  # a SAFE product counts as fully processed only with exactly 3 entries

# Rows are accumulated in plain lists and the DataFrames are built once at the
# end: calling pd.concat inside the loops re-copies the whole table at every
# iteration (quadratic cost) and concatenates onto empty frames.
coloc_records = []  # (year, day_of_year, swot_file, iw_file) tuples
swot_l2_files = []  # SWOT L2 path resolved for each usable meta file

years = [2023, 2024, 2025]
for year in years:
    # One folder per processed day of year, e.g. <sar_file_root>/2025/129
    SAR_day_folders = glob.glob(f"{sar_file_root}/{year}/*")
    for day_folder in tqdm(SAR_day_folders):
        day_of_year = int(day_folder[-3:])  # folder name is the 3-digit day of year

        # Product naming reminder: "WAV__2SDV"-style tags denote Level-2 products;
        # the 3-character code (e.g. E12) identifies the model/configuration used
        # to generate the product (other codes, e.g. D49, are other configurations).

        month, day = day_of_year_to_month_day(day_of_year, year)
        # Previous co-location files from Antoine
        coloc_nc_files_doy_i = glob.glob(f"{coloc_root}/{year}/{month}/{day}/*")
        # Newer co-location files from Antoine
        #coloc_nc_files_doy_i = glob.glob(f"/home/datawork-cersat-public/project/mpc-sentinel1/analysis/s1_data_analysis/coloc/swot_s1/seastate_coloc/v1/IW/{year}/{month}/{day}/*")

        for coloc_nc_file in coloc_nc_files_doy_i:
            # The context manager closes the netCDF file as soon as the two
            # variables needed have been read, instead of leaking one handle per file.
            with xr.open_dataset(coloc_nc_file) as ds_coloc:
                # e.g. 'SWOT_L3_LR_SSH_Expert_026_249_20250101T165939_20250101T175106_v2.0.1.nc'
                swot_l3_snippet = ds_coloc.filepath_swot.values
                try:
                    cycle = int(swot_l3_snippet[0][22:25])  # e.g. 026
                    track = int(swot_l3_snippet[0][26:29])  # e.g. 249
                    # e.g. '<swot_root>/SWOT_L2_LR_SSH_WindWave_026_249_20250101T165939_20250101T175024_PIC2_01.nc'
                    # NOTE(review): if several files match, the first one returned by glob is used.
                    SWOT_l2_filepath = glob.glob(f"{swot_root}/*{cycle:03d}_{track:03d}_*.nc")[0]
                except IndexError:
                    # No SWOT L3 name in the meta file, or no SWOT L2 file for this cycle/track.
                    continue
                swot_l2_files.append(SWOT_l2_filepath)

                SAFE_coloc_folders = ds_coloc.sar_safe_name.values

            for SAFE_coloc_folder in SAFE_coloc_folders:
                # e.g. 'SDV_20250101T172524_20250101T172554_057253_070B02_AAF7'
                snippet = SAFE_coloc_folder[13:67]
                # Sorted so that the row order does not depend on filesystem listing order.
                matched_IW_paths = sorted(glob.glob(f"{day_folder}/*{snippet}*.SAFE/*"))

                if len(matched_IW_paths) == n_iw_outputs:
                    # All 3 IW outputs have already been processed.
                    iw_files = matched_IW_paths
                else:
                    # Keep the co-location on record, flagged as not (fully) processed.
                    iw_files = [np.nan] * n_iw_outputs

                coloc_records.extend(
                    (year, day_of_year, SWOT_l2_filepath, iw_file) for iw_file in iw_files
                )

df_coloc = pd.DataFrame(coloc_records, columns=coloc_columns)
# Keep the 0/1/2 index within each SAFE product that the former per-product
# concatenation produced, so that the CSV written from df_coloc is unchanged.
df_coloc.index = np.tile(np.arange(n_iw_outputs), len(df_coloc) // n_iw_outputs)

df_swot_file = pd.DataFrame({"swot_file": swot_l2_files})

100%|██████████| 309/309 [01:12<00:00,  4.25it/s]
100%|██████████| 112/112 [01:03<00:00,  1.77it/s]
100%|██████████| 2/2 [00:01<00:00,  1.31it/s]


In [5]:
# Save the co-location table (index included) for the downstream notebooks.
# NOTE(review): assumes the "G_df" folder already exists in the working directory — confirm.
coloc_csv_path = "G_df/df_colocs_swot_S1.csv"
df_coloc.to_csv(coloc_csv_path)

In [13]:
# Number of distinct SWOT L2 files that were matched to a co-location meta file.
unique_swot_files = np.unique(df_swot_file["swot_file"])
unique_swot_files.shape

(1058,)

In [28]:
# Inspect how cycle and track are sliced out of a SWOT L3 file name.
# NOTE(review): `ds_coloc` is whatever dataset the co-location loop opened last — confirm this is intended.
swot_track = ds_coloc.filepath_swot.values[0]
swot_track[:21]  # characters before the cycle number; not displayed (not the cell's last expression)
cycle, track = int(swot_track[22:25]), int(swot_track[26:29])
print(cycle, track, swot_track, sep="\n")

26
249
SWOT_L3_LR_SSH_Expert_026_249_20250101T165939_20250101T175106_v2.0.1.nc


In [29]:
import glob as glob
# Look up the SWOT L2 WindWave file for the current `cycle` / `track`.
root_windwave = "/home/datawork-WW3/PROJECT/SWOT/WindWave"
# Other example: cycle=026; track=488
windwave_pattern = f'{root_windwave}/*{cycle:03d}_{track:03d}_*.nc'
glob.glob(windwave_pattern)[0]

'/home/datawork-WW3/PROJECT/SWOT/WindWave/SWOT_L2_LR_SSH_WindWave_026_249_20250101T165939_20250101T175024_PIC2_01.nc'