In [3]:
import os
import warnings
from datetime import date
from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
from pydap.client import open_url
from tqdm import tqdm

In [4]:
def hours_since_ref(year, month, day):
    """Return the number of hours between 1992-10-05 and the given date.

    Only whole days are counted, so the result is always a multiple of 24
    (negative for dates before the reference day).
    """
    reference_day = date(1992, 10, 5)
    elapsed_days = (date(year, month, day) - reference_day).days
    return 24 * elapsed_days

def get_cygnss_data(year, month, day):
    """Download one day of CYGNSS variables and return them as a DataFrame.

    The data URL built by ``generate_url`` is opened with ``open_url`` and the
    ``mean_square_slope``, ``lat``, ``lon`` and ``sample_time`` variables are
    read in full. ``sample_time`` is divided by 3600 and shifted by
    ``hours_since_ref(year, month, day)``.

    Parameters
    ----------
    year, month, day : int
        Calendar date of the file to fetch.

    Returns
    -------
    pandas.DataFrame
        Columns ``mss``, ``lat``, ``lon`` and ``sample_time``. An empty frame
        is returned when the dataset could not be opened after all attempts.
    """
    hours = hours_since_ref(year, month, day)
    data_url, _ = generate_url(year, month, day)

    # One initial attempt plus 50 retries before giving up.
    max_attempts = 51
    for _ in range(max_attempts):
        try:
            dataset = open_url(data_url, output_grid=False)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must propagate so the retry loop can be interrupted.
            continue
        break
    else:
        return pd.DataFrame()

    # Variables are read in the same order as the columns are created.
    mss = np.array(dataset.mean_square_slope[:])
    lat = np.array(dataset.lat[:])
    lon = np.array(dataset.lon[:])
    # NOTE(review): presumably sample_time is in seconds relative to the start
    # of the requested day, making the result hours since 1992-10-05 - confirm.
    sample_time = np.array(dataset.sample_time[:]) / 3600 + hours

    df = pd.DataFrame()
    df['mss'] = mss.tolist()
    df['lat'] = lat.tolist()
    df['lon'] = lon.tolist()
    df['sample_time'] = sample_time.tolist()
    return df

def prep_cygnss(cygnss_df):
    """Filter a CYGNSS DataFrame down to usable rows.

    Rows are removed when any column equals ``-9999.0`` (presumably the
    dataset's fill value - confirm), when ``lat`` lies outside [-38, 38], or
    when any column is NaN. The original index labels are kept.

    Parameters
    ----------
    cygnss_df : pandas.DataFrame
        Frame containing at least a ``lat`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows that pass every filter.
    """
    # Warnings are silenced only for the duration of the filtering, using the
    # stdlib ``warnings`` module; ``np.warnings`` is not a public NumPy API and
    # is absent from recent releases.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        for key in cygnss_df:
            cygnss_df = cygnss_df[cygnss_df[key] != -9999.0]
        in_lat_band = (cygnss_df["lat"] <= 38) & (cygnss_df["lat"] >= -38)
        # Non-inplace dropna avoids mutating a filtered slice.
        return cygnss_df[in_lat_band].dropna()

def generate_url(year, month, day):
    """Build the OPeNDAP URLs for the CYGNSS L2 v2.1 file of a given date.

    Returns a ``(data_url, clickable_url)`` tuple: the first is the file URL
    with a constraint selecting ``lat``, ``lon``, ``mean_square_slope`` and
    ``sample_time``; the second is the same file URL with ``.html`` appended.
    """
    # Day-of-year is computed first so an invalid date raises immediately.
    ordinal_day = datetime(year, month, day).timetuple().tm_yday
    stamp = f"{year}{month:02d}{day:02d}"

    root = 'https://podaac-opendap.jpl.nasa.gov/opendap/hyrax/allData/cygnss/L2/v2.1/'
    file_name = f"cyg.ddmi.s{stamp}-000000-e{stamp}-235959.l2.wind-mss.a21.d21.nc"
    file_url = f"{root}{year}/{ordinal_day:03d}/{file_name}"

    constrained_url = file_url + '?lat,lon,mean_square_slope,sample_time'
    browsable_url = file_url + '.html'
    return constrained_url, browsable_url

def fetch_cygnss(y1, m1, d1, y2, m2, d2):
    """Download, clean and save CYGNSS data for every day in a date range.

    For each day from ``y1-m1-d1`` through ``y2-m2-d2`` (inclusive) the data
    is fetched with ``get_cygnss_data``, filtered with ``prep_cygnss`` and
    written to ``level_2_mss/YYYY_MM_DD.csv``. Days for which no data could
    be fetched are skipped. Nothing is returned.

    Parameters
    ----------
    y1, m1, d1 : int
        Year, month and day of the first date.
    y2, m2, d2 : int
        Year, month and day of the last date.
    """
    sdate = date(y1, m1, d1)   # start date
    edate = date(y2, m2, d2)   # end date
    span_days = (edate - sdate).days

    # Create the output folder up front so a missing directory cannot make
    # to_csv fail after a slow download has already completed.
    os.makedirs("level_2_mss", exist_ok=True)

    for offset in tqdm(range(span_days + 1)):
        day = sdate + timedelta(days=offset)
        df = get_cygnss_data(day.year, day.month, day.day)
        if df.empty:
            continue
        # Each day is written straight to disk; frames are not accumulated,
        # so memory use does not grow with the length of the date range.
        df = prep_cygnss(df)
        df.to_csv("level_2_mss/" + str(day.year) + "_" + str(day.month).zfill(2) + "_" + str(day.day).zfill(2) + ".csv", index=False)

In [5]:
# Fetch, clean and save one CSV per day for 2021-10-02 through 2021-10-05
# (inclusive, four days); files are written under level_2_mss/.
fetch_cygnss(2021, 10, 2, 2021, 10, 5)

100%|██████████| 4/4 [11:45<00:00, 176.40s/it]
