In [1]:
import os
import glob

from tqdm import tqdm

import netCDF4 as nc
import numpy as np
from scipy.spatial import cKDTree
import matplotlib.pyplot as plt
import sys
import platform
import importlib

# Resolve the two filesystem roots from the host OS: macOS ('Darwin') uses the
# per-user locations, every other platform uses '/data' for both.
base_FP, cpuserver_data_FP = {
    'Darwin': (
        '/Users/hyunglokkim/Insync/hkim@geol.sc.edu/Google_Drive',
        '/Users/hyunglokkim/cpuserver_data',
    ),
}.get(platform.system(), ('/data', '/data'))

# Make the packages under <base_FP>/python_modules importable.
sys.path.append(f'{base_FP}/python_modules')

import HydroAI.Grid as hGrid
# Reload the module object that was just imported; the trailing ';' keeps the
# notebook from echoing the returned module.
importlib.reload(hGrid);

In [2]:
def list_nc_files(base_dir, extension=".nc4"):
    """Recursively collect files under ``base_dir`` whose name ends with ``extension``.

    Parameters
    ----------
    base_dir : str
        Root directory to walk; all subdirectories are searched as well.
    extension : str or tuple of str, optional
        Filename suffix (or tuple of suffixes) to keep. Defaults to ``".nc4"``,
        which is the only suffix the original implementation accepted.

    Returns
    -------
    list of str
        Full paths of the matching files, sorted lexicographically. The list is
        empty when nothing matches or when ``base_dir`` does not exist
        (``os.walk`` yields nothing for a missing directory).
    """
    nc_files = sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(base_dir)
        for name in names
        if name.endswith(extension)
    )
    return nc_files

# Build the CYGNSS/L1_V21 directory from cpuserver_data_FP (chosen per platform
# in the first cell) instead of hard-coding the macOS location. On macOS this
# yields the same path as before; elsewhere it now follows the '/data' root
# rather than pointing at a directory that does not exist on that host.
# NOTE(review): assumes the archive sits under <cpuserver_data_FP>/CYGNSS/L1_V21
# on non-macOS hosts as well — confirm.
base_dir = os.path.join(cpuserver_data_FP, "CYGNSS", "L1_V21")
nc_file_list = list_nc_files(base_dir)

In [5]:
# Alternative grid source, kept for reference (not executed):
#ref_lon, ref_lat = hSMAP.get_e2grid(cpuserver_data_FP, 'SPL3SMP.006')
# Reference grid coordinates for the '3km' grid; the helper's result is
# unpacked as (longitude, latitude).
ref_lon, ref_lat = hGrid.generate_lat_lon_e2grid('3km')
# Integer accumulator with the same shape as ref_lat; later cells increment it
# once per satellite sample that maps to a cell.
data_count = np.zeros_like(ref_lat, dtype=int)
# Flatten the reference arrays and stack them as [latitude, longitude]
ref_points = np.column_stack((ref_lat.flatten(), ref_lon.flatten()))
# KD-tree over the (lat, lon) pairs. Queries against it return indices into the
# flattened grid; nearest-neighbour distance is plain Euclidean distance in
# these coordinate values, not a great-circle distance.
tree = cKDTree(ref_points)

Processing row 0/4872
Processing row 100/4872
Processing row 200/4872
Processing row 300/4872
Processing row 400/4872
Processing row 500/4872
Processing row 600/4872
Processing row 700/4872
Processing row 800/4872
Processing row 900/4872
Processing row 1000/4872
Processing row 1100/4872
Processing row 1200/4872
Processing row 1300/4872
Processing row 1400/4872
Processing row 1500/4872
Processing row 1600/4872
Processing row 1700/4872
Processing row 1800/4872
Processing row 1900/4872
Processing row 2000/4872
Processing row 2100/4872
Processing row 2200/4872
Processing row 2300/4872
Processing row 2400/4872
Processing row 2500/4872
Processing row 2600/4872
Processing row 2700/4872
Processing row 2800/4872
Processing row 2900/4872
Processing row 3000/4872
Processing row 3100/4872
Processing row 3200/4872
Processing row 3300/4872
Processing row 3400/4872
Processing row 3500/4872
Processing row 3600/4872
Processing row 3700/4872
Processing row 3800/4872
Processing row 3900/4872
Processing r

In [None]:
# For each of the first 500 files, find the nearest reference-grid cell of
# every specular point and increment that cell in data_count.
files_to_scan = nc_file_list[:500]
for file_name in tqdm(files_to_scan, total=len(files_to_scan), desc="Processing Files"):
    # The context manager closes the NetCDF handle after reading, so hundreds
    # of files do not accumulate open descriptors.
    with nc.Dataset(file_name) as dataset:
        sp_lat = np.ma.asarray(dataset.variables['sp_lat'][:]).ravel()
        sp_lon = np.ma.asarray(dataset.variables['sp_lon'][:]).ravel()

    # Keep a sample only when BOTH coordinates are unmasked. Compressing each
    # array separately would pair latitudes with the wrong longitudes whenever
    # the two masks differ.
    valid = ~(np.ma.getmaskarray(sp_lat) | np.ma.getmaskarray(sp_lon))
    if not valid.any():
        continue
    sp_lat = np.ma.getdata(sp_lat)[valid]
    # Wrap longitudes into [-180, 180). A plain "- 180" shifts every point by
    # half the globe instead of wrapping.
    # NOTE(review): assumes sp_lon is stored as 0-360 degrees East (CYGNSS L1
    # convention) and the reference grid uses -180..180 — confirm.
    sp_lon = (np.ma.getdata(sp_lon)[valid] + 180.0) % 360.0 - 180.0

    # Stack satellite latitudes and longitudes in the tree's [lat, lon] order
    sat_points = np.column_stack((sp_lat, sp_lon))
    _, indices = tree.query(sat_points)

    # Flat tree indices -> (row, col) of the reference grid
    rows, cols = np.unravel_index(indices, ref_lat.shape)

    # np.add.at accumulates correctly when several samples hit the same cell
    np.add.at(data_count, (rows, cols), 1)

Processing Files:  95%|████████████████████████████████████▉  | 473/500 [02:47<00:08,  3.36it/s]

In [None]:
# Initialize a dictionary to hold lists of sp_inc_angle values for each grid cell
# (keys are flat indices into the reference grid).
from collections import defaultdict
angle_data = defaultdict(list)

files_to_scan = nc_file_list[:1000]
for file_name in tqdm(files_to_scan, total=len(files_to_scan), desc="Processing Files"):
    # Read latitude, longitude and incidence angle, then close the file.
    with nc.Dataset(file_name) as dataset:
        sp_lat = np.ma.asarray(dataset.variables['sp_lat'][:]).ravel()
        sp_lon = np.ma.asarray(dataset.variables['sp_lon'][:]).ravel()
        sp_inc_angle = np.ma.asarray(dataset.variables['sp_inc_angle'][:]).ravel()

    # A sample is usable only when all three fields are unmasked. Compressing
    # each array on its own would misalign angles and positions whenever the
    # masks differ.
    valid = ~(
        np.ma.getmaskarray(sp_lat)
        | np.ma.getmaskarray(sp_lon)
        | np.ma.getmaskarray(sp_inc_angle)
    )
    if not valid.any():
        continue
    sp_lat = np.ma.getdata(sp_lat)[valid]
    # Wrap longitudes into [-180, 180); subtracting 180 would shift them instead.
    # NOTE(review): assumes sp_lon is stored as 0-360 degrees East (CYGNSS L1
    # convention) and the reference grid uses -180..180 — confirm.
    sp_lon = (np.ma.getdata(sp_lon)[valid] + 180.0) % 360.0 - 180.0
    sp_inc_angle = np.ma.getdata(sp_inc_angle)[valid]

    # Stack satellite latitudes and longitudes in the tree's [lat, lon] order
    sat_points = np.column_stack((sp_lat, sp_lon))
    _, indices = tree.query(sat_points)

    # Store sp_inc_angle values under the flat index of their nearest grid cell
    for idx, angle in zip(indices, sp_inc_angle):
        angle_data[idx].append(angle)

In [None]:
# Per-cell mean incidence angle, keyed by the same flat grid index as angle_data.
# keys() and values() iterate in the same order, so each mean stays paired with
# its cell.
mean_angles = dict(zip(angle_data.keys(), map(np.mean, angle_data.values())))

In [None]:
# Flat reference-grid index of the cell whose incidence-angle samples are plotted.
inspect_cell = 97857
# Use .get() rather than angle_data[...]: indexing a defaultdict inserts an
# empty list for a cell that has no samples, which would later produce a NaN
# mean (and a warning) when mean_angles is recomputed.
plt.hist(angle_data.get(inspect_cell, []))

In [None]:
# mean_angles is a dict {flat grid index: mean angle}; imshow needs a 2-D array.
# Scatter the values into an array shaped like the reference grid, leaving
# cells that received no samples as NaN (drawn blank by imshow).
mean_angle_grid = np.full(ref_lat.shape, np.nan)
if mean_angles:
    cell_ids = np.fromiter(mean_angles.keys(), dtype=np.intp, count=len(mean_angles))
    cell_means = np.fromiter(mean_angles.values(), dtype=float, count=len(mean_angles))
    mean_angle_grid.flat[cell_ids] = cell_means

plt.figure(figsize=(10, 6))
#im = plt.imshow(data_count, cmap='viridis')
im = plt.imshow(mean_angle_grid, cmap='viridis')
plt.colorbar(im)
# Title now names the quantity actually drawn (mean incidence angle, not counts).
plt.title("Mean Specular-Point Incidence Angle")
plt.xlabel("Longitude Index")
plt.ylabel("Latitude Index")
plt.show()

In [None]:
import numpy as np
import netCDF4 as nc
import os
from scipy.spatial import cKDTree
from multiprocessing import Pool

# Make sure you have imported or defined hSMAP.get_e2grid
import sys
import platform

# Choose the data roots by host OS: anything that is not macOS uses '/data'
# for both roots, macOS uses the per-user locations.
if platform.system() != 'Darwin':
    base_FP = cpuserver_data_FP = '/data'
else:  # macOS
    base_FP = '/Users/hyunglokkim/Insync/hkim@geol.sc.edu/Google_Drive'
    cpuserver_data_FP = '/Users/hyunglokkim/cpuserver_data'
# Expose <base_FP>/python_modules on the import path before importing HydroAI.
sys.path.append('/'.join((base_FP, 'python_modules')))
import HydroAI.SMAP as hSMAP

def list_nc_files(base_dir):
    """Return the sorted full paths of every ``.nc4`` file below ``base_dir``.

    The directory tree is walked recursively with ``os.walk``; a file is kept
    when its name ends with ".nc4".
    """
    matches = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(base_dir)
        for name in names
        if name.endswith(".nc4")
    ]
    matches.sort()
    return matches

def process_file(args):
    """Count, per reference-grid cell, the specular points found in one file.

    Parameters
    ----------
    args : tuple
        ``(file_name, ref_points)`` or ``(file_name, ref_points, grid_shape)``.
        ``file_name`` is the NetCDF file to read. ``ref_points`` is the
        ``(N, 2)`` array of ``[lat, lon]`` pairs of the flattened reference
        grid. ``grid_shape`` is the optional 2-D shape of that grid; when it is
        omitted, the shape of the module-level ``ref_lat`` array is used.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``grid_shape``. Each element is the number of
        samples in the file whose nearest reference point is that cell. All
        zeros when the file has no sample with both coordinates valid.
    """
    file_name, ref_points, *extra = args

    # The grid is 2-D, so its shape is ref_lat.shape. Building the shape from
    # (len(ref_lat), len(ref_lon)) gives (n_rows, n_rows) for 2-D arrays, which
    # does not match the accumulator created with zeros_like(ref_lat).
    grid_shape = tuple(extra[0]) if extra else np.shape(ref_lat)
    local_data_count = np.zeros(grid_shape, dtype=int)

    # Close the file as soon as the two variables have been read.
    with nc.Dataset(file_name) as dataset:
        sp_lat = np.ma.asarray(dataset.variables['sp_lat'][:]).ravel()
        sp_lon = np.ma.asarray(dataset.variables['sp_lon'][:]).ravel()

    # Keep a sample only when both coordinates are unmasked, so latitudes and
    # longitudes stay paired even if their masks differ.
    valid = ~(np.ma.getmaskarray(sp_lat) | np.ma.getmaskarray(sp_lon))
    if not valid.any():
        return local_data_count
    sp_lat = np.ma.getdata(sp_lat)[valid]
    # Wrap longitudes into [-180, 180); subtracting 180 would shift them instead.
    # NOTE(review): assumes sp_lon is stored as 0-360 degrees East (CYGNSS L1
    # convention) and the reference grid uses -180..180 — confirm.
    sp_lon = (np.ma.getdata(sp_lon)[valid] + 180.0) % 360.0 - 180.0
    sat_points = np.column_stack((sp_lat, sp_lon))

    # The tree is rebuilt on every call because only the raw points are passed
    # to the worker.
    tree = cKDTree(ref_points)
    _, indices = tree.query(sat_points)

    # Flat indices into ref_points -> (row, col) of the 2-D grid
    rows, cols = np.unravel_index(indices, grid_shape)

    # np.add.at accumulates correctly when several samples hit the same cell
    np.add.at(local_data_count, (rows, cols), 1)
    return local_data_count

def reduce_data_counts(total, addition):
    """Add ``addition`` onto ``total`` with ``+=`` and return the result.

    Because the augmented assignment is used, a mutable ``total`` (such as a
    NumPy array) is updated in place and the same object is returned; callers
    should treat the argument they passed in as modified.
    """
    total += addition
    return total

# Build the CYGNSS/L1_V21 directory from the platform-specific data root chosen
# above instead of hard-coding the macOS location (identical path on macOS).
# NOTE(review): assumes the archive sits under <cpuserver_data_FP>/CYGNSS/L1_V21
# on non-macOS hosts as well — confirm.
base_dir = os.path.join(cpuserver_data_FP, "CYGNSS", "L1_V21")
nc_file_list = list_nc_files(base_dir)
ref_lon, ref_lat = hSMAP.get_e2grid(cpuserver_data_FP, 'SPL3SMP.006')
# Flattened reference grid as [latitude, longitude] rows, one per grid cell.
ref_points = np.column_stack((ref_lat.flatten(), ref_lon.flatten()))

if __name__ == '__main__':
    # NOTE(review): with the 'spawn' start method, worker processes re-import
    # __main__ and may not find functions defined only in a notebook cell.

    # Accumulator shaped like the reference grid.
    data_count = np.zeros_like(ref_lat, dtype=int)
    # One (file_name, ref_points) task per file, produced lazily.
    tasks = ((file_name, ref_points) for file_name in nc_file_list)
    with Pool() as pool:
        # Stream results and fold each one in as it arrives. pool.map would
        # hold one full grid per file in memory before any reduction. Integer
        # addition is order-independent, so unordered delivery gives the same
        # totals.
        for result in pool.imap_unordered(process_file, tasks):
            data_count = reduce_data_counts(data_count, result)

    print("Data count grid:", data_count)

In [None]:
# Render the accumulated per-cell counts as an image; axes are array indices.
plt.imshow(data_count)

In [None]:
# Generate the '3km' grid coordinates under separate names; the helper's result
# is unpacked as (longitudes, latitudes), matching its earlier use.
longitudes, latitudes = hGrid.generate_lat_lon_e2grid('3km')

In [None]:
# Display the dimensions of the generated longitude array.
longitudes.shape

In [None]:
# Display the ref_lon array currently held in the kernel.
ref_lon

In [None]:
class EASE2GRID:
    """Parameters of a global EASE-Grid 2.0 grid (EPSG:6933) at a named resolution.

    ``grid_params`` maps each supported resolution label to the projected
    coordinates of the grid's upper-left corner (``x_min``, ``y_max``, metres),
    the cell size ``res`` (metres) and the grid dimensions. An instance copies
    the entry for one resolution onto attributes of the same names.

    Raises
    ------
    ValueError
        If ``resolution`` is not a key of ``grid_params``.
    """

    grid_params = {
        '1km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7314540.83, 'res': 1000.9, 'n_cols': 34704, 'n_rows': 14616},
        '3km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7314540.83, 'res': 3002.69, 'n_cols': 11568, 'n_rows': 4872},
        '3.125km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7307375.92, 'res': 3128.16, 'n_cols': 11104, 'n_rows': 4672},
        '6.25km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7307375.92, 'res': 6256.32, 'n_cols': 5552, 'n_rows': 2336},
        '9km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7314540.83, 'res': 9008.05, 'n_cols': 3856, 'n_rows': 1624},
        '12.5km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7307375.92, 'res': 12512.63, 'n_cols': 2776, 'n_rows': 1168},
        '25km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7307375.92, 'res': 25025.26, 'n_cols': 1388, 'n_rows': 584},
        '36km': {'epsg': 6933, 'x_min': -17367530.44, 'y_max': 7314540.83, 'res': 36032.22, 'n_cols': 964, 'n_rows': 406}
    }

    # Projection constants of EPSG:6933: WGS84 ellipsoid, cylindrical
    # equal-area projection with a 30 degree standard parallel and the central
    # meridian at 0 degrees.
    _WGS84_A = 6378137.0              # semi-major axis, metres
    _WGS84_E2 = 0.0066943799901413    # first eccentricity squared
    _STD_PARALLEL_DEG = 30.0

    def __init__(self, resolution):
        if resolution not in self.grid_params:
            raise ValueError(f"Unsupported resolution: {resolution}")

        params = self.grid_params[resolution]
        self.name = f'EASE2_G{resolution}'
        self.epsg = params['epsg']
        self.x_min = params['x_min']
        self.y_max = params['y_max']
        self.res = params['res']
        self.n_cols = params['n_cols']
        self.n_rows = params['n_rows']

    def rc2lonlat(self, col, row):
        """Return ``(lon, lat)`` in degrees for the centre of grid cell ``(col, row)``.

        Indices are zero-based, with ``row`` increasing southward from
        ``y_max`` and ``col`` increasing eastward from ``x_min``.
        NOTE(review): the zero-based, cell-centre convention is an assumption
        about what callers of this method expect — confirm.

        ``col`` and ``row`` may be scalars or NumPy-broadcastable arrays.
        Scalars yield plain floats, arrays yield arrays. Indices are not
        range-checked; rows far outside the grid produce NaN latitudes.
        """
        col = np.asarray(col, dtype=float)
        row = np.asarray(row, dtype=float)

        # Projected coordinates (metres) of the cell centre.
        x = self.x_min + (col + 0.5) * self.res
        y = self.y_max - (row + 0.5) * self.res

        a = self._WGS84_A
        e2 = self._WGS84_E2
        e = np.sqrt(e2)
        phi_s = np.deg2rad(self._STD_PARALLEL_DEG)

        # Inverse cylindrical equal-area projection on the ellipsoid:
        # k0 scales x at the standard parallel, qp is the authalic q at the pole.
        k0 = np.cos(phi_s) / np.sqrt(1.0 - e2 * np.sin(phi_s) ** 2)
        qp = (1.0 - e2) * (1.0 / (1.0 - e2) - np.log((1.0 - e) / (1.0 + e)) / (2.0 * e))

        # Authalic latitude, then the series expansion to geodetic latitude.
        beta = np.arcsin(2.0 * y * k0 / (a * qp))
        e4 = e2 * e2
        e6 = e4 * e2
        lat = (
            beta
            + (e2 / 3.0 + 31.0 * e4 / 180.0 + 517.0 * e6 / 5040.0) * np.sin(2.0 * beta)
            + (23.0 * e4 / 360.0 + 251.0 * e6 / 3780.0) * np.sin(4.0 * beta)
            + (761.0 * e6 / 45360.0) * np.sin(6.0 * beta)
        )
        lon = x / (a * k0)

        lon_deg = np.rad2deg(lon)
        lat_deg = np.rad2deg(lat)
        if lon_deg.ndim == 0 and lat_deg.ndim == 0:
            return float(lon_deg), float(lat_deg)
        return lon_deg, lat_deg

# Example usage:
grid = EASE2GRID('3km')
lon, lat = grid.rc2lonlat(col=1, row=0)
print(f"Longitude: {lon}, Latitude: {lat}")