# Find and Download NASA PACE L1 Data Example

## Requirements
Use of this notebook requires that your Earthdata Login credentials be stored in a `.netrc` file in your home directory:

```
machine urs.earthdata.nasa.gov
login your-login-here
password your-password-here
```

## Import Required Packages

In [1]:
from datetime import datetime
import os
import requests
import shutil
import time

## Helper Functions

In [2]:
def find_cmr_items(sensor: str, begin_datetime: datetime = datetime(2024,4,1), end_datetime: datetime = datetime(2100,12,31,23,59,59), 
               lat_min: float = -90.000, lat_max:float = 90.000, lon_min: float = -180.000, lon_max: float = 180.000, 
               page_size: int = 2000, page_num: int = 1, verbose: bool = True, timeout: float = 60.0) -> list:
    
    """
    Find relevant datasets/files using the NASA Common Metadata Repository (CMR).
    
    A single page of results is requested; call again with a higher page_num to walk
    through result sets larger than page_size.
    
    :param sensor: A string indicating the sensor of interest. Partial strings are accepted. e.g. HARP instead of PACE_HARP2
    :param begin_datetime: The point in time to start searching for data. Naive datetimes are sent as-is
        with a 'Z' suffix (i.e. treated as UTC); timezone-aware datetimes are converted to UTC first.
    :param end_datetime: The point in time to stop searching for data. Same timezone handling as begin_datetime.
    :param lat_min: The minimum bounding latitude. Valid between -90 and 90.
    :param lat_max: The maximum bounding latitude. Valid between -90 and 90.
    :param lon_min: The minimum bounding longitude. Valid between -180 and 180.
    :param lon_max: The maximum bounding longitude. Valid between -180 and 180.
    :param page_size: The number of items to return for each request. Maximum is 2000.
    :param page_num: The page to return.
    :param verbose: If truthy, messages are printed to console. 
    :param timeout: Seconds to wait for the CMR server before giving up. Passed straight to requests.get.
    :return: A list of CMR items, which provide the file download location and associated metadata.
        An empty list is returned when nothing matches, so the result can always be iterated.
    :raises ConnectionError: If CMR answers with a non-OK HTTP status.
    """
    
    # Local import: the notebook's import cell only brings in `datetime` itself.
    from datetime import timezone

    def _cmr_timestamp(moment: datetime) -> str:
        """Format a datetime the way the CMR 'temporal' parameter is built here (UTC, 'Z' suffix)."""
        # An aware datetime must be shifted to UTC before it is labelled with 'Z';
        # otherwise the wall-clock time of another zone would be sent as if it were UTC.
        if moment.utcoffset() is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.strftime('%Y-%m-%dT%H:%M:%SZ')

    bdt = _cmr_timestamp(begin_datetime)
    edt = _cmr_timestamp(end_datetime)
    base = 'https://cmr.earthdata.nasa.gov/search/granules.umm_json'
    params = {'provider': 'OB*',
              'options[provider][pattern]':'true',
              'short_name': f"*{sensor}*",
              'options[short_name][pattern]':'true',
              'page_size': page_size,
              'page_num': page_num,
              'temporal': f'{bdt},{edt}',
              'bounding_box': f"{lon_min},{lat_min},{lon_max},{lat_max}"}
    
    # Without a timeout, requests waits forever on a stalled connection.
    with requests.get(base, params = params, timeout = timeout) as response:
        if response.status_code != requests.codes.ok:
            raise ConnectionError(response.reason)
        data = response.json()

    hits, took, items = (data['hits'], data['took'], data['items'])
    if verbose:
        print(f"Request took {took} milliseconds to complete.")

    if hits == 0:
        if verbose:
            print('No files found.')
        # Return an empty list rather than None so callers can loop over the result unconditionally.
        return []

    if verbose:
        print(f"Found {len(items)} files.")
    return items
    
    
def download_cmr_files(items: list, save_directory: str = None, overwrite: bool = False, verbose: bool = True,
                       timeout: float = 60.0) -> None:
    """
    Download NASA files.
    
    Each file is streamed to a temporary '<name>.part' file and only moved to its final
    name after the transfer finishes, so an interrupted or rejected download never leaves
    a truncated file that a later run would mistake for a complete one.
    
    :param items: A list of items derived from the find_cmr_items function. None or an empty list is a no-op.
    :param save_directory: The directory you want to save data to. Defaults to the current working
        directory at the time of the call.
    :param overwrite: Set to True if you want to overwrite data that already exists.
    :param verbose: Set to True if you want info messages printed to console.
    :param timeout: Seconds to wait for the server (connect and each read) before giving up.
    :return: None
    :raises requests.HTTPError: If the server answers with an HTTP error status (e.g. failed authentication).
    :raises FileNotFoundError: If the file is missing after the download completes.
    """
    
    # Resolve the default per call; a default of os.getcwd() in the signature would be
    # frozen at the moment the function was defined.
    if save_directory is None:
        save_directory = os.getcwd()
    os.makedirs(save_directory, exist_ok = True)

    for item in items or []:
        umm = item['umm']
        download_url = umm['RelatedUrls'][0]['URL']
        save_filename = download_url.split('/')[-1]
        save_filepath = os.path.normpath(os.path.join(save_directory, save_filename))
        if os.path.isfile(save_filepath) and not overwrite:
            continue

        partial_filepath = save_filepath + '.part'
        try:
            with requests.get(download_url, stream = True, timeout = timeout) as req:
                # Fail loudly on 4xx/5xx instead of saving the error page as if it were data.
                req.raise_for_status()
                # Let urllib3 undo any Content-Encoding (gzip/deflate) while streaming the raw body.
                req.raw.decode_content = True
                with open(partial_filepath, 'wb') as fileobj:
                    shutil.copyfileobj(req.raw, fileobj)
            os.replace(partial_filepath, save_filepath)
        finally:
            # Only present here if something failed part-way through.
            if os.path.isfile(partial_filepath):
                os.remove(partial_filepath)

        if not os.path.isfile(save_filepath):
            raise FileNotFoundError(save_filepath)
        if verbose:
            print(f'Downloaded {save_filename}')

### Define Sensor and Spatial/Temporal Bounds

In [3]:
# Sensor pattern; find_cmr_items wraps it in '*' wildcards and matches it against the CMR short_name.
# Some other options: 'OCI*L1A', 'OCI*L1B', 'OCI*L1C'. You could also try 'HARP' and 'SPEX' for the other sensors.
sensor = 'OCI*L1C'

# Bounding box in decimal degrees.
lat_min, lat_max = 41, 47
lon_min, lon_max = -130, -123

# Search window; find_cmr_items formats these with a trailing 'Z'.
begin_datetime = datetime(2024, 4, 11)
end_datetime = datetime(2024, 4, 11, 23, 59, 59)

### Find Relevant Datasets

In [4]:
# Query CMR for granules matching the sensor pattern inside the time window and bounding box.
oci_items = find_cmr_items(
    sensor,
    begin_datetime=begin_datetime,
    end_datetime=end_datetime,
    lat_min=lat_min,
    lat_max=lat_max,
    lon_min=lon_min,
    lon_max=lon_max,
)

Request took 166 milliseconds to complete.
Found 3 files.


In [5]:
print('Download URLs...')
# The search result may be None or empty when nothing matched; fall back to an empty
# list so this cell prints just the header instead of raising TypeError.
for item in oci_items or []:
    print(item['umm']['RelatedUrls'][0]['URL'])

Download URLs...
https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240411T200333.L1C.5km.nc
https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240411T200833.L1C.5km.nc
https://obdaac-tea.earthdatacloud.nasa.gov/ob-cumulus-prod-public/PACE_OCI.20240411T214153.L1C.5km.nc


### Download Files

In [6]:
%%time
overwrite = True
download_cmr_files(oci_items, overwrite = overwrite)

Downloaded PACE_OCI.20240411T200333.L1C.5km.nc
Downloaded PACE_OCI.20240411T200833.L1C.5km.nc
Downloaded PACE_OCI.20240411T214153.L1C.5km.nc
CPU times: user 3.81 s, sys: 3.48 s, total: 7.29 s
Wall time: 58 s


### Open A File

In [7]:
from netCDF4 import Dataset
import xarray as xr

In [8]:
# One of the granules fetched by the download cell; the path is relative, so it is
# resolved against the current working directory.
filename = 'PACE_OCI.20240411T200833.L1C.5km.nc'

In [9]:
# Open the granule read-only with netCDF4 and print its top-level groups.
# NOTE(review): `root` is left open; no close() appears in this part of the notebook.
root = Dataset(filename, mode='r')
print(root.groups)

{'sensor_views_bands': <class 'netCDF4._netCDF4.Group'>
group /sensor_views_bands:
    dimensions(sizes): 
    variables(dimensions): float32 sensor_view_angle(number_of_views), float32 intensity_wavelength(number_of_views, intensity_bands_per_view), float32 intensity_bandpass(number_of_views, intensity_bands_per_view), float32 intensity_f0(number_of_views, intensity_bands_per_view)
    groups: , 'bin_attributes': <class 'netCDF4._netCDF4.Group'>
group /bin_attributes:
    dimensions(sizes): 
    variables(dimensions): float64 nadir_view_time(bins_along_track), float64 view_time_offsets(bins_along_track, bins_across_track, number_of_views)
    groups: , 'geolocation_data': <class 'netCDF4._netCDF4.Group'>
group /geolocation_data:
    dimensions(sizes): 
    variables(dimensions): float32 latitude(bins_along_track, bins_across_track), float32 longitude(bins_along_track, bins_across_track), float32 height(bins_along_track, bins_across_track), float32 height_stdev(bins_along_track, bins_a

In [10]:
# Load only the 'geolocation_data' group with xarray; the group listing printed by the
# netCDF4 cell shows it holds latitude, longitude and height variables.
ds = xr.open_dataset(filename, group = 'geolocation_data')

In [11]:
# Bare expression: the notebook displays the dataset's repr as this cell's output.
ds

In [12]:
# Report the minimum and maximum of the latitude and longitude variables in this group.
geo_lat, geo_lon = ds.latitude, ds.longitude
print(f'Latitude Range: {geo_lat.min().values}, {geo_lat.max().values}')
print(f'Longitude Range: {geo_lon.min().values}, {geo_lon.max().values}')

Latitude Range: 44.06573486328125, 67.75591278076172
Longitude Range: -149.6260223388672, -96.06509399414062
