## Description
______
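This script processes Snotel NetCDF (`.nc`) files: it sets the time calendar to "standard", trims each dataset to a latitude/longitude bounding box, reprojects the gridded variables to the target CRS, and saves the result to an output directory.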

### Import Libraries

In [None]:
import os
import re

import numpy as np
import rasterio
import xarray as xr
from rasterio.warp import reproject, Resampling

### Inputs

In [None]:
# Latitude/longitude bounding box used to trim each dataset
lat_min = 47
lat_max = 51
lon_min = -115
lon_max = -108

# Directory the .nc files are read from, and directory the results go to
input_dir, output_dir = './snotel/', './subset_snotel/'

In [None]:
# Target CRS, given as an EPSG authority string
tgt_crs = 'EPSG:4326'

# Source CRS, given as a mapping of PROJ-style parameters
src_crs = dict(
    proj='latlong',
    ellps='GRS80',
    datum='NAD83',
    no_defs=True,
)

# GeoTransform in GDAL order (it is later passed to Affine.from_gdal):
# (x origin, x pixel size, x rotation, y origin, y rotation, y pixel size)
geo_transform = (
    -125.0208,
    0.04166662697178698,
    0,
    49.9375,
    0,
    -0.04166662697178698,
)

### Process Snotel

In [None]:
# Create the output directory (and any missing parents). exist_ok=True makes
# this a no-op when the directory is already there and avoids the race
# between a separate existence check and the creation call.
os.makedirs(output_dir, exist_ok=True)

In [None]:
# Process every snotel .nc file in the input directory: report the calendar
# found in the time units, force a "standard" calendar, trim to the bounding
# box, reproject the gridded variables, and write the result to output_dir
# under the same filename.

# The affine transform does not depend on the file or variable, so build it once.
# NOTE(review): geo_transform appears to describe the full source grid, while
# the arrays reprojected below have already been trimmed to the bounding box;
# the same transform is used for source and destination, as in the original.
# Confirm whether a transform derived from the subset coordinates is wanted.
transform = rasterio.Affine.from_gdal(*geo_transform)

for filename in os.listdir(input_dir):
    # Skip anything that is not a NetCDF file
    if not filename.endswith('.nc'):
        continue

    filepath = os.path.join(input_dir, filename)

    # The context manager closes the file even if a later step raises
    with xr.open_dataset(filepath) as dataset:
        # Access the units attribute from attrs
        # NOTE(review): when xarray decodes times it may keep 'units' in
        # .encoding rather than .attrs, in which case the fallback string
        # is used here - confirm against the actual files.
        units = dataset['time'].attrs.get('units', 'Attribute not found')

        # Try to extract the calendar type from the units string
        calendar_match = re.search(r'calendar=(\w+)', units)
        calendar = calendar_match.group(1) if calendar_match else 'Not specified'

        print(f"Processing file: {filename}")
        print(f"The calendar type inferred from units is: {calendar}")

        # Set the calendar to "standard" in the time coordinate's encoding so
        # that it is used when the dataset is written back to disk
        dataset['time'].encoding['calendar'] = 'standard'

        # Subset the dataset based on the bounds
        # NOTE(review): a label slice follows the coordinate's own ordering;
        # if 'lat' is stored north-to-south this selection would be empty -
        # confirm the ordering in the input files.
        subset_dataset = dataset.sel(
            lat=slice(lat_min, lat_max),
            lon=slice(lon_min, lon_max),
        )

        # Remove the 'time_str' variable before reprojecting
        spatial_vars = [var for var in subset_dataset.data_vars if var != 'time_str']
        subset_dataset = subset_dataset[spatial_vars]

        # Reproject each variable that declares the 'latitude_longitude'
        # grid mapping; every other variable is left untouched
        for var in subset_dataset.variables:
            if subset_dataset[var].attrs.get('grid_mapping') != 'latitude_longitude':
                continue

            data = subset_dataset[var].values

            # Reproject into a buffer of the same shape and dtype
            reprojected_data = np.empty_like(data)
            reproject(src=data, src_crs=src_crs, src_transform=transform,
                      dst=reprojected_data, dst_crs=tgt_crs,
                      dst_transform=transform, resampling=Resampling.nearest)

            # Update the variable with reprojected data
            subset_dataset[var].values = reprojected_data

        # Define the output filepath
        output_filepath = os.path.join(output_dir, filename)

        # Save the subsetted and reprojected dataset
        subset_dataset.to_netcdf(output_filepath)

        print(f"Subsetted and reprojected file saved: {output_filepath}\n")