# Prepare dataset for submission

In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import xarray as xr
import rioxarray as rxr
import os
import glob
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import sys

In [None]:
# Local roots: the snow-cover-mapping code checkout and the dataset being prepared
base_path = '/Users/raineyaberle/Research/PhD/snow_cover_mapping/snow-cover-mapping/'
data_path = '/Volumes/LaCie/raineyaberle/Research/PhD/write-ups/CH1_snow_cover_mapping_methods_manuscript/Aberle_et_al_dataset_submission/'

# Make the project's "functions" folder importable, then load its utilities
sys.path.insert(1, os.path.join(base_path, 'functions'))
import pipeline_utils as f

# Every sub-directory directly under data_path is treated as one site;
# plain files are skipped and the names are kept in sorted order
site_names = sorted(
    entry for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)
site_names

## Snow cover statistics

In [None]:
# Columns of the snow cover statistics tables, in the order they are written out
scs_cols = [
    # Record identification
    'site_name',
    'datetime',
    'source',
    # Coordinate reference systems
    'HorizontalCRS',
    'VerticalCRS',
    # Snow cover metrics
    'SCA_m2',
    'AAR',
    'ELA_from_AAR_m',
    # Snowline elevations and geometry
    'snowline_elevs_m',
    'snowline_elevs_median_m',
    'snowlines_coords_X',
    'snowlines_coords_Y',
    'snowline_geometry',
]

In [None]:
# Tidy each site's snow cover statistics CSV and overwrite it in place
for site_name in site_names:
    print(f'\n{site_name}')

    # Locate the site's statistics table and read it, parsing the timestamps
    scs_fn = glob.glob(os.path.join(data_path, site_name, f'{site_name}_snow_cover_stats.csv'))[0]
    scs_df = pd.read_csv(scs_fn)
    scs_df['datetime'] = pd.to_datetime(scs_df['datetime'])

    # Harmonise legacy column names ("geometry", "dataset");
    # labels that are not present in the table are left alone by rename
    scs_df = scs_df.rename(columns={'geometry': 'snowline_geometry',
                                    'dataset': 'source'})

    # Store the vertical CRS as the bare EPSG code in every row
    scs_df['VerticalCRS'] = 'EPSG:5773'

    # Chronological row order, then the column layout given by scs_cols
    scs_df = scs_df.sort_values(by='datetime')[scs_cols]

    # Overwrite the original CSV with the cleaned table
    scs_df.to_csv(scs_fn, index=False)
    print('Snow cover stats re-saved to file:', scs_fn)

# Display the table of the last site processed
scs_df

## Classified images

In [None]:
# Attribute template shared by all classified image files
attrs = dict(
    Description='Classified image',
    Classes='1 = Snow, 2 = Shadowed snow, 4 = Ice, 5 = Rock, 6 = Water',
    _FillValue=-9999,
)

# Order in which attributes are written to each image;
# "datetime" and "source" are filled in per image
attrs_order = [
    'Description',
    'Classes',
    'datetime',
    'source',
    '_FillValue',
]

In [None]:
# Standardise every classified image (fill value, attributes, projection)
# and overwrite the original netCDF file in place
for site_name in site_names:
    print(f'\n{site_name}')
    
    # Grab all classified image names
    im_classified_fns = sorted(glob.glob(os.path.join(data_path, site_name, 'classified-images', '*.nc')))

    # Iterate over image file names
    for im_classified_fn in tqdm(im_classified_fns):
        # Read the image fully into memory and close the file right away:
        # the same path is overwritten below, which is unsafe while a lazily
        # loaded dataset still holds the file open
        im_classified = xr.load_dataset(im_classified_fn)
        
        # Set "0" values to -9999
        im_classified = xr.where(im_classified.classified==0, -9999, im_classified)
        
        # Grab datetime and dataset from file name: the first and third
        # underscore-separated fields of the base name
        fn_parts = os.path.basename(im_classified_fn).split('_')
        datetime = fn_parts[0]
        dataset = fn_parts[2]
        
        # Build the attributes from a fresh copy of the shared template so that
        # per-image values never leak into `attrs`, then apply the output order
        attrs_image = {**attrs, 'datetime': datetime, 'source': dataset}
        attrs_image = {k: attrs_image[k] for k in attrs_order}
        im_classified = im_classified.assign_attrs(attrs_image)
        
        # Reproject image to UTM if necessary.
        # Heuristic: a negative first x coordinate is taken to mean the grid is
        # in geographic coordinates (EPSG:4326).
        # NOTE(review): grids at positive longitudes would not be detected here.
        if im_classified.x.data[0] < 0:
            im_classified = im_classified.rio.write_crs('EPSG:4326')
            # Reproject to UTM; the helper is presumed to return the EPSG code
            # of the UTM zone for the given x/y (lon/lat) - confirm in pipeline_utils
            epsg_utm = f.convert_wgs_to_utm(im_classified.x.data[0], im_classified.y.data[0])
            im_classified = im_classified.rio.reproject(f'EPSG:{epsg_utm}', nodata=-9999)
            
        # Plot (optional visual check, disabled by default)
        # plt.figure()
        # plt.imshow(im_classified.classified.data[0], 
        #   extent=(np.min(im_classified.x.data), np.max(im_classified.x.data),
        #          np.min(im_classified.y.data), np.max(im_classified.y.data)))
        # plt.colorbar(shrink=0.5)
        # plt.show()

        # Re-save to file
        im_classified.to_netcdf(im_classified_fn)
