# Snow Geospatial Calculations

In [1]:
import glob
import os
from osgeo import gdal
import numpy as np
import pandas as pd
import rioxarray as rio 
# import leafmap.leafmap as leafmap

## Table of Contents

 ####  [Process Snow Data](#bullet1)
 * [Calculate Monthly Snow Cover Averages](#bullet2)
 * [Calculate Snow Cover Anomolies Per Month Per Year](#bullet5)
 * [Select Indivdual Snow Cover Years and Months](#bullet6)
 #### [Process Albedo Data](#bullet7)
 * [Calculate Monthly Albedo Averages](#bullet8)
 * [Calculate Albedo Anomalies Per Month Per Year](#bullet10)
 * [Select Indivdual Years and Months for Albedo](#bullet11)
 #### [Convert Datasets to GeoTIFF](#bullet12)
 * [Define Geospatial Inputs](#bullet3)
 * [Convert Array to Geotiff](#bullet13)
 * [Convert Stacked Array into Multiple Geotiffs](#bullet14)
 * [Transform Geotiff to WGS84 Coordinate System](#bullet15)
 * [Transform Geotiff to NetCDF File](#bullet16)
 #### [Plot Geotiffs Using Leafmap](#bullet17)


# Processing Snow Cover Data <a class="anchor" id="bullet1"></a>

In [2]:
# Glob together all of the Snow Fraction datasets.
snow_ds = glob.glob('Sierra20*.h5')
snow_ds

['Sierra2001.h5',
 'Sierra2002.h5',
 'Sierra2003.h5',
 'Sierra2004.h5',
 'Sierra2005.h5',
 'Sierra2006.h5',
 'Sierra2007.h5',
 'Sierra2008.h5',
 'Sierra2009.h5',
 'Sierra2010.h5',
 'Sierra2011.h5',
 'Sierra2012.h5',
 'Sierra2013.h5',
 'Sierra2014.h5',
 'Sierra2015.h5',
 'Sierra2016.h5',
 'Sierra2017.h5',
 'Sierra2018.h5',
 'Sierra2019.h5']

In [3]:
# Get subdatasets of first snow fraction dataset ('Sierra2001.h5').
datasets = gdal.Open(snow_ds[0], gdal.GA_ReadOnly).GetSubDatasets()

#(sds[3] is to choose the 4th dataset in the subdirectory (i.e., snow fraction). 
#The second bracket [0] is needed to open the dataset.
snow_data = gdal.Open(datasets[3][0])

#Changes the selected dataset into an array.
snow_data_array = snow_data.ReadAsArray()

#Converts the variables to 'float' to allow us to convert NA values (255) to nans
#We also convert 0s to nans so that when plotted on base map, only areas where data is present are shown
snow_data_float=snow_data_array.astype('float')
snow_data_float[snow_data_float == 255] = np.nan

snow_data_transposed = np.transpose(snow_data_float)

### Calcuate Monthly Snow Cover Averages<a class="anchor" id="bullet2"></a>

#### Single Year Test

In [4]:
# Create an empty list to append our calculated values to. 
month_list = []

# Test to see if we can create a range of dates starting at '2000-10-01', the first date of our first year. (Water years count from the year prior).
start_date = (str(2000) + '-10-01')
end_date = (str(int(2000) + 1) + '-09-30')

# Creates a list of datetimes based on each year in our dataset.
# start = start of the water year
# end = end of water year
# freq = frequency of series (in this case days)
year_month_day = pd.Series(pd.date_range(start = start_date, end = end_date, freq="d"))

month = year_month_day[year_month_day.dt.month == 6]

# Since our year_month_day variable starts and ends at the same time as our dataset(e.g., 20xx-10-01 to 20xx-09-03), we can use the subset the months our dataset by the lists index.
# However, as Python counts from 0 onward, and we are interested in the physical values for dates, we need to add 1 to our first and last month values (e.g., change 0-30 to 1-31)
first_day_month = month.index[0].astype('int') + 1 
last_day_month = month.index[-1].astype('int') + 1
        # Subset our yearly data by each months values.
month_len = snow_data_transposed[:,:,first_day_month:last_day_month]
# Takes the mean of each cell in x and y dimensions over the specific month. 
# Axis 2 aligns with our 3rd dimension, which are days in this case.  
mean = np.mean(month_len, axis = 2)
# Appends the values to an empty list.
month_list.append(mean)
# Converts list to an array. 
month_array = np.array(month_list)
# Our array has 3-dimensions (first dimension being month_mean) so we need to subset the data to be 2 dimensions. 
month_array = month_array[0,:,:]


#### For Loop Through Each Year

In [17]:
# Create an empty list to append our values to. 
snow_monthly_means_list = []
for i in range(len(snow_ds)):
    # Get subdatasets of first snow fraction dataset ('Sierra2001.h5').
    datasets = gdal.Open(snow_ds[i], gdal.GA_ReadOnly).GetSubDatasets()

    #(sds[3] is to choose the 4th dataset in the subdirectory (i.e., snow fraction). 
    #The second bracket [0] is needed to open the dataset.
    snow_data = gdal.Open(datasets[3][0])

    #Changes the selected dataset into an array.
    snow_data_array = snow_data.ReadAsArray()

    #Converts the variables to 'float' to allow us to convert NA values (255) to nans
    #We also convert 0s to nans so that when plotted on base map, only areas where data is present are shown
    snow_data_float=snow_data_array.astype('float')
    snow_data_float[snow_data_float == 255] = np.nan
    snow_data_float[snow_data_float == 0 ] = np.nan

    # Need to transpose our data to orient the data values in the right direction for plotting. 
    sf_tanspoed = np.transpose(snow_data_float)
    
    # Make a variable for the starting year of each water year.
    year =  i + 2000
    # Creates a variable for the end date in the water year. 
    start_date = (str(year) + '-10-01')
    end_date = (str(int(year) + 1) + '-09-30')

    # Creates a list of datetimes based on each year in our dataset.
    # start = start of the water year
    # end = end of water year
    # freq = frequency of series (in this case days)
    year_month_day = pd.Series(pd.date_range(start = start_date, end = end_date, freq="d"))
    #Need to create an empty list to append our nest for loop values to. 
    new_list = []
    # For loop to calculate the mean for each month per year. 
    for j in range (1, 13):
        # Subset the date year we just created based on month each month. 
        month = year_month_day[year_month_day.dt.month == j]

        # Since our year_month_day variable starts and ends at the same time as our dataset(e.g., 20xx-10-01 to 20xx-09-03), we can use the subset the months our dataset by the lists index.
        # However, as Python counts from 0 onward, and we are interested in the physical values for dates, we need to add 1 to our first and last month values (e.g., change 0-30 to 1-31)
        first_day_month = month.index[0].astype('int') + 1 
        last_day_month = month.index[-1].astype('int') + 1
        # Subset our yearly data by each months values.
        month_len = snow_data_transposed[:,:,first_day_month:last_day_month]
        # Now we can take the mean of year month, since our 3rd dimension is the subset of days for that month.
        # This will reduce our dimensions down to 2, since we're taking the average over a month. 
        mean = np.mean(month_len, axis = 2)
        # Append monthly mean values to list for a single year. 
        new_list.append(mean)
    # Append single year lists to yearly list, which will contain all the monthly means per year. 
    snow_monthly_means_list.append(new_list)
# Converts list to array. 
snow_monthly_means_per_year_array = np.array(snow_monthly_means_list)
np.shape(snow_monthly_means_per_year_array)

(19, 12, 1841, 1334)

## Calculate Annual Mean Snow Cover for Each Year<a class="anchor" id="bullet4"></a> 

In [18]:
# Create an empty list to append our yearly means to
snow_year_list = []
for i in range(len(snow_ds)):
    # Get subdatasets of first snow fraction dataset ('Sierra2001.h5').
    datasets = gdal.Open(snow_ds[i], gdal.GA_ReadOnly).GetSubDatasets()

    #(sds[3] is to choose the 4th dataset in the subdirectory (i.e., snow fraction). 
    #The second bracket [0] is needed to open the dataset.
    snow_data = gdal.Open(datasets[3][0])

    #Changes the selected dataset into an array.
    snow_data_array = snow_data.ReadAsArray()

    #Converts the variables to 'float' to allow us to convert NA values (255) to nans
    #We also convert 0s to nans so that when plotted on base map, only areas where data is present are shown
    snow_data_float=snow_data_array.astype('float')
    snow_data_float[snow_data_float == 255] = np.nan
    snow_data_float[snow_data_float == 0 ] = np.nan

    snow_data_transposed = np.transpose(snow_data_float)
    # The dimensions of our data is ydim, xdim, days. 
    # Therefore, we can take the mean by our 3rd dimension to find the annual values of snow.
    annual_snow = np.nanmean(snow_data_transposed, axis = 2)
    # Append yearly values to a list. 
    snow_year_list.append(annual_snow)
# Convert list to an array. 
snow_year_array = np.array(snow_year_list)

  annual_snow = np.nanmean(snow_data_transposed, axis = 2)


## Calculate Snow Cover Anomolies Per Month Per Year<a class="anchor" id="bullet5"></a> 

In [19]:
# Takes the mean of each monthly mean
# Note: to get true mean (weighted mean), we would need to take the mean of the cumulatice days of a single month, then divide by the number of days. 
# However, since the sample size only varies by one day every four years (leap years), this will give us near identical values. 

# Since we already have the mean of each month per year stored in a 4d array (year, month, ydim, xdim), we can take the mean the year column (axis 0), since each year columnm
# stores a 3d array for each year. This function will effectively take the mean of each month, ydim, and xdim to end up with a single 3d array. 
mean_of_months = np.mean(snow_monthly_means_per_year_array, axis = 0)

# Create empty list to put anomalies in.
monthly_anomalies = []
# For loop through each year in the dataset (19)
for i in range(len(snow_monthly_means_per_year_array[:])):
    # For loop through each year
    selected_anomaly_year = snow_monthly_means_per_year_array[i]
    # Create an empty list to append our monthly anomalies to
    month_anomalies_list = []
    # For loop through each month of each year
    for j in range(len(selected_anomaly_year[:])):
        # Subtract our monthly means from our annual means for each month
        monthly_anom = selected_anomaly_year[j] - mean_of_months[j]
        # Append list with the calculated monthly anomalies.
        month_anomalies_list.append(monthly_anom)
    # Append list with list comprised of monthly anomalies
    # Final list values will be the same shape as the monthly_means_per_year dataset since we are calculting monthly anomalies for each year.
    monthly_anomalies.append(month_anomalies_list)
# Transform to list to arrays
snow_anom_array = np.array(monthly_anomalies)


## Select Indivdual Snow Cover Years and Months<a class="anchor" id="bullet6"></a> 

In [20]:
# Shape of list is year, month, ydim, xdim
np.shape(snow_monthly_means_per_year_array)

# Select the first year of our dataset.
snow_year_one = snow_monthly_means_per_year_array[0,:,:,:]
# Transpose data so that the dataset is ordered as: [xdim, ydim, month]
snow_year_one_transposed = np.transpose(snow_year_one, (1,2,0))


#Select the first year of the anomaly dataset. 
snow_year_one_anom = snow_anom_array[0,:,:,:]
snow_year_one_anom_transposed = np.transpose(snow_year_one)

# Select the first month of the first year
snow_year_one_month_one_anom = snow_year_one_anom[0,:,:]
# Transpose to make months the 3rd dimension
snow_y_one_m_one_anom_t = snow_year_one_month_one_anom

# Process Albedo Data<a class="anchor" id="bullet7"></a> 

In [None]:
# Glob together all of the albedo datasets.
albedo = glob.glob('SierraAlbedo*.h5')
albedo

## Calculate Monthly Albedo Averages<a class="anchor" id="bullet8"></a> 

In [7]:
# Get subdatasets of first snow fraction dataset ('SierraAlbedo2001.h5').
albedo_monthly_mean_list = []
for i in range(len(albedo)):
    dataset = gdal.Open(albedo[i], gdal.GA_ReadOnly)


    #Changes the selected dataset into an array.
    albedo_array = dataset.ReadAsArray()

    albedo_float = albedo_array.astype('float')
    albedo_float[albedo_float == 65535] = np.nan
    albedo_float[albedo_float == 0] = np.nan


    albedo_test = np.transpose(albedo_float)
    
    # Make a variable for the starting year of each water year.
    year =  i + 2000
    # Creates a variable for the first date in the water year. 
    start_date = pd.to_datetime(str(year) + '-10-01')
    end_date = pd.to_datetime(str(int(year) + 1) + '-09-30')
    # Creates a list of datetimes based on each year in our dataset.
    albedo_year_month_date = pd.Series(pd.date_range(start = start_date, end = end_date, freq="d"))
    #Need to create an empty list to append our mean values to. 
    new_list = []
    # For loop to calculate the mean for each month per year. 
    for j in range (1, 13):
        # Subset the date year based on month.
        month = albedo_year_month_date[albedo_year_month_date.dt.month == j]
        
        first_day_month = month.index[0].astype('int') + 1 
        last_day_month = month.index[-1].astype('int') + 1
        
        # Subset dataset by each month.
        month_len = albedo_test[:,:,first_day_month:last_day_month]
        # Take the mean of each month per year. 
        mean = np.mean(month_len, axis = 2)
        # Append mean values to list per year. 
        new_list.append(mean)
    # Append year lists to empty list. 
    albedo_monthly_mean_list.append(new_list)
# Converts list to array. 
albedo_monthly_mean_array = np.array(albedo_monthly_mean_list)
np.shape(albedo_monthly_mean_array)

# Since we're interested in albedo rates, we need to divide our values by the divisor(10000) 
# Create empty list to put values in 
albedo_monthly_divisor = []
# Select year
for i in range(len(albedo_monthly_mean_array[:])):
    # albedo_monthly_mean_array[i] chooses the first year in the dataset since the first dim is year
    year_divisor = albedo_monthly_mean_array[i]
    sub_list = []
    for j in range(len(year_divisor[:])):
        # year_divisor[j] chooses the first mean month of the selected year
        # We divide each value in our array by 10000 to get the real value of each cell
        albedo_monthly_anom = year_divisor[j]/10000
        # Append each monthly value to a list
        sub_list.append(albedo_monthly_anom)
    # Append each year to list
    albedo_monthly_divisor.append(sub_list)
# Convert list to array
albedo_divisor_array = np.array(albedo_monthly_divisor)

## Calculate Albedo Anomalies Per Month Per Year <a class="anchor" id="bullet10"></a> 

In [10]:
# Takes the mean of each monthly mean
# Note: to get true mean (weighted mean), we would need to take the mean of the cumulatice days of a single month, then divide by the number of days. 
# However, since the sample size only varies by one day every four years (leap years), this will give us near identical values. 
month_mean_albedo = np.mean(albedo_divisor_array, axis = 0)
np.shape(month_mean_albedo)


# Create empty list to put values in 
albedo_monthly_anom = []
# Select range in years (1, 19)
for i in range(len(albedo_divisor_array[:])):
   # albedo_divisor_array[i] chooses the first year in the dataset since the first dim is year
    albedo_year_anom = albedo_divisor_array[i]
    # Create an empty list to append nest for loop values to.
    sub_list = []
    # select range in months per year(1, 12)
    for j in range(len(albedo_year_anom[:])):
        # albedo_year_anom[j] chooses the first mean month of the selected year
        # Since we've already calculated the annual monthly mean over 19 years, we can subtract each 
        # monthly mean from the annual monthly mean to see how each years monthly means deviate from the annual monthly mean   
        month_anom = albedo_year_anom[j] - month_mean_albedo[j]
        # Append monthly anomolies to a list 
        sub_list.append(month_anom)
    # Append monthly anomolies per year to a list
    albedo_monthly_anom.append(sub_list)
# Convert anomaly list to an array
anom_array = np.array(albedo_monthly_anom)

## Select Indivdual Albedo Years and Months <a class="anchor" id="bullet11"></a> 

In [13]:
# Select the first year of the 
albedo_year_one_mean = albedo_divisor_array[0,:,:,]
albedo_year_three_mean = albedo_divisor_array[2,:,:,]
np.shape(albedo_year_one_mean)

(12, 1841, 1334)

# Convert Datasets to GeoTIFFs <a class="anchor" id="bullet12"></a> 


## Define Geospatial Inputs <a class="anchor" id="bullet3"></a> 

In [22]:
# Note: Since relevant Geospatial Metadata is the same for snow cover and albedo, we can use the same inputs for functions below. 

# x dimension of array
xdim = snow_data_array.shape[1]
# y dimension of array
ydim = snow_data_array.shape[2]
# Projection data of sample GeoTiff
projection = 'PROJCS["Albers Conical Equal Area",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["latitude_of_center",0],PARAMETER["longitude_of_center",-120],PARAMETER["standard_parallel_1",34],PARAMETER["standard_parallel_2",40.5],PARAMETER["false_easting",0],PARAMETER["false_northing",-4000000],UNIT["meters",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'
# transformation data of array
# Pull refrencing matrix from h5 file.
ref_matrix_meta = snow_data.GetMetadata()['Grid_MODIS_GRID_500m_ReferencingMatrix'].split()
referencing_matrix = [int(ref_matrix_meta[2]), int(ref_matrix_meta[1]), int(ref_matrix_meta[0]), int(ref_matrix_meta[5]), int(ref_matrix_meta[4]), int(ref_matrix_meta[3])]

## Convert Array to Geotiff <a class="anchor" id="bullet13"></a> 

In [11]:
# Define the variables needed to get the function to run. 
def SingleGeotiff(raster_name, data, height, width, geotransform, wkt):
    # Set driver to 'GTiff' for Geotiffs
    driver = gdal.GetDriverByName('GTiff')
    # Create a GeoTIFF, where:
    # name = raster name,
    # width = 1st dim,
    # height = 2nd dim,
    # 1 = 3rd dim (effectively making it a two-dimensional object),
    # GDT_Float32 = number format 
    dataset = driver.Create(
        raster_name,
        width,
        height,
        1,
        gdal.GDT_Float32)

    dataset.SetGeoTransform((
     geotransform))

    dataset.SetProjection(wkt)
    dataset.GetRasterBand(1).WriteArray(data)
    dataset.FlushCache()  # Write to disk.
    return dataset, dataset.GetRasterBand(1) 

In [23]:
SingleGeotiff('year_one_month_one_anom.tif', snow_y_one_m_one_anom_t, ydim, xdim, referencing_matrix, projection)

(<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x0000021E16ECC1E0> >,
 <osgeo.gdal.Band; proxy of <Swig Object of type 'GDALRasterBandShadow *' at 0x0000021E0BBD2450> >)

## Convert Stacked Array into Multiple Geotiffs <a class="anchor" id="bullet14"></a> 

In [None]:
year_one_t = np.transpose(snow_year_one)
path = 'tif/2001'
for i in range(len(snow_year_one) -1):
    dest = ('month' + str(i + 1) + 'year_2001.tif') 
    name = os.path.join(path, dest)
    data = snow_year_one_transposed[:,:,i]
    SingleGeotiff(name, data, ydim, xdim, referencing_matrix, projection)

In [None]:
wgs = glob.glob('tif/2001/month*year*.tif')
wgs

['tif/2001\\month10year_2001.tif',
 'tif/2001\\month11year_2001.tif',
 'tif/2001\\month1year_2001.tif',
 'tif/2001\\month2year_2001.tif',
 'tif/2001\\month3year_2001.tif',
 'tif/2001\\month4year_2001.tif',
 'tif/2001\\month5year_2001.tif',
 'tif/2001\\month6year_2001.tif',
 'tif/2001\\month7year_2001.tif',
 'tif/2001\\month8year_2001.tif',
 'tif/2001\\month9year_2001.tif']

In [None]:
# Change the following variables to the file you want to convert (inputfile)
# and what you want to name your output file (outputfile)
for i in range(len(wgs)):
    inputfile = wgs[i]
    outputfile = "wgs_test" + str(i) + ".tif"
    #Do not change the following line, it will reproject the geotiff file
    ds = gdal.Warp(outputfile, inputfile, dstSRS="+proj=longlat +datum=WGS84 +no_defs", dstNodata = 0)

### Convert Stacked Array to Geotiff <a class="anchor" id="bullet15"></a> 

In [None]:
def StackedGeotiff(name, array, geo_transform, projection):
    
    driver = gdal.GetDriverByName('GTiff')

    DataSet = driver.Create(name, array.shape[2], array.shape[1], array.shape[0], gdal.GDT_Float32)
    DataSet.SetGeoTransform(geo_transform)
    DataSet.SetProjection(projection)
    for i, image in enumerate(array, 1):
        DataSet.GetRasterBand(i).WriteArray( image )
    DataSet.FlushCache()
    return name

In [None]:
StackedGeotiff('year_one_stack.tif', snow_year_one, referencing_matrix, projection)

'year_one_stack.tif'

## Transform Geotiff to WGS84 Coordinate System <a class="anchor" id="bullet15"></a> 

In [None]:
# Change the following variables to the file you want to convert (inputfile)
# and what you want to name your output file (outputfile)
inputfile = "sample_stack.tif"
outputfile = "wgs_sample_stack.tif"

ds = gdal.Warp(outputfile, inputfile, dstSRS="+proj=longlat +datum=WGS84 +no_defs", dstNodata = 0)

## Transform Geotiff to NetCDF File <a class="anchor" id="bullet16"></a> 

In [None]:
tiff = rio.open_rasterio('sample_stack.tif')
tiff.to_netcdf('sample_stack.nc')

# Plot Rasters Using Leafmap <a class="anchor" id="bullet17"></a> 

In [None]:
# layer_one = ("wgs_test4.tif")
# m = leafmap.Map(draw_control=False, layers_control=True)
# m.add_raster(input_one, colormap='terrain', layer_name='layer_one')
# m