# Conversion of forcing created for MESH to HYPE nc or txt format for model simulation.
## nc creation is based on this page:
http://www.smhi.net/hype/wiki/doku.php?id=start:hype_file_reference:nnobs.nc

In [1]:
# packages are loaded
import xarray as xr
import pint_xarray
import glob
import netCDF4 as nc4
import os
import pandas as pd
from   easymore import Easymore

In [2]:
# inputs
# Path of the remapped forcing .nc file prepared for MESH (any remapped nc file can be used)
nc_file_source = '/scratch/mia725/calibration_workflow/Bow_Banff/MESH/MESH_forcing.nc'
# Folder that receives the converted HYPE forcing files; output file names are
# appended to this string directly, so keep the trailing '/'
path_to_save = '/scratch/mia725/calibration_workflow/Bow_Banff/HYPE/'

In [3]:
# create the output folder (and any missing parents) when it does not exist yet;
# exist_ok replaces the separate isdir test and its check-then-create race
os.makedirs(path_to_save, exist_ok=True)
    
def convert_hourly_to_daily (input_file_name,
                             variable_in,
                             variable_out,
                             variable_out_long_name = None,
                             var_unit_conversion = None,
                             var_time = 'time',
                             var_id = 'id',
                             time_diff = 0,
                             stat = 'max',
                             output_file_name_nc = None,
                             output_file_name_txt = None,
                             Fill_value = -9999.0):
    """Aggregate one variable of a forcing nc file to daily values for HYPE.

    The variable is read from ``input_file_name``, optionally shifted in
    time, resampled to daily resolution with ``stat``, optionally converted
    to other units, renamed to ``variable_out`` and finally written as a
    tab-separated text file and/or a netCDF file.

    Parameters
    ----------
    input_file_name : str
        Path of the nc file to read.
    variable_in : str
        Name of the variable to aggregate.
    variable_out : str
        Name given to the aggregated variable in the outputs.
    variable_out_long_name : str, optional
        Stored as the ``long_name`` attribute of the output variable.
    var_unit_conversion : dict, optional
        Dictionary with the keys ``'in_unit'`` and ``'out_unit'`` holding
        pint unit strings. When ``None`` no unit conversion is applied.
    var_time : str
        Name of the time coordinate/dimension.
    var_id : str or None
        Name of the identifier variable to keep; ``None`` keeps only the
        data variable and the time.
    time_diff : int
        Number of time steps by which the time coordinate is rolled before
        the daily aggregation; 0 disables the shift.
    stat : str
        Daily statistic: ``'max'``, ``'min'``, ``'mean'`` or ``'sum'``.
    output_file_name_nc : str, optional
        netCDF file to write; an existing file is replaced.
    output_file_name_txt : str, optional
        Tab-separated text file to write.
    Fill_value : float
        Fill value handed to EASYMORE and used as ``_FillValue`` encoding.

    Returns
    -------
    The object returned by ``Easymore.dataframe_to_netcdf_xr`` for the daily
    series (the same object that is saved with ``to_netcdf``).

    Raises
    ------
    ValueError
        If ``stat`` is not one of the supported statistics.
    """

    # reject an unsupported statistic before any file is touched
    if stat not in ('max', 'min', 'mean', 'sum'):
        raise ValueError('input stat should be max, min, mean or sum')

    # everything except the requested variable, time and id is dropped
    variables_to_keep = [variable_in, var_time]
    if var_id is not None:
        variables_to_keep.append(var_id)

    # read the input hourly nc file; the context manager closes the file
    # handle once the retained variables are loaded into memory
    with xr.open_dataset(input_file_name) as source:
        ds = source

        # MESH forcing names the subbasin dimension 'subbasin'; skip the
        # rename for files that do not carry that name
        if 'subbasin' in ds.dims or 'subbasin' in ds.variables:
            ds = ds.rename({'subbasin': 'id'})

        # drop all variables except the specified ones
        ds = ds.drop_vars([v for v in ds.variables if v not in variables_to_keep])
        ds = ds.load()

    # roll the time coordinate by time_diff steps so that the daily windows
    # follow the shifted time
    # NOTE(review): presumably the steps are hours (hourly input) and
    # time_diff is the local offset from the time of the file - confirm
    if time_diff != 0:
        ds[var_time] = ds[var_time].roll(**{var_time: time_diff})
        # remove the time_diff steps that wrapped around at the end or start
        if time_diff < 0:
            ds = ds.isel(**{var_time: slice(None, time_diff)})
        else:
            ds = ds.isel(**{var_time: slice(time_diff, None)})

    # aggregate to daily time steps with the requested statistic
    ds_daily = getattr(ds.resample(**{var_time: 'D'}), stat)()

    # conversion of units based on the provided conversion units
    if var_unit_conversion is not None:
        ds_daily[variable_in] = ds_daily[variable_in].pint.quantify(var_unit_conversion['in_unit'])
        ds_daily[variable_in] = ds_daily[variable_in].pint.to(var_unit_conversion['out_unit'])
        ds_daily = ds_daily.pint.dequantify()

    # rename the input variable to the output name
    ds_daily = ds_daily.rename({variable_in: variable_out})

    # add long name
    if variable_out_long_name is not None:
        ds_daily[variable_out].attrs['long_name'] = variable_out_long_name

    # transpose the variable
    ds_daily[variable_out] = ds_daily[variable_out].transpose()

    # this section is written to avoid issues with netcdf and HYPE!
    # The cause was not identified; however, when the data is transferred
    # to a dataframe, transferred back to xarray and saved, the issue with
    # HYPE is resolved. This needs a closer look. HYPE netcdf support is
    # also in an early stage of development and can have issues of its own.
    df = ds_daily[variable_out].to_dataframe()
    df = df.unstack()
    df = df.T
    df = df.droplevel(level=0, axis=0)
    df.columns.name = None
    df.index.name = var_time
    if output_file_name_txt is not None:
        df.to_csv(output_file_name_txt,
                  sep='\t', na_rep='', index_label='time', float_format='%.3f')

    # the unit is only handed to EASYMORE when a conversion was requested
    netcdf_options = dict(data_frame_DateTime_column = var_time,
                          variable_name = variable_out,
                          variable_dim_name = 'id',
                          variable_long_name = variable_out_long_name,
                          Fill_value = Fill_value)
    if var_unit_conversion is not None:
        netcdf_options['unit_of_variable'] = var_unit_conversion['out_unit']
    esmr = Easymore()
    ds_daily = esmr.dataframe_to_netcdf_xr(df, **netcdf_options)

    # save the file if path is provided
    if output_file_name_nc is not None:
        if os.path.isfile(output_file_name_nc):
            os.remove(output_file_name_nc)
        ds_daily.to_netcdf(output_file_name_nc,
                           encoding = {variable_out:{'_FillValue':Fill_value}})

    # return
    return ds_daily

# Daily HYPE forcing derived from the hourly forcing file; each entry is
# (source variable, HYPE variable name, unit conversion, daily statistic)
hype_forcing_specs = [
    ('RDRS_v2.1_P_TT_09944', 'TMAXobs', {'in_unit':'K','out_unit':'degreeC'}, 'max'),
    ('RDRS_v2.1_P_TT_09944', 'TMINobs', {'in_unit':'K','out_unit':'degreeC'}, 'min'),
    ('RDRS_v2.1_P_TT_09944', 'Tobs', {'in_unit':'K','out_unit':'degreeC'}, 'mean'),
    ('RDRS_v2.1_A_PR0_SFC', 'Pobs', {'in_unit':'mm s**-1','out_unit':'mm day**-1'}, 'mean'),
]

# Only the tab-separated txt files are written; pass
# output_file_name_nc = path_to_save+hype_name+'.nc' to write netCDF as well
ds1, ds2, ds3, ds4 = [
    convert_hourly_to_daily(nc_file_source,
                            source_name,
                            hype_name,
                            var_unit_conversion = units,
                            var_time = 'time',
                            var_id = 'id',
                            time_diff = -7,
                            stat = daily_stat,
                            output_file_name_txt = path_to_save+hype_name+'.txt')
    for source_name, hype_name, units, daily_stat in hype_forcing_specs
]


EASYMORE version 2.0.0 is initiated.
EASYMORE detects that the input datafarame is pandas dataframe
EASYMORE detects that index is pandas datatime
EASYMORE version 2.0.0 is initiated.
EASYMORE detects that the input datafarame is pandas dataframe
EASYMORE detects that index is pandas datatime
EASYMORE version 2.0.0 is initiated.
EASYMORE detects that the input datafarame is pandas dataframe
EASYMORE detects that index is pandas datatime
EASYMORE version 2.0.0 is initiated.
EASYMORE detects that the input datafarame is pandas dataframe
EASYMORE detects that index is pandas datatime
