In [27]:
## Save a data frame (time column + one column per series) to a file in NetCDF format

def save_netcdf(data_frame,
                time_column,
                file_name,
                varibale_name,
                unit_of_variable,
                variable_long_name,
                time_step = 'seconds', # minutes, hours, or days
                calendar = 'standard',
                ID  = None, # array of int values; numpy array
                lat = None, # array of lat values; numpy array
                lon = None, # array of lon values; numpy array
                author_name = ' ',
                case_name = ' ',
                license = ' '):
    """Write the columns of a data frame to a NetCDF4 file as one (time, n) variable.

    The column ``time_column`` is parsed as datetimes and becomes the ``time``
    coordinate, stored as offsets from the first time stamp in the unit chosen
    by ``time_step``. Every remaining column becomes one entry along the ``n``
    dimension. The caller's ``data_frame`` is not modified.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Frame holding ``time_column`` plus the data columns to write.
    time_column : column label
        Column of ``data_frame`` to parse with ``pandas.to_datetime``.
    file_name : str
        Path of the NetCDF file to create (opened in ``"w"`` mode).
    varibale_name : str
        Name of the data variable in the file. (The parameter name is
        misspelled; it is kept so existing keyword callers keep working.)
    unit_of_variable : str
        Value of the ``units`` attribute of the data variable.
    variable_long_name : str
        Value of the ``long_name`` attribute of the data variable.
    time_step : str, default 'seconds'
        Unit of the time axis; accepts (case-insensitive) aliases of seconds,
        minutes, hours or days.
    calendar : str, default 'standard'
        Value of the ``calendar`` attribute of the time variable.
    ID : array-like of int, optional
        Values for the ``ID`` variable; defaults to 1..n when not given/empty.
    lat, lon : array-like of float, optional
        Values for ``latitude``/``longitude``; these variables are only
        created when both are given (and non-empty).
    author_name, case_name, license : str
        Text placed in the global ``Author``, ``Source`` and ``License``
        attributes.

    Raises
    ------
    SystemExit
        If ``time_step`` is not one of the recognised aliases.
    """

    # load modules
    import pandas   as pd
    import numpy    as np
    import netCDF4  as nc4
    import sys
    import time

    def _given(values):
        # `if values:` is ambiguous (raises) for multi-element numpy arrays, so
        # test explicitly; an empty sequence still counts as "not provided".
        return values is not None and np.size(values) > 0

    # resolve the time unit before touching any data
    step_key = time_step.lower()
    if   step_key in ['seconds','second','sec','s']:
        time_unit, unit_delta = 'seconds', np.timedelta64(1, 's')
    elif step_key in ['minutes','minute','min','m']:
        time_unit, unit_delta = 'minutes', np.timedelta64(1, 'm')
    elif step_key in ['hours','hour','hr','h']:
        time_unit, unit_delta = 'hours', np.timedelta64(1, 'h')
    elif step_key in ['days','day','d']:
        time_unit, unit_delta = 'days', np.timedelta64(1, 'D')
    else:
        sys.exit('The time_step provided is not recognized; use seconds, minutes, hours, or days.')

    # work on a copy so the caller's frame keeps its original time column
    frame = data_frame.copy()
    frame[time_column] = pd.to_datetime(frame[time_column])
    frame = frame.set_index(time_column) # set as index

    # the first time stamp is the reference point of the time axis
    start_time_unit = str(frame.index[0])

    # offsets from the first time stamp, expressed in the requested unit
    # (direct subtraction avoids the round-off of a diff + running-sum)
    time_var = np.asarray((frame.index - frame.index[0]) / unit_delta, dtype='f8')

    # store whole-number offsets as integers, otherwise as doubles; NaN offsets
    # (e.g. from NaT) fail the whole-number test and therefore fall back to 'f8'
    all_whole = bool(np.all(np.mod(time_var, 1) == 0))
    if all_whole and np.abs(time_var).max() <= np.iinfo(np.int32).max:
        time_dtype_code = 'i4'
    elif all_whole:
        time_dtype_code = 'i8' # too large for 32-bit (e.g. long records in seconds)
    else:
        time_dtype_code = 'f8'

    n_columns = len(frame.columns)

    # write the netcdf file
    with nc4.Dataset(file_name, "w", format="NETCDF4") as ncid: # creating the NetCDF file
        # define the dimensions
        ncid.createDimension('n', n_columns)  # fixed size: one entry per data column
        ncid.createDimension('time', None)    # unlimited dimension
        # Variable time
        time_varid = ncid.createVariable('time', time_dtype_code, ('time', ))
        # Attributes
        time_varid.long_name = 'time'
        time_varid.units = time_unit +' since '+start_time_unit  # e.g. 'days since 2000-01-01 00:00:00'
        time_varid.calendar = calendar
        time_varid.standard_name = 'time'
        time_varid.axis = 'T'
        time_varid[:] = time_var
        # variable to be saved
        data_varid = ncid.createVariable(varibale_name, 'f8', ('time', 'n'), fill_value = -9999)
        data_varid.long_name = variable_long_name
        data_varid.units = unit_of_variable
        data_varid[:] = np.asarray(frame)
        # variable for name of the columns (labels are converted to str so
        # non-string column labels can be stored in the string variable)
        Station_ID_varid = ncid.createVariable('column_name',str,('n',))
        Station_ID_varid.long_name = 'column name'
        Station_ID_varid.units = '1'
        Station_ID_varid [:] =  np.array([str(label) for label in frame.columns], dtype='object')
        # variable ID, from 1 to n unless given by the caller
        ID_varid = ncid.createVariable('ID','i4',('n',))
        ID_varid.long_name = 'ID'
        ID_varid.units = '1'
        if _given(ID):
            ID_varid [:] =  np.asarray(ID)
        else:
            ID_varid [:] =  np.arange(n_columns)+1
        # variables lat and lon, only when both are provided
        if _given(lat) and _given(lon):
            lat_varid = ncid.createVariable('latitude','f8',('n',), fill_value = -9999)
            lat_varid.long_name = 'latitude'
            lat_varid.units = 'degrees_north'
            lat_varid[:] = np.asarray(lat)
            # variable lon
            lon_varid = ncid.createVariable('longitude','f8',('n',), fill_value = -9999)
            lon_varid.long_name = 'longitude'
            lon_varid.units = 'degrees_east'
            lon_varid[:] = np.asarray(lon)
        # set the general info
        ncid.Conventions = 'CF-1.6'
        ncid.Author = 'The data were written by ' + author_name
        ncid.License = license
        ncid.History = 'Created ' + time.ctime(time.time())
        ncid.Source = 'Case: ' + case_name + '; remapped by script from library of Shervan Gharari (https://github.com/ShervanGharari/EASYMORE).'


In [24]:
# import pandas as pd
# import numpy  as np



# df = pd.read_csv('../data/05HF003.csv');
# df['sss'] = df['Value [m]']

# time_column= 'Date'; file_name = 'test.nc'
# varibale_name = 'XXX'
# unit_of_variable = 'm'
# variable_long_name = 'ff'

# # os.remove(file_name)

# save_netcdf(df,
#             time_column,
#             file_name,
#             varibale_name,
#             unit_of_variable,
#             variable_long_name,
#             time_step = 'seconds', # minutes, hours, or days
#             calendar = 'standard',
#             ID  = None, # array of int values; numpy array
#             lat = None, # array of lat values; numpy array
#             lon = None, # array of lon values; numpy array
#             author_name = ' ',
#             case_name = ' ',
#             license = ' ')

