In [1]:
# Import modules
# ipython magic to plot in line
%matplotlib inline
#import mpld3
#mpld3.enable_notebook()
import matplotlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from astropy.io import ascii
import pytz
# OS interaction
import sys
import os

In [2]:
# Directories
# Path to raw data
main_dir   = os.path.normpath('F:\Work\e\Data\Obs\Canada_Project_Sites\CRHO_Sites\Raw')
#main_dir   = os.path.normpath('C:\Users\new356\Google') # Drive\Postdoc_Research\Data\CRHO_Sites\Raw')
# Ascii input folder
dir_in     = main_dir + '\ASCII'
# netcdf output folder
dir_out = main_dir + '\\netcdf'

In [4]:
# Define input format of ascii files
input_format = 'CRHO_TELM'

In [5]:
if input_format == 'CRHO_TELM':
    # Ascii data format info
    c_header = 4 # Header lines
    c_column_line = 1 # line where column names start
    c_delimiter = ','
    # time zone variables
    #tz_in = pytz.timezone('Canada/Mountain')
    tz_in = pytz.timezone('Etc/GMT-6')

In [6]:
# Get file in info
os.chdir(dir_in) # Move to input
content = os.listdir(os.getcwd()) # Get list of files
num_files = len([name for name in os.listdir('.') if os.path.isfile(name)]) # Get number of files in

In [33]:
# Read in each file
ds_ALL = []
for cfile in content:
    # Get current station name
    csta_name = cfile[0:3] # Take the first three letter abbreviation as the name
    print('Processing ' + csta_name)
    
    # Import data to pandas dataframe
    dat = ascii.read(cfile,header_start=c_column_line,data_start=c_header,delimiter=c_delimiter)
    datain = pd.DataFrame(dat.as_array())
    
    # Make TIMESTAMP the index
    datain['TIMESTAMP'] = datain['TIMESTAMP'].astype('datetime64[ns]')
    datain = datain.set_index('TIMESTAMP')
    
    # Set time zone
    datain.index = datain.index.tz_localize(tz_in)
    
    # Import header info
    headerinfo = pd.read_csv(cfile,nrows=2,skiprows=1)
    
    # Convert to xray
    ds = xr.Dataset.from_dataframe(datain)
    
    # Rename time
    ds.rename({'TIMESTAMP':'time'},inplace=True)
    
    # Add new site dimension and make record
    ds = xr.concat([ds],'site',data_vars='all')
    #ds_site['site'] = csta_name

    # Add Coords
    print('need to get correct site location info, make these variables')
    #c_LAT xr.DataArray(39.0,['site',0])
    #ds = xr.concat({'LAT':('site',[39.0])})
    ds.coords['LAT'] = 39.0
    ds.coords['LON'] = -120.0
    #ds.coords['sta_name'] = csta_name
    
    # Add variable attributes (units and sample period)
    for cvar in ds.data_vars:
        ds.get(cvar).attrs['unit']   = headerinfo.loc[0,[cvar]].values[0]
        ds.get(cvar).attrs['sample'] = headerinfo.loc[1,[cvar]].values[0]
        
    # Change variable names to CF format?
    
    # Export to netcdf
    cfileout = os.path.join(dir_out,csta_name + '.nc')
    ds.to_netcdf(cfileout,format='netcdf4') #,encoding={'AirTemp_Avg': {'dtype': 'int16', 'scale_factor': 0.1, '_FillValue': -9999}})

Processing BNS
need to get correct site location info




TypeError: can only concatenate xarray Dataset and DataArray objects

In [43]:
datain.index

DatetimeIndex(['2015-09-01 20:15:00+06:00', '2015-09-01 20:30:00+06:00',
               '2015-09-01 20:45:00+06:00', '2015-09-01 21:00:00+06:00',
               '2015-09-01 21:15:00+06:00', '2015-09-01 21:30:00+06:00',
               '2015-09-01 21:45:00+06:00', '2015-09-01 22:00:00+06:00',
               '2015-09-01 22:15:00+06:00', '2015-09-01 22:30:00+06:00',
               ...
               '2016-05-13 16:45:00+06:00', '2016-05-13 17:00:00+06:00',
               '2016-05-13 17:15:00+06:00', '2016-05-13 17:30:00+06:00',
               '2016-05-13 17:45:00+06:00', '2016-05-13 18:00:00+06:00',
               '2016-05-13 18:15:00+06:00', '2016-05-13 18:30:00+06:00',
               '2016-05-13 18:45:00+06:00', '2016-05-13 19:00:00+06:00'],
              dtype='datetime64[ns, Etc/GMT-6]', name='TIMESTAMP', length=24474, freq=None)

In [None]:
# Open and combine multiple files
#os.chdir(dir_out)
#ds_ALL = xr.open_mfdataset('BNS.nc',chunks=10,concat_dim='Site')