In [1]:
%matplotlib inline
#mpld3.enable_notebook()
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
import xarray as xr
from astropy.io import ascii
import pytz
# OS interaction
import sys
import os
import glob
import wget
import seaborn as sns
# Global seaborn plotting defaults for this notebook:
# "talk" context with fonts scaled by 1.5, on a white grid background.
sns.set_context("talk",font_scale=1.5)
sns.set_style('whitegrid')

# User config

In [2]:
# Paths to user files.
# The defaults are the original author's local folders. To run the notebook on
# another machine without editing this cell, set the CSAS_DATA_DIR and/or
# CSAS_GIT_DIR environment variables before starting the kernel.
data_dir = os.path.normpath(os.environ.get('CSAS_DATA_DIR', r'F:\Work\e\Data\Obs\Canada_Project_Sites\CSAS_data')) # Where to store data on local computer
git_dir  = os.path.normpath(os.environ.get('CSAS_GIT_DIR', r'C:\Users\new356\Google Drive\Python\CSAS')) # This repo


# Create paths

In [3]:
# Data network
network = 'BC_NRT'

# Location to download current BC station data
download_dir = os.path.join(data_dir,network,'current')
# Create the folder if needed; exist_ok avoids the separate exists() check
# (and the failure when the folder appears between the check and the create).
os.makedirs(download_dir, exist_ok=True)

# Folder and file the merged netcdf output is written to
netcdf_dir   = os.path.join(data_dir,network,'netcdf')
os.makedirs(netcdf_dir, exist_ok=True)
netcdf_file_out =  os.path.join(netcdf_dir,'BC_NRT.nc')

# Metadata for BC stations (csv stored in the repo's metadata folder)
meta_file         = 'BC_Station_Metadata.csv'
meta_file_path    = os.path.join(git_dir,'metadata',meta_file)

# Download near-real-time BC data (updated hourly)

In [4]:
# Switch the working directory to the download folder
os.chdir(download_dir)

# One row per source file: (csv file name, variable name, unit)
_bc_table = [
    ('SW.csv', 'SWE',            'mm'),
    ('SD.csv', 'Snowdepth',      'cm'),
    ('TA.csv', 'AirTemperature', 'C'),
    ('PC.csv', 'Precipitation',  'mm'),
]
BC_files  = [row[0] for row in _bc_table]
Var_names = [row[1] for row in _bc_table]
Var_units = [row[2] for row in _bc_table]

# Network label used for these stations
c_network = 'bcRiverForecastCenter'

In [5]:
# Remove previous files so the fresh download can reuse the same file names.
# Only a missing file is skipped; any other failure (file locked, no
# permission, ...) is allowed to raise, because a stale csv left in place
# would otherwise be silently read again further down.
for cfile in BC_files:
    if os.path.exists(cfile):
        os.remove(cfile)

In [6]:
# Download newest files
url_base         = 'http://bcrfc.env.gov.bc.ca/data/asp/realtime/data/'
downloaded_files = []
for cfile in BC_files:
    saved_as = wget.download(url_base+cfile)
    # wget does not overwrite an existing file; it saves under a suffixed
    # name instead. Fail loudly in that case, since the later cells read
    # `cfile` and would pick up the old data.
    if os.path.basename(saved_as) != cfile:
        raise RuntimeError('Expected download to be saved as %s but got %s'
                           % (cfile, saved_as))
    downloaded_files.append(saved_as)
downloaded_files

100% [......................................................] 2249142 / 2249142

['SW.csv', 'SD.csv', 'TA.csv', 'PC.csv']

In [7]:
# Load the station metadata table; the second csv column becomes the index
metadata = pd.read_csv(
    meta_file_path,
    sep=',',
    index_col=1,
    encoding='utf-8',
)

In [8]:
# Read each downloaded csv into a DataFrame, keyed by its variable name
ds_dict   = {}
unit_dict = {}
for cf, var_name_full, var_units in zip(BC_files, Var_names, Var_units):
    print(cf)

    # First column is parsed as the datetime index; the last line of the
    # file is skipped (skipfooter needs the python parser engine).
    df = pd.read_csv(cf, index_col=0, parse_dates=True,
                     skipfooter=1, engine='python')
    df.index.names = ['Time_UTC']

    # PC.csv carries a different label for station 4A34P; rename the column
    # (presumably so it matches the other files -- TODO confirm).
    if cf == 'PC.csv':
        print('fixing Muskwa-Kechik error')
        df = df.rename(columns={'4A34P Muskwa-Kechika': '4A34P Dowling Creek'})

    # Keep the table and its unit under the same key
    ds_dict[var_name_full]   = df
    unit_dict[var_name_full] = var_units


SW.csv
SD.csv
TA.csv
PC.csv
fixing Muskwa-Kechik error


In [9]:
# Merge the per-variable DataFrames into one xarray Dataset
ds = xr.Dataset(ds_dict)
# Rename the station (column) dimension. rename() returns a new Dataset, so
# re-assign it; the old `inplace=True` flag is rejected by current xarray.
ds = ds.rename({'dim_1':'staID'})
# Keep only the text before the first space of each column label as the ID
ds['staID']        = [str(x).split(' ')[0] for x in ds.staID.values]

In [10]:
## ADD UNITS
# Tag every data variable with its unit, stored under the 'unit' attribute
for cvar in ds.data_vars:
    data_var = ds[cvar]
    data_var.attrs['unit'] = unit_dict[cvar]

In [11]:
## Add station metadata
# Dataset variable name -> column of the metadata table it is copied from
station_fields = [
    ('station_name', 'station'),
    ('Lat',          'latitude'),
    ('Lon',          'longitude'),
    ('Elevation',    'elevation'),
]
for ds_name, meta_col in station_fields:
    # Index each column by station ID so it lines up with the staID dimension
    ds[ds_name] = xr.DataArray(metadata[meta_col],
                               coords={'staID': metadata.index},
                               dims='staID')

In [12]:
# Move the station metadata from data variables to coordinates.
# set_coords() returns a new Dataset, so re-assign it; the old `inplace=True`
# flag is rejected by current xarray.
ds = ds.set_coords(['station_name','Lat','Lon','Elevation'])
ds

<xarray.Dataset>
Dimensions:         (Time_UTC: 7672, staID: 72)
Coordinates:
  * staID           (staID) object '1A01P' '1A02P' '1A03P' '1A05P' '1A12P' ...
  * Time_UTC        (Time_UTC) datetime64[ns] 2016-10-01 2016-10-01T01:00:00 ...
    station_name    (staID) object 'Yellowhead Lake' 'McBride' 'Barkerville' ...
    Lat             (staID) float64 52.91 53.31 53.06 53.95 56.02 54.11 54.3 ...
    Lon             (staID) float64 -118.5 -120.3 -121.5 -121.4 -126.3 ...
    Elevation       (staID) float64 1.852e+03 1.58e+03 1.48e+03 1.693e+03 ...
Data variables:
    AirTemperature  (Time_UTC, staID) float64 5.3 7.5 8.8 6.4 2.5 7.7 1.1 ...
    Precipitation   (Time_UTC, staID) float64 787.0 663.0 420.0 nan nan ...
    SWE             (Time_UTC, staID) float64 2.0 0.0 1.0 0.0 0.0 10.0 22.0 ...
    Snowdepth       (Time_UTC, staID) float64 nan nan 1.0 0.0 0.0 66.0 10.0 ...

In [13]:
# Put the station dimension first. Naming the order explicitly keeps the cell
# idempotent (a bare transpose would flip back when the cell is re-run), and
# it avoids the `Dataset.T` shortcut, which current xarray no longer provides.
ds = ds.transpose('staID', 'Time_UTC')
ds

<xarray.Dataset>
Dimensions:         (Time_UTC: 7672, staID: 72)
Coordinates:
  * staID           (staID) object '1A01P' '1A02P' '1A03P' '1A05P' '1A12P' ...
  * Time_UTC        (Time_UTC) datetime64[ns] 2016-10-01 2016-10-01T01:00:00 ...
    station_name    (staID) object 'Yellowhead Lake' 'McBride' 'Barkerville' ...
    Lat             (staID) float64 52.91 53.31 53.06 53.95 56.02 54.11 54.3 ...
    Lon             (staID) float64 -118.5 -120.3 -121.5 -121.4 -126.3 ...
    Elevation       (staID) float64 1.852e+03 1.58e+03 1.48e+03 1.693e+03 ...
Data variables:
    AirTemperature  (staID, Time_UTC) float64 5.3 5.1 3.9 3.7 3.6 3.3 3.0 ...
    Precipitation   (staID, Time_UTC) float64 787.0 787.0 787.0 787.0 787.0 ...
    SWE             (staID, Time_UTC) float64 2.0 2.0 2.0 2.0 2.0 2.0 2.0 ...
    Snowdepth       (staID, Time_UTC) float64 nan nan nan nan nan nan nan ...

In [20]:
# Quick look at the raw SWE array
ds['SWE'].values

array([[  2.,   2.,   2., ...,  nan,  nan,  nan],
       [  0.,   0.,   0., ...,   9.,   9.,   9.],
       [  1.,   1.,   1., ...,   0.,   0.,   0.],
       ..., 
       [  2.,   2.,   2., ...,   0.,   0.,   0.],
       [ nan,  nan,  nan, ...,   5.,   5.,   4.],
       [  0.,   0.,   0., ...,   0.,   0.,   0.]])

In [24]:
# Add Network: the same network label repeated for every station
n_stations = len(ds.staID)
ds.coords['network'] = xr.DataArray([c_network] * n_stations, dims='staID')

In [25]:
# Write the assembled dataset to the netcdf output file
ds.to_netcdf(path=netcdf_file_out)