# Generate Detector Level covariates file

Our goal here it to consolidate the detector level covariates and augment them with data we can extract based on the location (lat, long) and date of sampling.

by Colin Talbert 2018-11-27

In [1]:
from pathlib import Path
import datetime

import  pandas as pd
import numpy as np
from pyproj import Proj, transform
from astral import Astral


from ulmo.nasa import daymet  

You can access Timestamp as pandas.Timestamp
  CSV_SWITCHOVER = pandas.tslib.Timestamp('2016-10-01')


In [2]:
output_dname = r"Z:\TSH\DD274_NABat\CNHP_data_processing\Derived\2017_output"
out_dir = Path(output_dname)

In [3]:
# site_md is file provided by Jeremy Siemers of the Colorado Natural Heritage Program.  It contains information about sampling nites, equipment used, local covariates collected, etc.
site_md_fname = r"F:\CO_NABat\CO NABAT 2016 Bulk_Stationary_Acoustic_Template.csv"
site_md = pd.read_csv(site_md_fname, skiprows=[1], parse_dates=['First Monitoring Night', 'Fourth Monitoring Night'])
site_md = site_md.rename({'Grts Id':'GridCellID', 'Location Name':'SiteID'}, axis=1)

site_md = site_md.replace({"Redlands": "REDLAND", "Ribbon":"RIBBON"})

site_md = site_md.set_index(['GridCellID', 'SiteID'], drop=False)


inProj = Proj(init='epsg:26913')
outProj = Proj(init='epsg:4326')

def utm_to_wgs84(row):    
    x1,y1 = row['UTME'], row['UTMN']
    x2,y2 = transform(inProj,outProj, x1, y1)
    return (x2,y2)

site_md['Long'], site_md['Lat'] = zip(*site_md.apply(utm_to_wgs84, axis=1))


site_md.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,GridCellID,SiteID,UTME,UTMN,First Monitoring Night,Fourth Monitoring Night,Detector,Microphone,Microphone Orientation,Microphone Height (meters),Distance to Nearest Clutter (meters),Clutter Type,Distance to Nearest Water (meters),Water Type,Percent Clutter,Broad Habitat Type,Audio Recording Name (*.wav *.zc),Software Type,Long,Lat
GridCellID,SiteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,NW,2,NW,170440,4355996,2016-06-30,2016-07-03,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 3.x,-108.821023,39.290652
2,SW,2,SW,174304,4354564,2016-06-30,2016-07-03,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 3.x,-108.775622,39.279238
2,NE,2,NE,176892,4360344,2016-06-30,2016-07-03,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 3.x,-108.748456,39.332177
2,SE,2,SE,178373,4353544,2016-06-30,2016-07-03,WILDLIFE ACOUSTICS SM2Bat+,Wildlife Acoustics SMX-US,,2.0,,vegetation,,,,shrubland,FS,Sonobat 3.x,-108.72806,39.271585
61,NE,61,NE,548408,4211808,2016-07-11,2016-07-14,WILDLIFE ACOUSTICS SM2Bat-192,Wildlife Acoustics SMX-US,,3.0,,vegetation,,,,prairie,FS,Sonobat 3.x,-104.448246,38.052719


In [4]:
# survey_nights is a file produce by the notebook: ConvertSonobat3xoutputToDataFrames.ipynb
# It is derived from the sonobat 3.x output files producec by Jeremy Siemers.
recordings_fname = r"c:\temp\2016_survey_nights.csv"
recordings = pd.read_csv(recordings_fname, parse_dates=['filedatetime'], dtype={'grtsid': int})

recordings = recordings.rename({'grtsid':'GridCellID', 'sitename':'SiteID'}, axis=1)
recordings = recordings.set_index(['GridCellID', 'SiteID'], drop=False)
recordings.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,GridCellID,SiteID,filedatetime,monitoringnight
GridCellID,SiteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1005,NE,1005,NE,2016-07-22 20:50:30,2016-07-22
1005,NE,1005,NE,2016-07-22 21:25:03,2016-07-22
1005,NE,1005,NE,2016-07-22 21:25:49,2016-07-22
1005,NE,1005,NE,2016-07-22 21:37:04,2016-07-22
1005,NE,1005,NE,2016-07-22 21:42:12,2016-07-22
1005,NE,1005,NE,2016-07-22 21:45:14,2016-07-22
1005,NE,1005,NE,2016-07-22 21:45:28,2016-07-22
1005,NE,1005,NE,2016-07-22 21:47:13,2016-07-22
1005,NE,1005,NE,2016-07-22 21:52:03,2016-07-22
1005,NE,1005,NE,2016-07-22 21:58:02,2016-07-22


In [5]:
first_last = recordings.groupby(recordings.monitoringnight, as_index=False).filedatetime.agg(['min', 'max'])
first_last.head(4)

Unnamed: 0_level_0,min,max
monitoringnight,Unnamed: 1_level_1,Unnamed: 2_level_1
2016-05-28,2016-05-28 20:48:59,2016-05-28 21:21:41
2016-05-29,2016-05-29 20:50:34,2016-05-30 05:16:13
2016-05-30,2016-05-30 20:58:31,2016-05-31 05:18:49
2016-05-31,2016-05-31 20:54:49,2016-06-01 05:13:33


### Since this contains a row for each recording we need to collapse it to a single row per site night

In [6]:
detector_covariates = recordings[['GridCellID', 'SiteID', 'monitoringnight']].join(site_md[['Lat', 'Long']], how='right').drop_duplicates()
detector_covariates.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,GridCellID,SiteID,monitoringnight,Lat,Long
GridCellID,SiteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2,NE,2.0,NE,2016-06-30,39.332177,-108.748456
2,NE,2.0,NE,2016-07-01,39.332177,-108.748456
2,NE,2.0,NE,2016-07-02,39.332177,-108.748456
2,NE,2.0,NE,2016-07-03,39.332177,-108.748456
2,NW,2.0,NW,2016-06-30,39.290652,-108.821023


In [7]:
detector_covariates = detector_covariates.join(first_last, on='monitoringnight')
detector_covariates = detector_covariates.rename({'min':'first_recording', 'max':'last_recording'}, axis=1)
detector_covariates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,GridCellID,SiteID,monitoringnight,Lat,Long,first_recording,last_recording
GridCellID,SiteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2,NE,2.0,NE,2016-06-30,39.332177,-108.748456,2016-06-30 20:52:04,2016-07-01 05:37:08
2,NE,2.0,NE,2016-07-01,39.332177,-108.748456,2016-07-01 20:43:05,2016-07-02 05:28:44
2,NE,2.0,NE,2016-07-02,39.332177,-108.748456,2016-07-02 20:43:19,2016-07-03 05:29:54
2,NE,2.0,NE,2016-07-03,39.332177,-108.748456,2016-07-03 20:42:10,2016-07-04 05:33:27
2,NW,2.0,NW,2016-06-30,39.290652,-108.821023,2016-06-30 20:52:04,2016-07-01 05:37:08


In [8]:
detector_covariates['monitoringnight'] = pd.to_datetime(detector_covariates.monitoringnight, box=True)

In [9]:
detector_covariates[detector_covariates.monitoringnight.isna()]

# per personal comunication with Jeremy Siemers 381 NW was not measured
detector_covariates = detector_covariates.drop((381, 'NW'))

### Add in Sunrise, Sunset, and Moon Phase

In [10]:

a = Astral()
a.solar_depression = 'civil'
denver = a.geocoder['Denver']


detector_covariates['moon_phase'] = detector_covariates.monitoringnight.apply(lambda x: a.moon_phase(date=x))

In [11]:
def calc_sun_metrics(row):
    
    monitoringnight = denver.sun(date=row.monitoringnight, local=True)
    monitoringmorn = denver.sun(date=row.monitoringnight + datetime.timedelta(days=1), local=True)
    
    sunset = monitoringnight['sunset']
    dusk = monitoringnight['dusk']

    dawn = monitoringmorn['dawn']
    sunrise = monitoringmorn['sunrise']
    
    return (sunset, dusk, dawn, sunrise)


detector_covariates['sunset'], detector_covariates['dusk'], detector_covariates['dawn'], detector_covariates['sunrise'] = zip(*detector_covariates.apply(calc_sun_metrics, axis=1))

for metric in ['sunset', 'dusk', 'dawn', 'sunrise']:
    detector_covariates[metric] = detector_covariates[metric].dt.strftime('%X')

In [12]:
detector_covariates.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,GridCellID,SiteID,monitoringnight,Lat,Long,first_recording,last_recording,moon_phase,sunset,dusk,dawn,sunrise
GridCellID,SiteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2,NE,2.0,NE,2016-06-30,39.332177,-108.748456,2016-06-30 20:52:04,2016-07-01 05:37:08,23,20:31:56,21:04:28,05:03:14,05:35:43
2,NE,2.0,NE,2016-07-01,39.332177,-108.748456,2016-07-01 20:43:05,2016-07-02 05:28:44,24,20:31:51,21:04:20,05:03:47,05:36:13
2,NE,2.0,NE,2016-07-02,39.332177,-108.748456,2016-07-02 20:43:19,2016-07-03 05:29:54,25,20:31:44,21:04:11,05:04:21,05:36:45
2,NE,2.0,NE,2016-07-03,39.332177,-108.748456,2016-07-03 20:42:10,2016-07-04 05:33:27,26,20:31:35,21:03:59,05:04:56,05:37:17
2,NW,2.0,NW,2016-06-30,39.290652,-108.821023,2016-06-30 20:52:04,2016-07-01 05:37:08,23,20:31:56,21:04:28,05:03:14,05:35:43
2,NW,2.0,NW,2016-07-01,39.290652,-108.821023,2016-07-01 20:43:05,2016-07-02 05:28:44,24,20:31:51,21:04:20,05:03:47,05:36:13
2,NW,2.0,NW,2016-07-02,39.290652,-108.821023,2016-07-02 20:43:19,2016-07-03 05:29:54,25,20:31:44,21:04:11,05:04:21,05:36:45
2,NW,2.0,NW,2016-07-03,39.290652,-108.821023,2016-07-03 20:42:10,2016-07-04 05:33:27,26,20:31:35,21:03:59,05:04:56,05:37:17
2,SE,2.0,SE,2016-06-30,39.271585,-108.72806,2016-06-30 20:52:04,2016-07-01 05:37:08,23,20:31:56,21:04:28,05:03:14,05:35:43
2,SE,2.0,SE,2016-07-01,39.271585,-108.72806,2016-07-01 20:43:05,2016-07-02 05:28:44,24,20:31:51,21:04:20,05:03:47,05:36:13


### Add in Daymet weather covariates

In [13]:
site_weather = {}

for site in site_md[['Long', 'Lat']].drop_duplicates().iterrows():
    lat = site[1].Lat
    long = site[1].Long
    
    df = daymet.get_daymet_singlepixel(longitude=long, latitude=lat, 
                                   years=[2016,2017], variables = ['tmax', 'tmin', 'srad', 'vp', 'swe', 'prcp', 'dayl'])
    df = df[['tmax', 'tmin', 'srad', 'vp', 'swe', 'prcp', 'dayl']]
    site_weather[(long, lat)] = df

making request for latitude, longitude: 39.29065234874705, -108.8210234682361
processing data from request: https://daymet.ornl.gov/data/send/saveData?lat=39.29065234874705&lon=-108.8210234682361&measuredParams=tmax,tmin,srad,vp,swe,prcp,dayl&year=2016,2017&lat=39.29065234874705&lon=-108.8210234682361&vars=tmax%2Ctmin%2Csrad%2Cvp%2Cswe%2Cprcp%2Cdayl&years=2016%2C2017
making request for latitude, longitude: 39.279237574693425, -108.77562221560551
processing data from request: https://daymet.ornl.gov/data/send/saveData?lat=39.279237574693425&lon=-108.77562221560551&measuredParams=tmax,tmin,srad,vp,swe,prcp,dayl&year=2016,2017&lat=39.279237574693425&lon=-108.77562221560551&vars=tmax%2Ctmin%2Csrad%2Cvp%2Cswe%2Cprcp%2Cdayl&years=2016%2C2017
making request for latitude, longitude: 39.33217723772988, -108.74845645359268
processing data from request: https://daymet.ornl.gov/data/send/saveData?lat=39.33217723772988&lon=-108.74845645359268&measuredParams=tmax,tmin,srad,vp,swe,prcp,dayl&year=2016

In [14]:
def add_weather_covariates(row):
    date_str = row.monitoringnight.strftime('%Y-%m-%d')
    
    try:
        df = site_weather[(row.Long, row.Lat)]
        row = df[date_str].iloc[0, ::2]
        return (row.tmin, row.tmax, row.srad, row.vp, row.prcp)
    except:
        return [np.nan] * 5



detector_covariates['tmin'], detector_covariates['tmax'], detector_covariates['srad'], detector_covariates['vp'], detector_covariates['prcp'] = zip(*detector_covariates.apply(add_weather_covariates, axis=1))

In [15]:
row = detector_covariates.iloc[0]
date_str = row.monitoringnight.strftime('%Y-%m-%d')

df = site_weather[(row.Long, row.Lat)].iloc[::2]
df.head()#.plot.scatter(x='tmin', y='tmax')
df[date_str]

Unnamed: 0,tmax,tmax.1,tmin,tmin.1,srad,srad.1,vp,vp.1,swe,swe.1,prcp,prcp.1,dayl,dayl.1
2016-06-30,31.5,31.5,16.0,16.0,329.600006,329.600006,1320.0,1320.0,0.0,0.0,7.0,7.0,52876.800781,52876.800781


In [16]:
detector_covariates.to_csv(out_dir.joinpath("DetectorCovariates.csv"))
detector_covariates.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,GridCellID,SiteID,monitoringnight,Lat,Long,first_recording,last_recording,moon_phase,sunset,dusk,dawn,sunrise,tmin,tmax,srad,vp,prcp
GridCellID,SiteID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2,NE,2.0,NE,2016-06-30,39.332177,-108.748456,2016-06-30 20:52:04,2016-07-01 05:37:08,23,20:31:56,21:04:28,05:03:14,05:35:43,16.0,31.5,329.600006,1320.0,7.0
2,NE,2.0,NE,2016-07-01,39.332177,-108.748456,2016-07-01 20:43:05,2016-07-02 05:28:44,24,20:31:51,21:04:20,05:03:47,05:36:13,15.5,24.5,211.199997,1720.0,16.0
2,NE,2.0,NE,2016-07-02,39.332177,-108.748456,2016-07-02 20:43:19,2016-07-03 05:29:54,25,20:31:44,21:04:11,05:04:21,05:36:45,15.0,27.0,275.200012,1560.0,15.0
2,NE,2.0,NE,2016-07-03,39.332177,-108.748456,2016-07-03 20:42:10,2016-07-04 05:33:27,26,20:31:35,21:03:59,05:04:56,05:37:17,16.0,28.5,281.600006,1720.0,20.0
2,NW,2.0,NW,2016-06-30,39.290652,-108.821023,2016-06-30 20:52:04,2016-07-01 05:37:08,23,20:31:56,21:04:28,05:03:14,05:35:43,17.0,32.5,332.799988,1200.0,4.0
