In [2]:
# Wide display
from IPython.core.display import display, HTML
display(HTML("<style>#notebook-container { margin-left:-14px; width:calc(100% + 27px) !important; }</style>"))

In [3]:
import csv, datetime, fcntl, glob, json, math, os, re, subprocess, sys, time, urllib2, xml.dom.minidom, pytz
from dateutil import rrule
from dateutil import tz
import dateutil.parser
import numpy as np

# To install dateutil on Ubuntu
# sudo pip install python-dateutil

def exec_ipynb(filename_or_url):
    nb = (urllib2.urlopen(filename_or_url) if re.match(r'https?:', filename_or_url) else open(filename_or_url)).read()
    jsonNb = json.loads(nb)
    #check for the modified formatting of Jupyter Notebook v4
    if(jsonNb['nbformat'] == 4):
        exec '\n'.join([''.join(cell['source']) for cell in jsonNb['cells'] if cell['cell_type'] == 'code']) in globals()
    else:
        exec '\n'.join([''.join(cell['input']) for cell in jsonNb['worksheets'][0]['cells'] if cell['cell_type'] == 'code']) in globals()

exec_ipynb('python-utils/esdr-library.ipynb')

In [4]:
esdr = Esdr(None)
product = esdr.get_product_by_name('PurpleAir')
product

{u'created': u'2017-11-20T15:17:13.000Z',
 u'creatorUserId': 3,
 u'defaultChannelSpecs': {},
 u'description': u'PurpleAir',
 u'id': 69,
 u'modified': u'2017-11-20T15:17:13.000Z',
 u'name': u'PurpleAir',
 u'prettyName': u'PurpleAir',
 u'vendor': u'PurpleAir'}

In [5]:
feeds = []
# Approx bounding box for Allegheny County
region = 'latitude>40.192204,latitude<40.674084,longitude<-79.688618,longitude>-80.361022'
offset = 0
while True:
    response = esdr.api('GET', '/api/v1/feeds', {'whereAnd' : 'productId=%d,%s' % (product['id'], region), 'offset': str(offset)})
    data = response['data']
    totalCount = data['totalCount']
    nrecs = len(data['rows'])
    feeds.extend(data['rows'])
    limit = data['limit']
    total_recvd = offset + nrecs
    #print(offset, nrecs, limit, totalCount)
    if total_recvd >= totalCount:
        break
    offset += nrecs

print 'Retrieved %d feeds' % len(feeds)
print 'First feed: %s' % feeds[0]

Retrieved 183 feeds
First feed: {u'apiKeyReadOnly': u'2bd4d40a3059b5ef56a3f6a6b78386569da2c2580fcd5c42e497072119f7b79c', u'minTimeSecs': 1522247982.349, u'name': u'Monaca,PA PurpleAir', u'created': u'2018-03-28T14:51:40.000Z', u'isPublic': 1, u'userId': 3, u'modified': u'2018-12-23T20:27:30.000Z', u'longitude': -80.311057, u'channelBounds': {u'channels': {u'PM2_5': {u'maxTimeSecs': 1545595853.279, u'minTimeSecs': 1522247982.349, u'maxValue': 3810.59, u'minValue': 0}, u'stats_v': {u'maxTimeSecs': 1545595853.279, u'minTimeSecs': 1522247982.349, u'maxValue': 970.64, u'minValue': 0}, u'stats_pm': {u'maxTimeSecs': 1545595853.279, u'minTimeSecs': 1522247982.349, u'maxValue': 970.64, u'minValue': 0}, u'temp_f': {u'maxTimeSecs': 1545595853.279, u'minTimeSecs': 1522247982.349, u'maxValue': 105, u'minValue': 27}, u'humidity': {u'maxTimeSecs': 1545595853.279, u'minTimeSecs': 1522247982.349, u'maxValue': 84, u'minValue': 11}, u'pressure': {u'maxTimeSecs': 1545595853.279, u'minTimeSecs': 1522247982

In [10]:
def sample_feed(feed, interval, from_epoch, to_epoch):
    feed_id = feed['id']
    export_url = 'https://esdr.cmucreatelab.org/api/v1/feeds/%d/channels/PM0.3/export?from=%d&to=%d&format=json' % (feed_id, from_epoch, to_epoch)
    data = json.load(urllib2.urlopen(export_url))['data']
    
    
#     print('Feed %d has %d samples' % (feed_id, len(data)))
    if len(data) == 0:
#         print('No samples, skipping')
        return None

    
    dat = []
    start_time = from_epoch
    end_time = from_epoch + interval - 1

    prevSample = 0
    while((end_time+1) < to_epoch):
        curbin = []
        for sample in data:
            if(sample[0] >= start_time and sample[0] <= end_time):
                curbin.append(sample[1])
        newSample = prevSample
        if(len(curbin) > 0):    
            newSample =  sum(curbin)/len(curbin)
        if(newSample == 0):
            newSample = prevSample
     
        start_time += interval
        end_time += (interval-1)
        
        dat.append(newSample)
        if(newSample != 0):
            prevSample = newSample
    
    
        
    row = ['feed_%d' % feed_id, feed['latitude'], feed['longitude']] + list(dat)
 
    return row

    
    

In [11]:
sensor =  sample_feed(feeds[110], 1800, 1532822400, 1532822400+86400)
sensor.pop(0)
print sensor
print len(np.array(sensor).astype(np.float32).tobytes())

AttributeError: 'NoneType' object has no attribute 'pop'

In [12]:
def output_raw_binary(cur_date):
    
    #Gets datetime from string
    date_datetime = datetime.datetime.strptime(cur_date, "%Y-%m-%d")
    #Convert to datetime
    epoch_time = (pytz.timezone('America/New_York').localize(date_datetime) - datetime.datetime(1970, 1, 1, tzinfo=pytz.utc)).total_seconds()
    raw_data = []
    bin_data = []
    count = 0
    for feed in feeds:

        #Adds 24 hours to starting epoch_time
        #Get each sensor feed sampled at 30 mins
        sensor = sample_feed(feed, 300, epoch_time, epoch_time+86400)
        
        #Adds all non-NULL sensors to raw/bin data
        if(sensor is not None and len(sensor) > 0):
            count += 1
            raw_data.append(sensor)
            sensor.pop(0)
            bin_data.append(sensor)
    
    #Convert to binary
    raw_binary = np.array(bin_data).astype(np.float32).tobytes()

    #Outputs binary file
    path = "aggregates/"
    bin_outfile = open(path + 'purpleair03_'  + cur_date + 'tmp.bin', 'w+')
    bin_outfile.write(raw_binary)
    bin_outfile.close()

    #Outputs raw json file
    path = "aggregates/"
    with open(path + 'purpleair03_'  + cur_date + 'tmp.json', 'w+') as outfile:  
        json.dump(raw_data, outfile)

    os.rename(path + 'purpleair03_'  + cur_date + 'tmp.json', path + 'purpleair03_'  + cur_date + '.json')
    os.rename(path + 'purpleair03_'  + cur_date + 'tmp.bin',  path + 'purpleair03_'  + cur_date + '.bin')


In [13]:
#Get current time
curDate = datetime.datetime.now().strftime("%Y-%m-%d")


curDate = '2018-10-07'
while(curDate != '2019-01-30'):
    print(curDate)
    output_raw_binary(curDate)
    curDate = (datetime.datetime.strptime(curDate, '%Y-%m-%d')+datetime.timedelta(days=1)).strftime("%Y-%m-%d")


2018-10-07
2018-10-08
2018-10-09
2018-10-10
2018-10-11
2018-10-12
2018-10-13
2018-10-14
2018-10-15
2018-10-16
2018-10-17
2018-10-18
2018-10-19
2018-10-20
2018-10-21
2018-10-22
2018-10-23
2018-10-24
2018-10-25
2018-10-26
2018-10-27
2018-10-28
2018-10-29
2018-10-30
2018-10-31
2018-11-01
2018-11-02
2018-11-03
2018-11-04
2018-11-05
2018-11-06
2018-11-07
2018-11-08
2018-11-09
2018-11-10
2018-11-11
2018-11-12
2018-11-13
2018-11-14
2018-11-15
2018-11-16
2018-11-17
2018-11-18
2018-11-19
2018-11-20
2018-11-21
2018-11-22
2018-11-23
2018-11-24
2018-11-25
2018-11-26
2018-11-27
2018-11-28
2018-11-29
2018-11-30
2018-12-01
2018-12-02
2018-12-03
2018-12-04
2018-12-05
2018-12-06
2018-12-07
2018-12-08
2018-12-09
2018-12-10
2018-12-11
2018-12-12
2018-12-13
2018-12-14
2018-12-15
2018-12-16
2018-12-17
2018-12-18
2018-12-19
2018-12-20
2018-12-21
2018-12-22
2018-12-23
2018-12-24
2018-12-25
2018-12-26
2018-12-27
2018-12-28
2018-12-29
2018-12-30
2018-12-31
2019-01-01
2019-01-02
2019-01-03
2019-01-04
2019-01-05