In [1]:
import pandas as pd
import datetime as dt
from datetimerange import DateTimeRange as dtr

def timeExtract(data, time1, time2, timefield):
    '''Read a CSV file and return the rows whose timestamp falls within a timespan.

    All inputs are strings.

    Parameters
    ----------
    data : str
        Path of the CSV file to read (handed to ``pd.read_csv``).
    time1, time2 : str
        Start and end of the timespan. Both bounds are inclusive.
    timefield : str
        Name of the CSV column that holds the timestamps.

    Returns
    -------
    pandas.DataFrame
        The matching rows, keeping their original index labels, plus a
        ``truedate`` column holding the timestamp floored to the minute.
        When nothing matches, the frame is empty but keeps every column,
        so it can still be concatenated with other extracts.

    Raises
    ------
    ValueError
        If ``time1`` is later than ``time2``.
    '''
    # For processing flow monitor data, remember that time data will be in UTC
    # (7 hours ahead of Pacific Daylight Time, 8 hours ahead of Pacific Standard Time).

    d = pd.read_csv(data)

    start = pd.to_datetime(time1)
    end = pd.to_datetime(time2)
    if start > end:
        raise ValueError(
            "time inversion found: {} > {}".format(time1, time2)
        )

    # It is necessary to drop everything past minutes in order to guarantee that the
    # time will match between geocoding and when the readings were taken. Remember to
    # join the output files before converting them to anything else.
    d['truedate'] = pd.to_datetime(d[timefield]).dt.floor('min')

    # One vectorized, inclusive comparison replaces the former per-row
    # DataFrame.append loop (append was removed in pandas 2.0 and was quadratic).
    in_window = d['truedate'].between(start, end)

    # Copy so callers can modify the extract without chained-assignment warnings.
    return d.loc[in_window].copy()


In [3]:
# All three position extracts read the same export and filter on its "date" column.
positions_csv = "myflowdata/flow/user_1304855_1652657426/user_positions_20220412_20220512_1.csv"

# 2022-04-12, first window
day1p1 = timeExtract(positions_csv, "2022-04-12 18:20:00", "2022-04-12 20:28:00", "date")

# 2022-04-12, second window
day1p2 = timeExtract(positions_csv, "2022-04-12 20:41:00", "2022-04-12 21:40:00", "date")

# 2022-05-10 22:00 through 2022-05-11 00:00
day2 = timeExtract(positions_csv, "2022-05-10 22:00:00", "2022-05-11 00:00:00", "date")

In [5]:
# Stack the three position extracts in chronological order; row labels are kept as-is.
position_extracts = [day1p1, day1p2, day2]
extractedData = pd.concat(position_extracts)

In [6]:

# Sanity check: show the minute-floored timestamps of the combined extract.
truedate_column = extractedData['truedate']
print(truedate_column)

1      2022-04-12 18:20:00
2      2022-04-12 18:20:00
3      2022-04-12 18:21:00
4      2022-04-12 18:21:00
5      2022-04-12 18:22:00
               ...        
3446   2022-05-10 23:53:00
3447   2022-05-10 23:54:00
3448   2022-05-10 23:54:00
3449   2022-05-10 23:55:00
3450   2022-05-10 23:59:00
Name: truedate, Length: 453, dtype: datetime64[ns]


In [7]:
# Persist the combined position extract to disk.
extractedData.to_csv(path_or_buf='mySurvey.csv')

In [9]:
# The measurements are split over two exports; the 2022-04-12 windows come from
# the first file and the 2022-05-10 window from the second. All filter on "date (UTC)".
measures_csv_part1 = "myflowdata/flow/user_1304855_1652657426/user_measures_20220412_20220504_1.csv"
measures_csv_part2 = "myflowdata/flow/user_1304855_1652657426/user_measures_20220504_20220512_2.csv"

# 2022-04-12, first window
day1p1stat = timeExtract(measures_csv_part1, "2022-04-12 18:20:00", "2022-04-12 20:28:00", "date (UTC)")

# 2022-04-12, second window
day1p2stat = timeExtract(measures_csv_part1, "2022-04-12 20:41:00", "2022-04-12 21:40:00", "date (UTC)")

# 2022-05-10 22:00 through 2022-05-11 00:00
day2stat = timeExtract(measures_csv_part2, "2022-05-10 22:00:00", "2022-05-11 00:00:00", "date (UTC)")

In [10]:
# Stack the three measurement extracts in chronological order; row labels are kept as-is.
measurement_extracts = [day1p1stat, day1p2stat, day2stat]
extractedStat = pd.concat(measurement_extracts)

In [11]:
# Sanity check: show the combined measurement extract.
stats_text = str(extractedStat)
print(stats_text)

         timestamp           date (UTC)  NO2 (ppb)  VOC (ppb)  pm 10 (ug/m3)  \
8     1.649788e+09  2022-04-12 18:20:09      446.0       15.0      26.473090   
9     1.649788e+09  2022-04-12 18:21:09      451.0       14.0      16.536581   
10    1.649788e+09  2022-04-12 18:22:09      181.0       15.0       5.458352   
11    1.649788e+09  2022-04-12 18:23:09      181.0       14.0      41.745377   
12    1.649788e+09  2022-04-12 18:24:09      183.0       14.0       5.063039   
...            ...                  ...        ...        ...            ...   
9178  1.652227e+09  2022-05-10 23:56:45       32.0       25.0       3.000000   
9179  1.652227e+09  2022-05-10 23:57:45       30.0       28.0       3.000000   
9180  1.652227e+09  2022-05-10 23:58:45       25.0      126.0       3.000000   
9181  1.652227e+09  2022-05-10 23:59:45       26.0       99.0       3.000000   
9182  1.652227e+09  2022-05-11 00:00:45       26.0       77.0       3.000000   

      pm 2.5 (ug/m3)  NO2 (Plume AQI)  

In [13]:
# Persist the combined measurement extract to disk.
extractedStat.to_csv(path_or_buf='mystats.csv')