In [148]:
import pandas as pd


In [2]:
# geoPolyClip(gdf, polygon)
# filterById(ids_list, id_col_name, df)

# 029_d6mv-s43h__bufferred.csv
# 031_h57g-5234__list.csv

## [01] pedestrian count sensors "location list"

In [149]:
# Load the pedestrian-count sensor "location list" and preview the first rows.
dfpc = pd.read_csv('datasets/031_h57g-5234__list.csv')
dfpc.head(2)

## Each row has the latitude / longitude of a sensor, along with other info.
## Note: each sensor reports counts in 2 opposite directions
## --> some sensors are North / South, others are East / West

Unnamed: 0,sensor_id,sensor_description,sensor_name,installation_date,status,direction_1,direction_2,latitude,longitude,location,note
0,55,Elizabeth St-La Trobe St (East),Eli380_T,2018-07-19T00:00:00.000,A,North,South,-37.809889,144.961343,"{'latitude': '-37.80988941', 'longitude': '144...",
1,45,Little Collins St-Swanston St (East),Swa148_T,2017-06-29T00:00:00.000,A,North,South,-37.814141,144.966094,"{'latitude': '-37.81414074', 'longitude': '144...",


In [150]:
# Count how many sensors list each compass direction as their direction_1.
dfpc.direction_1.value_counts()

North    23
East     22
South    16
West     13
Name: direction_1, dtype: int64

## [02] pedestrian count sensors "time collected live data"

In [151]:
## Hourly reads of the live pedestrian-count snapshot have been collected since 30-Sep-2021.
## Read everything collected so far into a dataframe.

dfpch = pd.read_csv('datasets/029_d6mv-s43h__bufferred.csv')
dfpch = dfpch.drop_duplicates()  # drop duplicate rows in the collected data
# `infer_datetime_format` is deprecated since pandas 2.0 (format inference is
# now the default), so the plain call parses the column without a warning.
dfpch['date_time'] = pd.to_datetime(dfpch['date_time'])
# Store sensor ids as strings; astype is the vectorised form of .apply(str).
dfpch['sensor_id'] = dfpch['sensor_id'].astype(str)
dfpch.head(2)

Unnamed: 0,sensor_id,direction_1,direction_2,date_time
0,76,0,1,2021-09-30 10:41:00
1,2,1,1,2021-09-30 10:41:00


## [03] functions to time resample this data

In [152]:
def pedCount_resample(df_, bin='15min'):
    '''
    Resample time collected pedestrian data in both sensor directions.

    df_ : time collected dataframe with 'sensor_id', 'date_time',
          'direction_1' and 'direction_2' columns
    bin : resampling bin size passed to pedCount_getDir (default '15min')

    Uses pedCount_getDir once per direction and returns a single dataframe
    indexed by 'date_time' with columns
    ['sensor_id', 'direction_1', 'direction_2'] (sensor_id cast to int).
    '''
    dir1 = pedCount_getDir(df_, 'direction_1', bin)
    dir2 = pedCount_getDir(df_, 'direction_2', bin)

    # Both frames are indexed by (sensor_id, date_time), so align them with a
    # plain index-on-index outer join rather than a merge that mixes
    # left_index with right_on.
    both = dir1.join(dir2, how='outer')

    # Move sensor_id out of the index, leaving date_time as the only index level.
    both = both.reset_index(level='sensor_id')
    both['sensor_id'] = both['sensor_id'].astype('int')
    return both[['sensor_id', 'direction_1', 'direction_2']]

In [153]:
def pedCount_getDir(df_, direction, bin):
    '''
    Resample the counts of one sensor direction into fixed time bins.

    df_       : time collected dataframe with 'sensor_id', 'date_time'
                and the `direction` column
    direction : name of the count column to resample
                (e.g. 'direction_1' or 'direction_2')
    bin       : resampling bin size (pandas offset string, e.g. '15min')

    Returns a dataframe indexed by (sensor_id, date_time) with a single
    column named `direction` holding the summed counts per bin.

    Raises ValueError (from pd.pivot) if the same (date_time, sensor_id)
    pair occurs more than once in df_.
    '''
    # Pivot to one column per sensor so that every sensor gets a row at every
    # collected timestamp; combinations with no reading are filled with 0.
    wide = pd.pivot(df_[['sensor_id', direction, 'date_time']],
                    index='date_time', columns='sensor_id', values=direction)
    wide = wide.fillna(0).astype('int').reset_index()

    # Back to long form: one row per (date_time, sensor_id).
    long = pd.melt(wide, id_vars='date_time', var_name='sensor_id',
                   value_name=direction)
    long = long.set_index('date_time')

    # Resample only the count column. Aggregating the whole frame lets some
    # pandas versions carry the 'sensor_id' grouping column into the result,
    # which then clashes with the index level of the same name.
    return long.groupby('sensor_id')[direction].resample(bin).sum().to_frame()

In [154]:
## Try out pedCount_resample on the collected data, using 15-minute bins.

d = pedCount_resample(dfpch,'15min')
d

Unnamed: 0_level_0,sensor_id,direction_1,direction_2
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-09-30 10:30:00,1,9,1
2021-09-30 10:45:00,1,29,28
2021-09-30 11:00:00,1,19,34
2021-09-30 11:15:00,1,27,30
2021-09-30 11:30:00,1,16,26
...,...,...,...
2021-11-23 13:30:00,9,39,27
2021-11-23 13:45:00,9,33,29
2021-11-23 14:00:00,9,33,28
2021-11-23 14:15:00,9,31,23


## [04] function to filter time collected data with direction and id list

In [155]:
def pedCount_dirCount(df_, dfll_, direction, id_lst=None):
    '''
    Filter time collected pedestrian counts to one compass direction.

    df_       : time collected pedestrian data with 'sensor_id',
                'direction_1' and 'direction_2' columns
    dfll_     : sensor location list with 'sensor_id', 'direction_1' and
                'direction_2' columns naming each sensor's two directions
    direction : direction to keep (e.g. 'North'); also used as the name
                of the count column in the result
    id_lst    : optional list-like of sensor ids (list, array, Series, ...)
                to restrict to, e.g. sensors geo filtered to an area.
                If None, all sensors that report `direction` are used.

    Returns a dataframe with columns ['sensor_id', direction]: first the
    rows of sensors whose direction_1 is `direction`, then the rows of
    sensors whose direction_2 is `direction`. The index of df_ is kept.
    '''
    parts = []
    for dir_col in ('direction_1', 'direction_2'):
        # sensors whose `dir_col` points in the requested direction
        sensor_ids = dfll_.loc[dfll_[dir_col].isin([direction]), 'sensor_id']

        # `is not None` (rather than `!= None`) so that array-like id lists
        # are not compared element-wise, which makes the `if` raise
        if id_lst is not None:
            sensor_ids = sensor_ids[sensor_ids.isin(id_lst)]

        # counts from this column, relabelled with the direction name
        counts = df_[['sensor_id', dir_col]].rename(columns={dir_col: direction})
        parts.append(counts[counts['sensor_id'].isin(list(sensor_ids))])

    # DataFrame.append was removed in pandas 2.0; concat is the replacement
    return pd.concat(parts)

In [156]:
## Try out pedCount_dirCount: 'North' counts for a handful of sensor ids.

dd = pedCount_dirCount(d, dfpc, 'North', [3,14,8,22,66])
dd

Unnamed: 0_level_0,sensor_id,North
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-30 10:30:00,14,6
2021-09-30 10:45:00,14,22
2021-09-30 11:00:00,14,20
2021-09-30 11:15:00,14,20
2021-09-30 11:30:00,14,9
...,...,...
2021-11-23 13:30:00,66,164
2021-11-23 13:45:00,66,136
2021-11-23 14:00:00,66,140
2021-11-23 14:15:00,66,114


In [159]:

## Same sensor ids as the 'North' example, this time filtered to 'South' counts.
ee = pedCount_dirCount(d, dfpc, 'South', [3,14,8,22,66])
ee

Unnamed: 0_level_0,sensor_id,South
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-09-30 10:30:00,22,20
2021-09-30 10:45:00,22,80
2021-09-30 11:00:00,22,66
2021-09-30 11:15:00,22,82
2021-09-30 11:30:00,22,54
...,...,...
2021-11-23 13:30:00,8,32
2021-11-23 13:45:00,8,23
2021-11-23 14:00:00,8,31
2021-11-23 14:15:00,8,25


In [157]:
# Preview the first rows of the sensor location list again.
dfpc.head(3)

Unnamed: 0,sensor_id,sensor_description,sensor_name,installation_date,status,direction_1,direction_2,latitude,longitude,location,note
0,55,Elizabeth St-La Trobe St (East),Eli380_T,2018-07-19T00:00:00.000,A,North,South,-37.809889,144.961343,"{'latitude': '-37.80988941', 'longitude': '144...",
1,45,Little Collins St-Swanston St (East),Swa148_T,2017-06-29T00:00:00.000,A,North,South,-37.814141,144.966094,"{'latitude': '-37.81414074', 'longitude': '144...",
2,61,RMIT Building 14,RMIT14_T,2019-06-28T00:00:00.000,A,North,South,-37.807675,144.963091,"{'latitude': '-37.80767455', 'longitude': '144...",
