In [2]:
import pandas as pd
from d2i_tools2 import getMeta
import warnings
# Blanket filter: every warning raised from this point on is suppressed
# (until the warning filters are changed again).
warnings.simplefilter("ignore")

## [01] Read and extract metadata into a DataFrame

In [6]:
# Load the extracted metadata; the result is used as a pandas DataFrame below.
metadf = getMeta()

print(f"** Dataframe of extracted meta data has the shape of: {metadf.shape}. **\n")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
metadf.info()



** Dataframe of extracted meta data has the shape of: (221, 21). **

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 221 entries, 0 to 220
Data columns (total 21 columns):
 #   Column                                        Non-Null Count  Dtype              
---  ------                                        --------------  -----              
 0   name                                          221 non-null    object             
 1   id                                            221 non-null    object             
 2   parent_fxf                                    221 non-null    object             
 3   permalink                                     221 non-null    object             
 4   description                                   221 non-null    object             
 5   data_upd_at                                   199 non-null    datetime64[ns, UTC]
 6   pv_last_wk                                    221 non-null    int64              
 7   pv_last_mth                           

## [02] Example to select dataset(s) using their unique dataset id

In [9]:
# The three datasets that together make up the live parking-sensor data.
parking_ids = ['vh2v-4nfs', 'crvt-b4kt', 'ntht-5rk7']
is_parking = metadf['id'].isin(parking_ids)

dfparking = metadf[is_parking]
# Transposed view: one column per selected dataset, one row per metadata field.
dfparking.T

# NOTE: `permalink` is the URL of the corresponding dataset on the Melbourne Open Data portal.

Unnamed: 0,3,6,11
name,on-street parking bay sensors,on-street parking bays,on-street car park bay restrictions
id,vh2v-4nfs,crvt-b4kt,ntht-5rk7
parent_fxf,[],[],[]
permalink,https://data.melbourne.vic.gov.au/d/vh2v-4nfs,https://data.melbourne.vic.gov.au/d/crvt-b4kt,https://data.melbourne.vic.gov.au/d/ntht-5rk7
description,contains information from in-ground car parkin...,this dataset contains spatial polygons which r...,each row contains information about the restri...
data_upd_at,2021-11-19 02:26:29+00:00,2021-08-23 16:04:33+00:00,2021-09-14 16:04:00+00:00
pv_last_wk,157,126,70
pv_last_mth,643,514,294
pv_total,38600,25348,17916
download_count,12084831,39776,602743


## [03a] Example to inspect meta data of interest -->  "domain tags"

In [10]:
# First (and only expected) match for this dataset id, reduced to the raw cell value.
dfparking.loc[dfparking['id'] == 'vh2v-4nfs', 'domain_tags'].iloc[0]

['parking',
 'sensor',
 'near real-time',
 'vacancy',
 'transport',
 'travel',
 'disability',
 'accessibility',
 'sensors',
 'safemobility']

## [03b] Example to inspect meta data of interest -->  "Quality_Known-Issues"

In [11]:
# First (and only expected) match for this dataset id, reduced to the raw cell value.
dfparking.loc[dfparking['id'] == 'vh2v-4nfs', 'Quality_Known-Issues'].iloc[0]

'parking sensors are not operational on public holidays. parking sensors will show car parks as vacant when blocked by construction zones. '

## [04] Checking meta data on 2 pedestrian count datasets

In [3]:
# Combining these two datasets gives a live geospatial snapshot of the
# pedestrian counts in the City of Melbourne.
pedestrian_ids = ['d6mv-s43h', 'h57g-5234']
in_pedestrian_set = metadf['id'].isin(pedestrian_ids)

dfpedestrian = metadf[in_pedestrian_set]
# Transposed view: one column per selected dataset, one row per metadata field.
dfpedestrian.T

# ... and so on for any other dataset ids of interest

Unnamed: 0,28,30
name,pedestrian counting system - past hour (counts...,pedestrian counting system - sensor locations
id,d6mv-s43h,h57g-5234
parent_fxf,[],[]
permalink,https://data.melbourne.vic.gov.au/d/d6mv-s43h,https://data.melbourne.vic.gov.au/d/h57g-5234
description,<b>current issue 23/09/2020</b>\nplease note: ...,"this dataset contains status, location and dir..."
data_upd_at,2021-11-21 06:01:19+00:00,2021-09-21 16:44:14+00:00
pv_last_wk,34,52
pv_last_mth,96,206
pv_total,7935,7584
download_count,20794,2196


In [30]:
# Restrict the view to the identifying, last-update and usage-count columns.
usage_cols = ['name', 'id', 'data_upd_at', 'pv_last_wk', 'pv_last_mth', 'pv_total', 'download_count']
metadf.loc[:, usage_cols]

Unnamed: 0,name,id,data_upd_at,pv_last_wk,pv_last_mth,pv_total,download_count
0,pedestrian counting system - monthly (counts p...,b2ak-trbp,2021-11-04 22:38:23+00:00,111,779,73850,8834
1,tree canopies 2011 (urban forest),y79a-us3f,NaT,3,33,66899,3178
2,"trees, with species and dimensions (urban forest)",fp38-wiyy,2021-09-30 16:06:54+00:00,40,269,39834,6395
3,on-street parking bay sensors,vh2v-4nfs,2021-11-19 05:28:28+00:00,152,639,38601,12085197
4,pay stay zones linked to street segments,7q9g-yyvg,2018-03-02 01:46:14+00:00,112,467,32074,1819
...,...,...,...,...,...,...,...
216,cardboard bales collected at degraves st recyc...,hisn-tyup,2017-05-12 06:23:18+00:00,0,3,225,848
217,city of melbourne population forecasts by smal...,sp4r-xphj,2021-08-12 06:45:16+00:00,7,64,109,17
218,city of melbourne jobs forecasts by small area...,73qa-862u,2021-08-12 07:19:52+00:00,12,28,52,13
219,city of melbourne dwellings and household fore...,jizx-9ayp,2021-08-12 07:01:29+00:00,2,16,47,7


## [05] Single keyword search on dataset name

In [None]:
# Goal: reproduce the results of keyword_search() in:
# https://bitbucket-students.deakin.edu.au/projects/D2IC-PG/repos/d2i---melbourne-city/browse/webapp/flaskr/dataset_search.py

In [7]:
keyword = 'cafe'

# Match the keyword literally: regex=False stops characters such as "(" or "+"
# in a keyword from being interpreted as regex syntax, and na=False treats a
# missing name as a non-match so the mask stays purely boolean.
mask = metadf['name'].str.contains(keyword, regex=False, na=False)
# Single .loc selection (rows by mask, columns by label) instead of chained indexing.
metadf.loc[mask, ['name', 'id', 'download_count', 'permalink']]

Unnamed: 0,name,id,download_count,permalink
7,"cafes and restaurants, with seating capacity",xt2y-tnn9,16176,https://data.melbourne.vic.gov.au/d/xt2y-tnn9
60,"cafe, restaurant, bistro seats 2020",dyqx-cfn5,650,https://data.melbourne.vic.gov.au/d/dyqx-cfn5
138,"cafe, restaurant, bistro seats 2017 map",w2pi-36nk,207,https://data.melbourne.vic.gov.au/d/w2pi-36nk


## [06] Logging meta data daily via crontab to explore trends

In [8]:
import os, sys
from datetime import datetime
import pandas as pd
from d2i_tools2 import getMeta

# Output locations for the daily metadata log (relative to the working directory).
folder = "datasets"
meta_fname = "meta_log.csv"          # CSV that receives the metadata snapshots
track_fname = "meta_log_track.txt"   # text file that receives the status messages
meta_log, track_log = (
    os.path.join(folder, fname) for fname in (meta_fname, track_fname)
)

def log_meta(apptoken=None):
    """Write a timestamped snapshot of the dataset metadata to the CSV log.

    Fetches the metadata with ``getMeta(apptoken)``, stamps every row with the
    current local time in a ``log_time`` column and writes the tracked columns
    to ``meta_log``. The first call creates the file with a header row; later
    calls append data rows only, so the CSV keeps a single header. A one-line
    status message is written to ``track_log`` (overwritten on the first
    write, appended afterwards).

    Args:
        apptoken: Passed straight through to ``getMeta``; defaults to None.
    """
    metadf = getMeta(apptoken)
    metadf['log_time'] = datetime.now()
    log_cols = ['name','id','data_upd_at','pv_last_wk','pv_last_mth','pv_total','download_count','log_time']
    # Create the target folder if it is missing so an unattended (cron) run
    # does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(meta_log) or os.curdir, exist_ok=True)
    if not os.path.isfile(meta_log):
        metadf[log_cols].to_csv(meta_log, mode='w', header=True, index=False)  # first write
        # Write the status line to the track file directly instead of
        # reassigning sys.stdout, which hijacked all later output and never
        # closed the file handle.
        with open(track_log, 'w') as track:
            print(f"{datetime.now()}, [meta log] written to file", file=track)
    else:
        # header=False: the header row already exists from the first write;
        # repeating it would inject column names as a data row on every run.
        metadf[log_cols].to_csv(meta_log, mode='a', header=False, index=False)  # subsequent writes
        with open(track_log, 'a') as track:
            print(f"{datetime.now()}, [meta log] appended to file", file=track)