In [None]:
import os
import json
import pandas as pd
import datetime
from google.cloud import storage

In [None]:
# Connect to the Google Cloud Storage bucket that holds the dataset.
# The credentials JSON path is exported through the environment before the
# client is constructed.
bucket_name = 'survai-dataset'
os.environ.update({'GOOGLE_APPLICATION_CREDENTIALS': 'survai-data-connect.json'})
storage_client = storage.Client()
bucket = storage_client.bucket(bucket_name)

In [None]:
# Collect per-video results from the temp_videodata_storage folder.
# `data` ends up as a list of row dicts keyed by 'file_name': one row carrying
# the per-second detections (from '*od.json' blobs) and one row carrying the
# summary counts (from '*stats.json' blobs). The next cell merges the rows that
# share a 'file_name'.
blob_prefix = 'datasets/test_datasets/sparverius_pf/results_08052022/temp_videodata_storage/'
blobs = storage_client.list_blobs(bucket_name, prefix=blob_prefix)
data = []

for blob in blobs:
    # First path component below the prefix. Deriving it from the prefix
    # (instead of a hard-coded split index) keeps this correct if the prefix
    # depth ever changes.
    file = blob.name[len(blob_prefix):].split('/')[0]

    # Video stem = everything before the last underscore; a name without any
    # underscore is kept whole.
    file_name = file.rsplit('_', 1)[0]

    if 'od.json' in file:
        od = json.loads(blob.download_as_bytes())

        # Re-key the second offsets as 'H:MM:SS' strings.
        od = {
            str(datetime.timedelta(seconds=int(second))): detections
            for second, detections in od.items()
        }

        data.append({
            'file_name': f'{file_name}.mp4',
            'detections_per_second': od
        })

    if 'stats.json' in file:
        stats = json.loads(blob.download_as_bytes())

        # One row per stats file rather than one sparse row per key; the
        # groupby in the next cell yields the same per-video totals.
        if stats:
            data.append({'file_name': f'{file_name}.mp4', **stats})

In [None]:
# Turn the collected rows into a frame and collapse them to one row per video.
dropped_columns = ['person_on_ground', 'spray', 'violent_actions', 'crowd',
                   'striking', 'running', 'throwing', 'brawling', 'arresting']
summed_columns = ['Non Uniformed', 'Uniformed', 'Riot Shield', 'Gun',
                  'Pepper Spray', 'Baton', 'Chemical Smoke']

df = pd.DataFrame(data).drop(columns=dropped_columns)

# Keep the first detections dict found for each video and total every count column.
aggregate_functions = {
    'detections_per_second': 'first',
    **{column: 'sum' for column in summed_columns},
}

# reindex restores the column order of `df` after the aggregation.
df_new = (
    df.groupby('file_name', as_index=False)
      .agg(aggregate_functions)
      .reindex(columns=df.columns)
)

In [100]:
# Show the aggregated per-video table built in the previous cell.
df_new

Unnamed: 0,file_name,detections_per_second,Non Uniformed,Uniformed,Riot Shield,Gun,Pepper Spray,Baton,Chemical Smoke
379,VID_20211024_105303.mp4,"{'0:00:01': {'Pepper Spray': 1}, '0:00:02': {'...",2.0,0.0,0.0,0.0,11.0,0.0,0.0
380,VID_20211024_105919.mp4,"{'0:00:05': {'Non Uniformed': 1}, '0:00:06': {...",54.0,0.0,5.0,0.0,0.0,0.0,1.0
381,VID_20211024_110218.mp4,"{'0:00:01': {'Non Uniformed': 1}, '0:00:02': {...",29.0,0.0,0.0,0.0,3.0,0.0,0.0
382,VID_20211024_112001.mp4,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0
383,VID_20211111_100802.mp4,"{'0:00:03': {'Riot Shield': 1}, '0:00:04': {'R...",0.0,0.0,2.0,0.0,0.0,0.0,0.0
384,VID_20211114_104303.mp4,"{'0:00:04': {'Non Uniformed': 1}, '0:00:05': {...",11.0,0.0,0.0,0.0,0.0,0.0,0.0
385,VID_20211118_232337_347.mp4,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0
386,VID_20211123_093820.mp4,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0
387,VID_20211123_100008.mp4,{},0.0,0.0,0.0,0.0,0.0,0.0,0.0
388,VID_20211123_101900.mp4,"{'0:00:01': {'Riot Shield': 1}, '0:00:02': {'R...",0.0,0.0,4.0,0.0,0.0,0.0,0.0


In [None]:
# Persist the aggregated table in two formats, written to the working directory.
json_path, csv_path = 'pf_detections.json', 'pf_detections.csv'

# orient='index' keys each JSON record by the row's index label.
df_new.to_json(json_path, orient='index')
# Default to_csv settings: the index is written as the first column.
df_new.to_csv(csv_path)