In [None]:
import h5py
import os
import pandas as pd
import numpy as np
import sys
import altair as alt
from datetime import datetime, timedelta

In [None]:
# Make the ``library`` directory that sits next to the current working
# directory importable. os.path is used instead of splitting on '/' so the
# parent directory is resolved correctly even where the path separator differs.
library_dir = os.path.join(os.path.dirname(os.getcwd()), 'library')
if library_dir not in sys.path:  # re-running this cell must not stack duplicates
    sys.path.append(library_dir)
from searcher import Searcher

In [None]:
# Input table; later cells read its 'path', 'node_timestamp' and
# 'precipitation[mm]' columns.
audio_paths_csv = '../data/audio-paths-rained.csv'
df = pd.read_csv(audio_paths_csv)

In [None]:
# The node identifier is the third '/'-separated component of each path.
path_parts = df['path'].str.split('/')
df['node'] = path_parts.str.get(2)

In [None]:
def _prediction_file_for(node_id):
    """Return the relative path of the class-prediction file for one node."""
    return f'../sonyc/class_predictions/1.0.0/2017/{node_id}_class_predictions.h5'


# One prediction file per node; rows from the same node share a path.
df['prediction_path'] = df['node'].map(_prediction_file_for)

In [None]:
# Display the frame, which by this point carries the derived 'node' and
# 'prediction_path' columns.
df

In [None]:
# Keep only the first row for each (timestamp, node) pair; the frame is
# modified in place and keeps its original index labels.
dedup_keys = ['node_timestamp', 'node']
df.drop_duplicates(subset=dedup_keys, keep='first', inplace=True)

In [None]:
# Field names read from each prediction record, in output-column order.
labels = [
    '1_engine',
    '2_machinery-impact',
    '3_non-machinery-impact',
    '4_powered-saw',
    '5_alert-signal',
    '6_music',
    '7_human-voice',
    '8_dog',
]

In [None]:
# Shared state for get_predictions: the most recently used prediction file
# path, its dataset and its timestamp table. All slots start out empty (None).
cache = dict.fromkeys(['path', 'data', 'timestamps'])

In [None]:
def get_predictions(row, cache, labels):
    """Look up the class-prediction values recorded for one row.

    The HDF5 file named by ``row['prediction_path']`` is opened only when it
    differs from the path already held in ``cache``, so consecutive rows that
    share a file reuse the loaded dataset and timestamp table.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide ``'prediction_path'`` and ``'node_timestamp'``.
    cache : dict
        Mutable state shared across calls. The keys ``'path'``, ``'data'`` and
        ``'timestamps'`` are read and updated. A ``'file'`` key is added to
        hold the open ``h5py.File`` so it can be closed when the path changes.
    labels : list of str
        Field names to extract from the matching record of the ``'coarse'``
        dataset.

    Returns
    -------
    list
        The values of ``labels`` from the first record whose ``timestamp``
        equals ``row['node_timestamp']``.

    Raises
    ------
    IndexError
        If no record in the file has the requested timestamp.
    """
    path = row['prediction_path']
    if path != cache['path']:
        # Load everything into locals first: the cache is only touched once
        # the new file has been read successfully, so a failed open can never
        # leave ``cache['path']`` pointing at data that was not loaded.
        prediction_file = h5py.File(path, 'r')
        try:
            data = prediction_file['coarse']
            timestamps = pd.DataFrame(data['timestamp'], columns=['epoch'])
        except Exception:
            prediction_file.close()
            raise

        # Release the previously cached file explicitly instead of relying on
        # garbage collection. ``.get`` tolerates caches created without 'file'.
        previous_file = cache.get('file')
        if previous_file is not None:
            previous_file.close()

        cache['file'] = prediction_file
        cache['data'] = data
        cache['timestamps'] = timestamps
        cache['path'] = path

    timestamp = row['node_timestamp']
    timestamps = cache['timestamps']
    matches = timestamps[timestamps['epoch'] == timestamp].index
    if len(matches) == 0:
        raise IndexError(
            f'timestamp {timestamp!r} not found in prediction file {path!r}'
        )

    # When several records share the timestamp, the first one is used.
    return list(cache['data'][matches[0]][labels])

# Look up the predictions row by row; each returned list is expanded into
# one column per entry. ``cache`` and ``labels`` are forwarded to every call.
prediction_df = df.apply(
    get_predictions,
    axis=1,
    args=(cache, labels),
    result_type='expand',
)

In [None]:
# Name the expanded columns after the labels they were extracted for.
prediction_df.columns = pd.Index(labels)

In [None]:
# Put the per-label prediction columns next to the original columns;
# the two frames are aligned on their index.
df_all = pd.concat([df, prediction_df], axis='columns')
df_all

In [None]:
# Identifying columns written ahead of the per-label prediction values.
id_columns = ['node_timestamp', 'precipitation[mm]', 'node']
columns_to_output = id_columns + labels

# The CSV header uses the label names without their two-character
# numeric prefix ('1_', '2_', ...).
clean_labels = [label[2:] for label in labels]
header_to_output = id_columns + clean_labels

df_all.to_csv(
    '../data/predictions.csv',
    columns=columns_to_output,
    header=header_to_output,
    index=False,
)