In [None]:
import sys
!{sys.executable} -m pip install tables

import os
from populator import aggregate_data

# Root directory of the data; joined with `exchange` to locate both the
# per-pair input files and the aggregate output files.
data_dir = 'data'
# Name of the exchange to process. Used as the sub-directory of `data_dir`
# and as the prefix of the HDF5 key when exporting.
exchange = 'test'
# Output formats to write in the export step (either, both or neither).
write_csv = False
write_hdf = True
# Intervals other than '1Min' over which to downsample data and store on disk.
# Each entry is passed to `DataFrame.resample` and is also used in the
# output file name ('all-<interval>').
downsampling_intervals = ['5Min', '15Min', '1h', '1d']

In [None]:
# Discover the currency pairs available for this exchange from the file names
# in its data directory: the first two '-'-separated fields of each file name
# are joined into a 'BASE/QUOTE' pair.
pairs = []
# os.listdir returns entries in arbitrary order; sort for reproducible output.
for filename in sorted(os.listdir(os.path.join(data_dir, exchange))):
    # Skip the aggregate 'all-<interval>' files that the export step of this
    # notebook writes into this same directory. Without this, re-running the
    # notebook would report bogus pairs such as 'all/1m.h5'.
    if filename.startswith('all-'):
        continue
    currencies = filename.split('-')[:2]
    # A name without a '-' separator (e.g. a hidden OS file) cannot be a pair.
    if len(currencies) < 2:
        continue
    pair = '/'.join(currencies)
    # Several files may belong to the same pair; list each pair only once.
    if pair not in pairs:
        pairs.append(pair)
print('Found following pairs for exchange {}: {}'.format(exchange, pairs))

In [None]:
# Build one combined frame for every discovered pair of the exchange.
# Pass the configured `data_dir` rather than repeating the literal 'data', so
# that pair discovery, aggregation and export all use the same directory when
# the setting is changed.
# NOTE(review): '1m' and the 2000-01-01 timestamp look like the base candle
# interval and the earliest start date -- confirm against
# populator.aggregate_data.
df_1m = aggregate_data(data_dir, {exchange: pairs}, '1m', '2000-01-01T00:00:00Z')
print('Assembled aggregate dataframe')

In [None]:
# Show the first rows of the aggregate frame as a quick sanity check.
df_1m.head()

In [None]:
import numpy as np

def downsample(df, interval):
    """Resample ``df`` into coarser time bins.

    Columns whose name starts with ``'volume'`` are summed within each bin;
    every other column is averaged.

    Args:
        df: DataFrame to resample. Column labels must be strings, and the
            index must be datetime-like as required by ``DataFrame.resample``.
        interval: Pandas offset alias giving the bin size (e.g. ``'5Min'``).

    Returns:
        A DataFrame with one row per bin and the same columns as ``df``.
    """
    # Use the string aggregation names instead of np.sum / np.mean: pandas
    # already maps those NumPy callables onto its own sum/mean, and recent
    # versions emit a FutureWarning when they are passed to ``agg``.
    agg_fns = {col: 'sum' if col.startswith('volume') else 'mean' for col in df}
    return df.resample(interval).agg(agg_fns)

In [None]:
# Collect (frame, interval label) tuples for export: the base 1-minute frame
# first, followed by one downsampled frame per configured interval.
dfs = [(df_1m, '1m')]
for interval in downsampling_intervals:
    status = 'Downsampling {} data to {}... '.format(exchange, interval)
    # Keep the cursor on the same line so 'Done' lands next to the status text.
    print(status, end='')
    df_down = downsample(df_1m, interval)
    dfs.append((df_down, interval))
    print('Done')

In [None]:
# Write every collected frame to '<data_dir>/<exchange>/all-<interval>.<ext>'
# in each of the enabled output formats.
for df, interval in dfs:
    path_prefix = os.path.join(data_dir, exchange, 'all-' + interval)
    if write_csv:
        csv_path = path_prefix + '.csv'
        df.to_csv(csv_path, index_label='timestamp')
        print('Wrote {}'.format(csv_path))
    if write_hdf:
        hdf_path = path_prefix + '.h5'
        # mode='w' replaces any existing file; the frame is stored under the
        # key '<exchange>_<interval>'.
        hdf_key = '{}_{}'.format(exchange, interval)
        df.to_hdf(hdf_path, key=hdf_key, mode='w')
        print('Wrote {}'.format(hdf_path))