### Python Examples of:
- reading multiple csvs at once, e.g. all ohlc csvs for one month
- converting raw trades to ohlc, renko and volume bars

#### Structure of directory:
Data is saved in a csv for each day and for each type of resampling. 

They are organized into folders with the following structure: /year/month/all files for this month.

The raw trade files to be processed are located directly in the root directory.

Example:
- Root directory (`directory` in the code): '/mnt/d/data'
- ohlc data for 2023-01-11: '/mnt/d/data/2023/1/BTCBUSD-ohlc-5T-2023-01-11.csv'
- renko data for the same day: '/mnt/d/data/2023/1/BTCBUSD-renko-20-2023-01-11.csv'
- Raw trades for the same day: '/mnt/d/data/BTCBUSD-trades-2023-01-11.zip'



In [2]:
from datetime import date, timedelta
import glob
import os
from os.path import isfile
import pickle
from sys import getsizeof

import numpy as np
import pandas as pd

from raw_trades_processor import RawTradesProcessor

# Root data folder: the raw trade files and the year/month sub-folders are addressed relative to it.
directory = '/mnt/d/data'

#### Reading all ohlc csvs for 01/2023

In [3]:

# All 15-minute ("15T") ohlc files for January 2023.
# To match every 15T ohlc file of every month instead, use: '/mnt/d/data/202*/*/*ohlc-15T*'
#
# glob returns paths in arbitrary order. Sorting by file name keeps the rows chronological, because the
# names end with an ISO date (YYYY-MM-DD); the month folders are not zero-padded, so the full path is
# not a safe sort key.
files = sorted(glob.glob(f'{directory}/2023/1/*ohlc-15T*'), key=os.path.basename)

# Read every file first and concatenate once: calling concat inside the loop copies the growing frame
# on each iteration, and seeding it with an empty DataFrame triggers dtype warnings on recent pandas.
frames = [pd.read_csv(f) for f in files]
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

display(df)

# ... post processing, TA, etc...

Unnamed: 0,open_time,close_time,open,high,low,close,volume,volume_quoted,volume_maker,volume_taker,volume_qt_maker,volume_qt_taker,num_trades
0,2023-01-01 00:00:00.029,2023-01-01 00:14:59.841,16534.83,16537.13,16512.64,16514.29,767.03891,1.267594e+07,383.98064,383.05827,6.345600e+06,6.330340e+06,20993
1,2023-01-01 00:15:00.015,2023-01-01 00:29:59.037,16514.09,16537.90,16510.76,16536.32,664.36412,1.097953e+07,343.35533,321.00879,5.674578e+06,5.304948e+06,20147
2,2023-01-01 00:30:00.037,2023-01-01 00:44:59.970,16536.31,16536.83,16501.00,16508.08,805.73222,1.330690e+07,439.02490,366.70732,7.250725e+06,6.056173e+06,27779
3,2023-01-01 00:45:00.014,2023-01-01 00:59:59.939,16508.07,16529.00,16507.65,16521.49,554.20801,9.156001e+06,292.41923,261.78878,4.831038e+06,4.324963e+06,21501
4,2023-01-01 01:00:00.142,2023-01-01 01:14:59.932,16521.95,16534.37,16518.38,16530.70,576.47955,9.527531e+06,295.16223,281.31732,4.878116e+06,4.649415e+06,22373
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971,2023-01-31 22:45:00.169,2023-01-31 22:59:59.995,22986.94,23309.82,22985.52,23183.75,3284.16763,7.613108e+07,1459.54017,1824.62746,3.383551e+07,4.229557e+07,64390
2972,2023-01-31 23:00:00.097,2023-01-31 23:14:59.569,23183.73,23189.00,23083.00,23108.72,2166.57399,5.009551e+07,1091.34708,1075.22691,2.523379e+07,2.486172e+07,38422
2973,2023-01-31 23:15:00.014,2023-01-31 23:29:59.683,23108.56,23138.52,23082.20,23108.00,1264.58566,2.923383e+07,627.91514,636.67052,1.451514e+07,1.471869e+07,27082
2974,2023-01-31 23:30:00.100,2023-01-31 23:44:59.341,23107.00,23148.52,23100.84,23122.04,1015.21475,2.347677e+07,498.23848,516.97627,1.152154e+07,1.195523e+07,21578


#### Example converting raw trades

Example of converting raw trades to renko, ohlc, volume resample etc

In [4]:
# first time converting raw trades, there are no previous remains

# constants
directory = '/mnt/d/data'
pair = 'BTCBUSD'
period_ohlc = '15T'
renko_size = 50
log_size = 0.0024  # 0.24%
vol_size = 1200
dollar_size = 30000000

d1 = date(2022, 11, 1)
d2 = date(2023, 1, 31)

# every date from d1 to d2, both included
dd = [d1 + timedelta(days=x) for x in range((d2 - d1).days + 1)]

d = dd[0]
trades_path = f'{directory}/{pair}-trades-{d}.zip'

# Results are written to <directory>/<year>/<month>/; create the folder so to_csv does not fail
# on a fresh data directory.
out_dir = f'{directory}/{d.year}/{d.month}'
os.makedirs(out_dir, exist_ok=True)

processor = RawTradesProcessor(trades_path)

# ohlc
processor.raw_to_ohlc(period_ohlc).to_csv(
    f'{out_dir}/{pair}-ohlc-{period_ohlc}-{d}.csv', index=False)

# renko
# Besides the bars, each converter returns its leftover state ("remains"); it is kept in these
# variables so the next day can be processed as a continuation of this one.
df_renko, df_rem_renko, prev_close_renko, prev_trend_renko, prev_cumdiff_renko = processor.raw_to_renko(
    renko_size)
df_renko.to_csv(
    f'{out_dir}/{pair}-renko-{renko_size}-{d}.csv', index=False)

# log renko
df_log, df_rem_log, prev_close_log, prev_trend_log, prev_cumdiff_log = processor.raw_to_renko_log(log_size)
df_log.to_csv(
    f'{out_dir}/{pair}-log-{log_size}-{d}.csv', index=False)

# volume
df_vol, df_rem_vol, prev_cumvol = processor.raw_to_volume_bars(vol_size)
df_vol.to_csv(
    f'{out_dir}/{pair}-vol-{vol_size}-{d}.csv', index=False)

# dollar / volume quoted
df_dollar, df_rem_dollar, prev_cumdollar = processor.raw_to_dollar_bars(dollar_size)
df_dollar.to_csv(
    f'{out_dir}/{pair}-dollar-{dollar_size}-{d}.csv', index=False)

print(f'Success: {trades_path}')

Success: /mnt/d/data/BTCBUSD-trades-2022-11-01.zip


In [5]:
# Show the renko bars most recently assigned to df_renko by a conversion cell.
display(df_renko)

Unnamed: 0,open_time,close_time,open,close,high,low,volume,volume_quoted,volume_maker,volume_taker,volume_qt_maker,volume_qt_taker,num_trades
0,2022-11-01 00:00:00.029,2022-11-01 00:28:26.257,20489.49,20439.49,20503.19,20439.45,2576.36381,52742690.0,1205.08234,1371.28147,24669280.0,28073410.0,52320
1,2022-11-01 00:28:26.257,2022-11-01 01:31:17.686,20489.49,20539.49,20539.5,20437.95,4574.34068,93658540.0,2146.91155,2427.42913,43954200.0,49704340.0,101609
2,2022-11-01 01:31:17.686,2022-11-01 06:10:39.116,20539.49,20589.49,20589.53,20460.0,19579.73595,401730900.0,9711.42114,9868.31481,199250900.0,202480000.0,419097
3,2022-11-01 06:10:39.116,2022-11-01 06:15:46.881,20589.49,20639.49,20639.51,20576.07,1005.23966,20710070.0,511.42511,493.81455,10536300.0,10173770.0,17248
4,2022-11-01 06:15:46.881,2022-11-01 09:46:24.702,20639.49,20689.49,20689.5,20540.42,15964.96335,328915900.0,7946.96949,8017.99386,163720800.0,165195100.0,341461
5,2022-11-01 09:46:24.702,2022-11-01 09:51:13.296,20639.49,20589.49,20695.41,20589.35,980.26129,20236160.0,518.40566,461.85563,10701580.0,9534587.0,17246
6,2022-11-01 09:51:13.296,2022-11-01 11:15:37.450,20589.49,20539.49,20649.78,20539.46,7048.91303,145250700.0,3459.23624,3589.67679,71277920.0,73972760.0,152979
7,2022-11-01 11:15:37.450,2022-11-01 11:17:12.247,20539.49,20489.49,20547.02,20489.47,406.86496,8349751.0,231.2543,175.61066,4745445.0,3604306.0,8294
8,2022-11-01 11:17:12.247,2022-11-01 13:42:32.139,20489.49,20439.49,20566.21,20439.48,12576.33418,258140500.0,6162.89403,6413.44015,126493500.0,131646900.0,265551
9,2022-11-01 13:42:32.139,2022-11-01 13:46:55.346,20439.49,20389.49,20464.61,20389.41,1536.1478,31383940.0,766.11193,770.03587,15651380.0,15732560.0,23615


In [None]:
# second time, the remains of the previous day will be passed to this day, to ensure that the junction of the
# two days is connected and that the previous day's trades are processed in this batch.
#
# NOTE(review): raw_to_renko is known to accept the remains positionally. The log / volume / dollar calls
# below assume the same convention (remains passed in the order they are returned) -- confirm against
# RawTradesProcessor.

for d in dd[1:2]:  # delete this and leave dd[1:] this way is only converting the second day
    trades_path = f'{directory}/{pair}-trades-{d}.zip'

    # The date range can cross month boundaries, so make sure the /year/month folder exists.
    out_dir = f'{directory}/{d.year}/{d.month}'
    os.makedirs(out_dir, exist_ok=True)

    processor = RawTradesProcessor(trades_path)

    # ohlc
    processor.raw_to_ohlc(period_ohlc).to_csv(
        f'{out_dir}/{pair}-ohlc-{period_ohlc}-{d}.csv', index=False)

    # renko
    df_renko, df_rem_renko, prev_close_renko, prev_trend_renko, prev_cumdiff_renko = processor.raw_to_renko(
        renko_size, df_rem_renko, prev_close_renko, prev_trend_renko, prev_cumdiff_renko)
    df_renko.to_csv(
        f'{out_dir}/{pair}-renko-{renko_size}-{d}.csv', index=False)

    # log renko
    df_log, df_rem_log, prev_close_log, prev_trend_log, prev_cumdiff_log = processor.raw_to_renko_log(
        log_size, df_rem_log, prev_close_log, prev_trend_log, prev_cumdiff_log)
    df_log.to_csv(
        f'{out_dir}/{pair}-log-{log_size}-{d}.csv', index=False)

    # volume
    df_vol, df_rem_vol, prev_cumvol = processor.raw_to_volume_bars(
        vol_size, df_rem_vol, prev_cumvol)
    df_vol.to_csv(
        f'{out_dir}/{pair}-vol-{vol_size}-{d}.csv', index=False)

    # dollar / volume quoted
    df_dollar, df_rem_dollar, prev_cumdollar = processor.raw_to_dollar_bars(
        dollar_size, df_rem_dollar, prev_cumdollar)
    df_dollar.to_csv(
        f'{out_dir}/{pair}-dollar-{dollar_size}-{d}.csv', index=False)

    print(f'Success: {trades_path}')

In [None]:
# Example: persist the remains of the last processed day in a pickle file

remains_path = f'{directory}/{pair}-remains-{str(d)}.pkl'

# The order of the entries matters: the loading cell unpacks them positionally.
remains = [
    df_rem_renko, prev_close_renko, prev_trend_renko, prev_cumdiff_renko,
    df_rem_log, prev_close_log, prev_trend_log, prev_cumdiff_log,
    df_rem_vol, prev_cumvol,
    df_rem_dollar, prev_cumdollar,
]

with open(remains_path, 'wb') as f:
    pickle.dump(remains, f)

print(f'Success pkl: {remains_path}')

If you want to turn this into a script, join the previous 3 code cells (together with the imports from the first cell) into a single .py file.

In [None]:
# On a later run, load the stored remains before the first conversion;
# this way there is no need to compute everything again just to add one day.
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.

remains_path = f'{directory}/{pair}-remains-{str(d)}.pkl'

with open(remains_path, 'rb') as f:
    remains = pickle.load(f)

# Unpack in the same order the values were dumped.
(
    df_rem_renko2, prev_close_renko2, prev_trend_renko2, prev_cumdiff_renko2,
    df_rem_log2, prev_close_log2, prev_trend_log2, prev_cumdiff_log2,
    df_rem_vol2, prev_cumvol2,
    df_rem_dollar2, prev_cumdollar2,
) = remains