In [None]:
import pandas as pd
import numpy as np

In [None]:
# Configuration: which pair and which monthly tick files ('201901' .. '201906') to load.
currency_pair = 'EURUSD'
periods       = [f'2019{month:02d}' for month in range(1, 7)]

# Layout of the timestamp column, as passed to pd.to_datetime below.
datetime_format = '%Y%m%d %H%M%S%f'

df_list = []
for period in periods:
    source_file = f'resources/data/DAT_ASCII_{currency_pair}_T_{period}.csv'

    # Read the file in chunks and keep the timestamp as text; it is parsed
    # explicitly below. `read_csv(date_parser=...)` is deprecated since
    # pandas 2.0, so the conversion is done with pd.to_datetime instead.
    df_chunks = pd.read_csv(source_file, sep=',',
                            header=None, names=['datetime', 'bid', 'ask', 'vol'],
                            usecols=['datetime', 'bid', 'ask'],
                            dtype={'datetime': str},
                            chunksize=50_000)

    # Parse each chunk before concatenating so the raw strings of a whole
    # file never have to be held in memory at once.
    period_df = pd.concat([
        chunk.assign(datetime=pd.to_datetime(chunk['datetime'], format=datetime_format))
        for chunk in df_chunks
    ])
    df_list.append(period_df)

# One frame holding the ticks of every loaded period, in file order.
timeseries_df = pd.concat(df_list)
timeseries_df.shape

In [None]:
def _wilder_average(values, n_timestep):
    """Smooth `values` with the recursion avg[t] = (avg[t-1] * (n - 1) + x[t]) / n.

    The first output corresponds to position `n_timestep - 1` of `values` and
    is seeded with the plain mean of the first `n_timestep` values.
    """
    seeded = values.iloc[n_timestep - 1:].copy()
    seeded.iloc[0] = values.iloc[:n_timestep].to_numpy().mean()
    # With adjust=False, ewm computes y[0] = x[0] and
    # y[t] = (1 - alpha) * y[t-1] + alpha * x[t]; for alpha = 1/n this is the
    # recursion above, evaluated without a Python-level loop.
    return seeded.ewm(alpha=1.0 / n_timestep, adjust=False).mean()


def _rsi(close, n_timestep):
    """Return the RSI of `close`, starting at position `n_timestep - 1`."""
    movement = close.diff()
    # The first movement is NaN and therefore counts as 0 on both sides.
    upward   = pd.Series(np.where(movement > 0, movement, 0.0), index=close.index)
    downward = pd.Series(np.where(movement < 0, -movement, 0.0), index=close.index)

    relative_strength = _wilder_average(upward, n_timestep) / _wilder_average(downward, n_timestep)
    return 100 - (100 / (relative_strength + 1))


def aggregation(df, rule, n_timestep=14):
    """Resample bid/ask ticks into OHLC bars and append an RSI per side.

    Parameters
    ----------
    df : pandas.DataFrame
        Tick data with the columns 'datetime', 'bid' and 'ask'.
    rule : str
        Resampling rule passed to `resample` (e.g. '1D').
    n_timestep : int, default 14
        Look-back length used to seed and smooth the RSI averages.

    Returns
    -------
    pandas.DataFrame
        Columns 'datetime', 'open_bid', 'high_bid', 'low_bid', 'bid',
        'open_ask', 'high_ask', 'low_ask', 'ask', 'bid_rsi', 'ask_rsi'
        ('bid'/'ask' hold the bar close). Bars without data are dropped, the
        first `n_timestep - 1` remaining bars are discarded, and every value
        column is rounded to 5 decimals. The frame is empty when fewer than
        `n_timestep` bars are available.

    Raises
    ------
    ValueError
        If `n_timestep` is smaller than 1.
    """
    if n_timestep < 1:
        raise ValueError('n_timestep must be a positive integer')

    ticks = df.set_index('datetime')

    bid_bars = ticks['bid'].resample(rule).ohlc().dropna().rename(columns={
        'open': 'open_bid', 'high': 'high_bid', 'low': 'low_bid', 'close': 'bid'
    })
    ask_bars = ticks['ask'].resample(rule).ohlc().dropna().rename(columns={
        'open': 'open_ask', 'high': 'high_ask', 'low': 'low_ask', 'close': 'ask'
    })

    # Align both sides on the bar timestamp instead of on row position, so a
    # bar that exists for only one side cannot shift the other side's rows.
    bars = bid_bars.join(ask_bars, how='inner').reset_index()

    if len(bars) < n_timestep:
        # Not enough bars to seed the averages: return the expected columns
        # with no rows rather than fabricating a partial result.
        empty_df = bars.iloc[0:0].copy()
        empty_df['bid_rsi'] = pd.Series(dtype='float64')
        empty_df['ask_rsi'] = pd.Series(dtype='float64')
        return empty_df

    # Calculate RSI
    # Reference: https://www.youtube.com/watch?v=WZbOeFsSirM
    # NOTE(review): the seed averages the first n_timestep bars, whose first
    # movement is always 0, i.e. n_timestep - 1 real movements divided by
    # n_timestep. Kept as is to preserve existing outputs - confirm intended.
    bid_rsi = _rsi(bars['bid'], n_timestep)
    ask_rsi = _rsi(bars['ask'], n_timestep)

    timeseries_df = bars.iloc[n_timestep - 1:].reset_index(drop=True)
    timeseries_df['bid_rsi'] = bid_rsi.to_numpy()
    timeseries_df['ask_rsi'] = ask_rsi.to_numpy()

    value_columns = [column for column in timeseries_df.columns if column != 'datetime']
    timeseries_df[value_columns] = timeseries_df[value_columns].round(5)

    return timeseries_df

In [None]:
# Aggregation
# Lower-case 'h' / 'min' are the current spellings of the hourly and minutely
# resample aliases; the upper-case 'H' form is deprecated in recent pandas.
day_df  = aggregation(timeseries_df, rule='1D')
hour_df = aggregation(timeseries_df, rule='1h')
min_df  = aggregation(timeseries_df, rule='1min')

# Export
# One CSV per bar size, named after the pair and the first/last loaded period.
def _export_path(label):
    """Build the output CSV path for the bar size called `label`."""
    return f'resources/data/DAT_ASCII_{currency_pair}_{label}_{periods[0]}-{periods[-1]}.csv'

day_df.to_csv(_export_path('Day'), index=False)
hour_df.to_csv(_export_path('Hour'), index=False)
min_df.to_csv(_export_path('Min'), index=False)