In [None]:
import glob
import gc
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
%matplotlib inline

In [None]:
# Trade economics: each round trip's price difference is scaled by MULTIPLIER
# and COMMISSION is deducted once per leg (i.e. twice per round trip).
MULTIPLIER = 50
COMMISSION = 0.85

# Mode labels handed to the model to choose between the two datasets
TRAIN, TEST = 'TRAIN', 'TEST'

# Glob patterns locating the raw CSV inputs
TRAIN_DIR = 'TRAIN DATA/*.csv'
TEST_DIR = 'TEST DATA/*.csv'

# Pickle files caching the concatenated CSVs
TRAIN_PICKLE = 'TRAIN_PICKLE.pickle'
TEST_PICKLE = 'TEST_PICKLE.pickle'

In [None]:
# Set to False if your serialized file has not yet been generated
# Remember to change back to "True" once the serialized file has been generated
DATA_SERIALIZED = True


def _serialize_csvs(pattern, pickle_path):
    """Concatenate every CSV matching ``pattern`` and pickle the result.

    Files are read in sorted filename order with their first column as the
    index, stacked row-wise, and written to ``pickle_path``.

    Args:
        pattern: glob pattern selecting the CSV files to read.
        pickle_path: destination of the pickled, concatenated DataFrame.

    Raises:
        FileNotFoundError: if ``pattern`` matches no files. Without this guard
            ``pd.concat`` fails with an opaque "No objects to concatenate".
    """
    filenames = sorted(glob.glob(pattern))
    if not filenames:
        raise FileNotFoundError(f'No CSV files match {pattern!r}')
    frames = [pd.read_csv(filename, index_col=0) for filename in filenames]
    pd.concat(frames).to_pickle(pickle_path)


if not DATA_SERIALIZED:
    # Writing files to pickle; each dataset is processed on its own so only
    # one set of raw frames is alive at any time.
    _serialize_csvs(TRAIN_DIR, TRAIN_PICKLE)
    _serialize_csvs(TEST_DIR, TEST_PICKLE)

    # Garbage collection: the helper's locals are already out of scope, so a
    # single collection pass is enough to release the raw frames.
    gc.collect()


In [None]:
# Read both cached frames (train first, then test) ...
train_data, test_data = (pd.read_pickle(path) for path in (TRAIN_PICKLE, TEST_PICKLE))

# ... and parse each frame's index labels into datetimes.
for frame in (train_data, test_data):
    frame.index = pd.to_datetime(frame.index)
del frame

In [None]:
def filterData(data, columns):
    """Return the requested columns for rows whose 'eB' value is positive.

    Args:
        data: DataFrame containing at least an 'eB' column.
        columns: column label(s) to keep in the result.

    Returns:
        The subset of ``data`` restricted to rows with ``eB > 0`` and to
        ``columns``.
    """
    # Rows with eB <= 0 (or missing) are treated as "no data" and dropped
    has_data = data['eB'].gt(0)
    return data.loc[has_data, columns]

In [None]:
def winning_probability(pnls):
    """Return the fraction of entries in ``pnls`` that are strictly positive.

    Returns ``np.nan`` when ``pnls`` is empty.
    """
    total = len(pnls)
    if total == 0:
        return np.nan
    winners = sum(1 for pnl in pnls if pnl > 0)
    return winners / total

def reward_to_risk_ratio(pnls):
    """Return the average winning PnL divided by the absolute average losing PnL.

    A PnL counts as a win when it is strictly positive; everything else
    (including exactly zero) counts as a loss.

    Returns:
        ``np.nan`` when ``pnls`` is empty or contains only wins or only
        losses (the ratio is undefined), ``np.inf`` when the average loss is
        exactly zero, otherwise the ratio as a float.
    """
    wins = [pnl for pnl in pnls if pnl > 0]
    losses = [pnl for pnl in pnls if not pnl > 0]

    # Guard explicitly: np.mean([]) would emit "Mean of empty slice" and
    # propagate NaN anyway.
    if not wins or not losses:
        return np.nan

    average_loss = abs(np.mean(losses))
    if average_loss == 0:
        # All "losses" are break-even trades; avoid the divide-by-zero warning
        return np.inf
    return np.mean(wins) / average_loss

def t_stat(pnls, ddof=0):
    """Return sqrt(n) * mean(pnls) / std(pnls) for a list of PnLs.

    Args:
        pnls: sequence of per-trade PnLs.
        ddof: delta degrees of freedom forwarded to ``np.std``. The default
            of 0 (population standard deviation) keeps the original results;
            pass ``ddof=1`` for the classical one-sample t-statistic based on
            the sample standard deviation.

    Returns:
        The statistic as a float, or ``np.nan`` when it is undefined: empty
        input, too few observations for ``ddof``, or zero dispersion (for
        example a single trade or identical PnLs).
    """
    count = len(pnls)
    if count == 0 or count <= ddof:
        return np.nan

    dispersion = np.std(pnls, ddof=ddof)
    # Zero spread would divide by zero (inf/NaN plus a RuntimeWarning)
    if dispersion == 0 or np.isnan(dispersion):
        return np.nan
    return np.sqrt(count) * (np.mean(pnls) / dispersion)

In [None]:
def init_rsi_data(data, mode):
    """Build the per-row price table the RSI calculation starts from.

    Args:
        data: DataFrame with 'eB' and 'eA' columns (renamed to bid and ask).
        mode: kept for call compatibility and currently unused. The previous
            body passed TRAIN_/TEST_ start and end dates to ``filterData``,
            which only accepts ``(data, columns)``, so every call raised.
            NOTE(review): if date-range filtering is still wanted, slice
            ``data`` before calling this function.

    Returns:
        A new DataFrame (the input is not modified) with columns
        bid, ask, mid, change, gain and loss, where
        ``mid``    is the row-wise mean of bid and ask,
        ``change`` is the difference between consecutive mids,
        ``gain``   is ``change`` where positive, else 0,
        ``loss``   is ``-change`` where negative, else 0.
        The first filtered row is dropped because it has no previous mid.
        The input's index is preserved as-is (no string conversion).
    """
    quotes = filterData(data, ['eB', 'eA'])

    # rename() returns a fresh frame, so the column assignments below never
    # write into a slice of the caller's data.
    rsi_data = quotes.rename(columns={'eB': 'bid', 'eA': 'ask'})
    rsi_data['mid'] = rsi_data[['bid', 'ask']].mean(axis=1)
    rsi_data['change'] = rsi_data['mid'].diff()

    # Drop the leading row whose change is undefined
    rsi_data = rsi_data.iloc[1:].copy()

    # Split each move into its upward and downward magnitude
    rsi_data['gain'] = rsi_data['change'].clip(lower=0)
    rsi_data['loss'] = rsi_data['change'].clip(upper=0).abs()
    return rsi_data

In [None]:
# Parameter grid for the RSI search: every (period, look-back) pair is tried
# and their product is used as the rolling window length.
PERIODS = [
    1, 3, 5, 10, 15, 30,
    60, 90, 120, 180, 330, 660,
]
LOOK_BACK = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]

In [None]:
def model(days, period, mode, upper_bound=None, lower_bound=None):
    """Back-test the RSI threshold strategy for one parameter pair and print its metrics.

    Args:
        days: look-back multiplier; combined with ``period`` into the window.
        period: period multiplier; the rolling window is ``days * period`` rows.
        mode: ``TRAIN`` runs on ``train_data``; any other value runs on
            ``test_data``.
        upper_bound: RSI level above which the signal is +1. Defaults to the
            module-level ``UPPER_BOUND``.
        lower_bound: RSI level below which the signal is -1. Defaults to the
            module-level ``LOWER_BOUND``.
            NOTE(review): neither UPPER_BOUND nor LOWER_BOUND is defined in
            any visible cell; define them in the config cell or pass the
            bounds explicitly.

    Returns:
        None. Prints winning probability, reward-to-risk ratio, t-statistic
        and total PnL for the generated round trips.
    """
    # Varying the period of the RSI
    window = days * period
    upper = UPPER_BOUND if upper_bound is None else upper_bound
    lower = LOWER_BOUND if lower_bound is None else lower_bound

    # Prepare initial data table for manipulation. The original referenced an
    # undefined global `data`; use the frame that matches the requested mode.
    source = train_data if mode == TRAIN else test_data
    rsi_data = init_rsi_data(source, mode)

    # Calculate RS and RSI based on varied period
    avg_gain = rsi_data['gain'].rolling(window=window).mean()
    avg_loss = rsi_data['loss'].rolling(window=window).mean()
    rsi = 100 - 100 / (1 + avg_gain / avg_loss)

    # Generate signals: +1 above the upper bound, -1 below the lower bound,
    # 0 otherwise (including rows where the RSI is still NaN).
    signal = np.select(
        [(rsi > upper).to_numpy(), (rsi < lower).to_numpy()],
        [1, -1],
        default=0,
    )
    signal_data = rsi_data[['bid', 'ask']].assign(signal=signal)[signal != 0]

    # Keep only rows where the signal differs from the previous non-zero
    # signal. Selection is positional, so repeated index labels cannot
    # duplicate rows and break the entry/exit pairing.
    flips = (signal_data['signal'] != signal_data['signal'].shift()).to_numpy()
    trading_data = signal_data[flips]
    if len(trading_data) % 2 != 0:
        # An unmatched final entry has no exit yet; drop it
        trading_data = trading_data.iloc[:-1]

    # Consolidate price data and signals into signed cash flows:
    # a +1 signal pays the ask, a -1 signal receives the bid.
    transactions = np.where(
        trading_data['signal'] > 0,
        -trading_data['ask'],
        trading_data['bid'],
    )

    # Calculate PNLs from transactions, inclusive of commission and multiplier.
    # Consecutive rows form (entry, exit) pairs.
    entries, exits = transactions[0::2], transactions[1::2]
    pnl_list = ((entries + exits) * MULTIPLIER - 2 * COMMISSION).tolist()

    # Calculate performance
    wp = winning_probability(pnl_list)
    rrr = reward_to_risk_ratio(pnl_list)
    ts = t_stat(pnl_list)
    total_pnl = sum(pnl_list)

    print (f'Periods: [{ period }], Look-back Days: [{ days }], Winning Probability: [{ wp }], Reward-to-Risk Ratio: [{ rrr }], T-Stat: [{ ts }], Total PNL: [{ total_pnl }]')
    

In [None]:
# Evaluate every (look-back, period) combination on the training data,
# iterating look-backs fastest within each period.
parameter_grid = [(days, period) for period in PERIODS for days in LOOK_BACK]
for days, period in parameter_grid:
    model(days, period, TRAIN)

In [None]:
# Parameters chosen based on highest t-statistic
# NOTE(review): days=90 is not one of the LOOK_BACK values searched on the
# training data (those stop at 20) - confirm the intended (days, period) pair.
model(days=90, period=60, mode=TEST)