In [None]:
!pip install statsmodels --user  
!pip install tensorboardX --user
!pip install pandas==0.24.2 --user
!pip install plotly
!pip install bs4 --user
!pip install plotly --user
!pip install -U auquan_toolbox --user
!pip install alpaca_trade_api
!pip install pykalman

In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
from itertools import product
import numpy as np
import alpaca_trade_api as tradeapi
import statsmodels
from statsmodels.tsa.stattools import coint
from pykalman import KalmanFilter

In [None]:
# NOTE(review): API_KEY and API_SECRET are not defined in any visible cell; they must
# already exist in the kernel before this cell runs (keep them out of the notebook source).
api = tradeapi.REST(API_KEY, API_SECRET, base_url='https://paper-api.alpaca.markets')

# Daily bars for QQQ and BTC/USD over the same date range.
nasdaq_data = api.get_bars('QQQ', '1Day', start='2018-01-01', end='2024-08-01', adjustment='raw').df

btc_data = api.get_crypto_bars('BTC/USD', '1Day', start='2018-01-01', end='2024-08-01').df

nasdaq_data.index = pd.to_datetime(nasdaq_data.index)
btc_data.index = pd.to_datetime(btc_data.index)

# Join on the timestamp index; columns present in both frames get the
# _nasdaq / _bitcoin suffixes (e.g. close_nasdaq, close_bitcoin).
merged_data = pd.merge(nasdaq_data, btc_data, left_index=True, right_index=True, suffixes=('_nasdaq', '_bitcoin'))
#print(merged_data)
S1 = merged_data['close_bitcoin']
S2 = merged_data['close_nasdaq']

# Cointegration test between the two close-price series.
score, pvalue, _ = coint(S1, S2)
# f-string: without the prefix the literal text "{pvalue}, {score}" was printed.
print(f"{pvalue}, {score}")

In [None]:
# Price ratio S1 / S2 with its full-sample mean as a horizontal reference line.
# The cointegration test on the same S1/S2 is already run in the data-loading cell and
# its result was never used here, so the duplicate call has been dropped.
ratios = S1 / S2
ratios.plot(figsize=(15,7))
plt.axhline(ratios.mean())
plt.legend(['Price Ratio'])
plt.show()

In [None]:
def zscore(series):
    """Standardise ``series``: subtract its mean, then divide by ``np.std(series)``."""
    centred = series - series.mean()
    scale = np.std(series)
    return centred / scale
# Full-sample z-score of the ratio, with guide lines at its mean and at +/-1.
ratio_z = zscore(ratios)
ratio_z.plot(figsize=(15,7))
plt.axhline(ratio_z.mean(), color='black')
for level, colour in ((1.0, 'red'), (-1.0, 'green')):
    plt.axhline(level, color=colour, linestyle='--')
plt.legend(['Ratio z-score', 'Mean', '+1', '-1'])
plt.show()

In [None]:
# Chronological split of the BTC / QQQ close ratio: first 80% of rows for
# training, the remainder for testing.
ratios = merged_data['close_bitcoin'].div(merged_data['close_nasdaq'])
ratiolen = round(0.8 * len(ratios))
train, test = ratios.iloc[:ratiolen], ratios.iloc[ratiolen:]

In [None]:
# Rolling statistics of the training ratio: 5-bar mean, 60-bar mean and 60-bar std.
ratios_mavg5 = train.rolling(window=5, center=False).mean()
ratios_mavg60 = train.rolling(window=60, center=False).mean()
std_60 = train.rolling(window=60, center=False).std()

# Distance of the short mean from the long mean, in units of the 60-bar std.
zscore_60_5 = (ratios_mavg5 - ratios_mavg60) / std_60

plt.figure(figsize=(15,7))
for curve in (train, ratios_mavg5, ratios_mavg60):
    plt.plot(curve.index, curve.values)
plt.legend(['Ratio','5d Ratio MA', '60d Ratio MA'])
plt.ylabel('Ratio')
plt.show()

In [None]:
# 60-bar rolling standard deviation of the training ratio.
std_60 = train.rolling(window=60, center=False).std().rename('std 60d')

# Rolling z-score for every bar: (5-bar mean - 60-bar mean) / 60-bar std.
zscore_60_5 = ((ratios_mavg5 - ratios_mavg60) / std_60).rename('z-score')

plt.figure(figsize=(15,7))
zscore_60_5.plot()
for level, colour, dash in ((0, 'black', None), (1.0, 'red', '--'), (-1.0, 'green', '--')):
    if dash is None:
        plt.axhline(level, color=colour)
    else:
        plt.axhline(level, color=colour, linestyle=dash)
plt.legend(['Rolling Ratio z-Score', 'Mean', '+1', '-1'])
plt.show()

In [None]:
plt.figure(figsize=(15,7))

# Training ratio from bar 60 onwards.
train.iloc[60:].plot()

# Signal series hold the ratio where the signal fires and 0 elsewhere:
# buy when the rolling z-score is <= -1, sell when it is >= 1.
buy = train.copy()
sell = train.copy()
buy.loc[zscore_60_5 > -1] = 0
sell.loc[zscore_60_5 < 1] = 0

for signal, colour in ((buy, 'g'), (sell, 'r')):
    signal.iloc[60:].plot(color=colour, linestyle='None', marker='^')

# Keep the x-range, but clip y to the ratio's range so the zero placeholders are off-screen.
x_lo, x_hi = plt.axis()[:2]
plt.axis((x_lo, x_hi, ratios.min(), ratios.max()))
plt.legend(['Ratio', 'Buy Signal', 'Sell Signal'])
plt.show()

In [None]:
plt.figure(figsize=(18,9))

# Training-period closes of both assets (this rebinds S1 / S2 to the truncated series).
S1 = merged_data['close_bitcoin'].iloc[:ratiolen]
S2 = merged_data['close_nasdaq'].iloc[:ratiolen]

S1.iloc[60:].plot(color='b')
S2.iloc[60:].plot(color='c')

is_buy = buy != 0
is_sell = sell != 0

# Marker series start at zero everywhere and receive a price where a signal fires.
buyR = 0 * S1.copy()
sellR = 0 * S1.copy()

# Ratio buy signal: buy marker on S1, sell marker on S2.
buyR[is_buy] = S1[is_buy]
sellR[is_buy] = S2[is_buy]

# Ratio sell signal: buy marker on S2, sell marker on S1 (applied second, so it
# wins wherever both masks are set).
buyR[is_sell] = S2[is_sell]
sellR[is_sell] = S1[is_sell]

for markers, colour in ((buyR, 'g'), (sellR, 'r')):
    markers.iloc[60:].plot(color=colour, linestyle='None', marker='^')

# Keep the x-range, clip y to the combined price range of both assets.
x_lo, x_hi = plt.axis()[:2]
plt.axis((x_lo, x_hi, min(S1.min(), S2.min()), max(S1.max(), S2.max())))

plt.legend(['BTC','NASDAQ', 'Buy Signal', 'Sell Signal'])
plt.show()

In [None]:
#mean reversion w/ pairs? (doesnt work)
# Note: this cell rebinds nasdaq_data / merged_data; the merged columns here are
# Close_nasdaq / Close_bitcoin (capital C).
start_date = '2020-01-01'
end_date = '2024-01-01'

nasdaq_data = yf.download('QQQ', start=start_date, end=end_date, interval='1d')
bitcoin_data = yf.download('BTC-USD', start=start_date, end=end_date, interval='1d')

merged_data = pd.merge(
    nasdaq_data['Close'],
    bitcoin_data['Close'],
    left_index=True,
    right_index=True,
    suffixes=('_nasdaq', '_bitcoin'),
)

# normalize: express each close as a multiple of its first value
nasdaq_close = merged_data['Close_nasdaq']
merged_data['close_nasdaq_normalized'] = nasdaq_close / nasdaq_close.iloc[0]
bitcoin_close = merged_data['Close_bitcoin']
merged_data['close_bitcoin_normalized'] = bitcoin_close / bitcoin_close.iloc[0]

def kalman_filter_ratio(S1, S2):
    """Run a 2-state Kalman filter with ``S1`` as observations and ``[S2_t, 1]`` as
    the per-step observation row, and return the filtered state means.

    Parameters
    ----------
    S1 : array-like
        Observed series passed to ``KalmanFilter.filter``.
    S2 : numpy.ndarray
        Series used to build the observation matrices (its ``.shape`` is read).

    Returns
    -------
    The first element returned by ``kf.filter(S1)`` (the state means); the
    state covariances are discarded.
    """
    delta = 1e-4
    # The state follows a random walk (identity transition) with this covariance.
    trans_cov = (delta / (1 - delta)) * np.eye(2)

    # One observation row [S2_t, 1] per step, with a length-1 axis inserted at position 1.
    design = np.vstack([S2, np.ones(S2.shape)]).T
    obs_mat = design[:, np.newaxis]

    kf = KalmanFilter(
        n_dim_obs=1,
        n_dim_state=2,
        initial_state_mean=[0, 0],
        initial_state_covariance=np.ones((2, 2)),
        transition_matrices=np.eye(2),
        observation_matrices=obs_mat,
        observation_covariance=1.0,
        transition_covariance=trans_cov,
    )

    return kf.filter(S1)[0]

# hedge ratio for shares: first state component of the Kalman filter at each step
btc_norm = merged_data['close_bitcoin_normalized']
nasdaq_norm = merged_data['close_nasdaq_normalized']
state_means = kalman_filter_ratio(btc_norm.values, nasdaq_norm.values)
hedge_ratio = state_means[:, 0]

# Spread between normalised BTC and the hedge-ratio-scaled normalised Nasdaq.
spread = btc_norm - hedge_ratio * nasdaq_norm

# 60-bar rolling z-score of the spread, indexed like merged_data.
spread_window = spread.rolling(window=60)
mean_spread = spread_window.mean()
std_spread = spread_window.std()
zscore_spread = pd.Series((spread - mean_spread) / std_spread, index=merged_data.index)

def trade(real_btc_prices, real_nasdaq_prices, hedge_ratio, zscore_spread, initial_capital=5000, max_position_size_pct=0.05, verbose=True):
    """Simulate the z-score threshold strategy bar by bar and report its performance.

    For each position ``i`` of ``zscore_spread``:

    * z > 2     -> sell ``position_size`` dollars of BTC and of Nasdaq;
    * z < -2    -> buy ``position_size`` dollars of BTC and of Nasdaq;
    * |z| < 0.5 -> close both positions at the current prices;
    * otherwise (including a NaN z-score) nothing is traded.

    ``position_size`` is ``max_position_size_pct`` of the portfolio value at that bar.

    NOTE(review): both legs are always traded in the same direction and ``hedge_ratio``
    is never read, so this is not a hedged pairs trade -- confirm that is intended.

    Parameters
    ----------
    real_btc_prices, real_nasdaq_prices : sequence of float
        Prices indexed positionally (``[i]`` and ``[-1]``), aligned with ``zscore_spread``.
    hedge_ratio : any
        Accepted for interface compatibility; unused.
    zscore_spread : pandas.Series
        Z-scores; its index entries must provide ``strftime``.
    initial_capital : float
        Starting cash.
    max_position_size_pct : float
        Fraction of the current portfolio value traded per leg on each entry signal.
    verbose : bool
        When True (default) print every trade and a per-bar portfolio summary.

    Returns
    -------
    (final_portfolio_value, trades, sharpe_ratio)
        ``trades`` is a list of ``(i, label)`` with label in
        ``{"sell_both", "buy_both", "clear"}``. ``sharpe_ratio`` is 0.0 when it is
        undefined (fewer than two returns, or zero volatility).
    """
    capital = initial_capital
    btc_position = 0
    nasdaq_position = 0
    portfolio_values = []
    trades = []

    for i in range(len(zscore_spread)):
        btc_price = real_btc_prices[i]
        nasdaq_price = real_nasdaq_prices[i]
        # Explicit positional access: an integer key on a non-integer index is deprecated.
        z = zscore_spread.iloc[i]
        date = zscore_spread.index[i].strftime('%Y-%m-%d')

        portfolio_value = capital + btc_position * btc_price + nasdaq_position * nasdaq_price
        position_size = portfolio_value * max_position_size_pct

        if z > 2:
            btc_position_size = position_size / btc_price
            nasdaq_position_size = position_size / nasdaq_price

            capital += btc_position_size * btc_price
            btc_position -= btc_position_size
            capital += nasdaq_position_size * nasdaq_price
            nasdaq_position -= nasdaq_position_size
            if verbose:
                print(f"Trade {i} ({date}): Sell BTC and Nasdaq (Mean Reversion Downward)")
            trades.append((i, "sell_both"))

        elif z < -2:
            btc_position_size = position_size / btc_price
            nasdaq_position_size = position_size / nasdaq_price

            capital -= btc_position_size * btc_price
            btc_position += btc_position_size
            capital -= nasdaq_position_size * nasdaq_price
            nasdaq_position += nasdaq_position_size
            if verbose:
                print(f"Trade {i} ({date}): Buy BTC and Nasdaq (Mean Reversion Upward)")
            trades.append((i, "buy_both"))

        elif abs(z) < 0.5:
            # Recorded on every bar inside the band, even when already flat.
            capital += btc_position * btc_price + nasdaq_position * nasdaq_price
            btc_position = 0
            nasdaq_position = 0
            if verbose:
                print(f"Trade {i} ({date}): Clearing Positions")
            trades.append((i, "clear"))

        portfolio_value = capital + btc_position * btc_price + nasdaq_position * nasdaq_price
        portfolio_values.append(portfolio_value)
        if verbose:
            print(f"Portfolio Value: ${portfolio_value:.2f} | Capital: ${capital:.2f} | BTC Position: {btc_position:.4f} | Nasdaq Position: {nasdaq_position:.4f}")
            print(f"BTC Price: ${btc_price:.2f} | Nasdaq Price: ${nasdaq_price:.2f}\n")

    # Mark open positions at the last available prices; with no prices there is only cash.
    if len(real_btc_prices) > 0 and len(real_nasdaq_prices) > 0:
        final_portfolio_value = capital + btc_position * real_btc_prices[-1] + nasdaq_position * real_nasdaq_prices[-1]
    else:
        final_portfolio_value = capital

    # Sharpe ratio scaled by sqrt(252); this assumes one bar per equity trading day
    # -- TODO confirm for the data passed in.
    # Guarded so a flat or too-short equity curve yields 0.0 and not NaN/inf,
    # matching backtest_strategy's zero-risk convention.
    sharpe_ratio = 0.0
    if len(portfolio_values) > 2:
        returns = pd.Series(portfolio_values, dtype=float).pct_change().dropna()
        volatility = returns.std()
        if volatility > 0:
            sharpe_ratio = np.sqrt(252) * returns.mean() / volatility

    return final_portfolio_value, trades, sharpe_ratio

result, trades, sharpe_ratio = trade(merged_data['Close_bitcoin'].values, merged_data['Close_nasdaq'].values, hedge_ratio, zscore_spread)
print(f"Final portfolio value: ${result:.2f}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

plt.figure(figsize=(15, 7))
plt.plot(zscore_spread, label="Z-score")
plt.axhline(0, color='black')
# NOTE(review): trade() enters at |z| > 2 and clears at |z| < 0.5, while these guide
# lines are drawn at +/-1 -- confirm which levels the chart should show.
plt.axhline(1.0, color='red', linestyle='--')
plt.axhline(-1.0, color='green', linestyle='--')

# One scatter call per trade type so every type gets exactly one legend entry.
# Previously only the very first trade was labelled, so at most one type
# appeared in the legend.
marker_styles = {
    "buy_both": dict(color='green', marker='^', label='Buy BTC and Nasdaq'),
    "sell_both": dict(color='red', marker='v', label='Sell BTC and Nasdaq'),
    "clear": dict(color='blue', marker='o', label='Clear'),
}
for trade_type, style in marker_styles.items():
    positions = [trade_idx for trade_idx, kind in trades if kind == trade_type]
    if not positions:
        continue
    # .iloc: trade indices are positions, not index labels.
    plt.scatter(zscore_spread.index[positions], zscore_spread.iloc[positions], s=100, **style)

plt.legend()
plt.title(f'Z-score of Spread (Using Kalman Filter) with Trade Markers\nSharpe Ratio: {sharpe_ratio:.2f}')
plt.show()


In [None]:
def plot_combined_with_dual_axes(start_date, end_date, window=180):
    """Download daily QQQ and BTC-USD closes and plot them on twin y-axes.

    A rolling z-score of the raw price spread (BTC close minus QQQ close) is
    computed over ``window`` rows; it is not drawn, but rows where it is NaN are
    removed before plotting.

    Parameters
    ----------
    start_date, end_date : str
        Passed to ``yf.download`` as ``start`` / ``end`` and shown in the title.
    window : int
        Rolling window length for the spread mean and standard deviation.
    """
    qqq = yf.download('QQQ', start=start_date, end=end_date, interval='1d')
    btc = yf.download('BTC-USD', start=start_date, end=end_date, interval='1d')
    prices = pd.merge(
        qqq['Close'],
        btc['Close'],
        left_index=True,
        right_index=True,
        suffixes=('_nasdaq', '_bitcoin'),
    )

    prices['Spread'] = prices['Close_bitcoin'] - prices['Close_nasdaq']
    rolling_spread = prices['Spread'].rolling(window=window)
    prices['Spread_Mean'] = rolling_spread.mean()
    prices['Spread_Std'] = rolling_spread.std()
    prices['Z-Score'] = (prices['Spread'] - prices['Spread_Mean']) / prices['Spread_Std']

    # Keep only rows with a defined z-score.
    prices = prices.dropna(subset=['Z-Score'])

    fig, ax_nasdaq = plt.subplots(figsize=(14, 8))

    # nasdaq on the left axis
    ax_nasdaq.plot(prices.index, prices['Close_nasdaq'], color='blue', label='Nasdaq (QQQ)')
    ax_nasdaq.set_xlabel('Date')
    ax_nasdaq.set_ylabel('Nasdaq (QQQ) Price ($)', color='blue')
    ax_nasdaq.tick_params(axis='y', labelcolor='blue')

    # btc on the right axis, sharing the x-axis
    ax_bitcoin = ax_nasdaq.twinx()
    ax_bitcoin.plot(prices.index, prices['Close_bitcoin'], color='orange', label='Bitcoin (BTC-USD)')
    ax_bitcoin.set_ylabel('Bitcoin (BTC-USD) Price ($)', color='orange')
    ax_bitcoin.tick_params(axis='y', labelcolor='orange')

    fig.suptitle(f'Nasdaq and Bitcoin Prices with Z-Score from {start_date} to {end_date}')
    fig.legend(loc="upper left", bbox_to_anchor=(0.1,0.85))

    ax_nasdaq.grid(True)
    plt.show()

#plot_combined_with_dual_axes('2024-01-01', '2024-08-23', window=60)

In [None]:
#try hourly
#pairs
# Hourly bars for QQQ and BTC/USD from 2021-01-01 to 2022-01-01 (rebinds nasdaq_data,
# btc_data and merged_data).
nasdaq_data = api.get_bars(
    'QQQ', '1Hour', start='2021-01-01', end='2022-01-01', adjustment='raw'
).df
btc_data = api.get_crypto_bars(
    'BTC/USD', '1Hour', start='2021-01-01', end='2022-01-01'
).df

for bars in (nasdaq_data, btc_data):
    bars.index = pd.to_datetime(bars.index)

# Join on the timestamp index; shared column names get _nasdaq / _bitcoin suffixes.
merged_data = pd.merge(
    nasdaq_data,
    btc_data,
    left_index=True,
    right_index=True,
    suffixes=('_nasdaq', '_bitcoin'),
)
def backtest_strategy(data, long_threshold, short_threshold, exit_zscore_threshold, window, trade_amount_percent, extended_zscore_threshold, extended_zscore_duration):
    """Backtest a z-score spread strategy between Nasdaq and Bitcoin closes.

    The spread is normalised Nasdaq minus normalised Bitcoin (each close divided by
    its first value); its rolling z-score over ``window`` rows drives the signals.
    Starting cash is fixed at 5000. The input frame is not modified.

    Per bar (the first bar with a defined z-score is skipped by the loop):

    1. Friday with ``hour >= 15``: liquidate and skip the bar.
    2. Z-score below ``extended_zscore_threshold`` for
       ``extended_zscore_duration * 24`` consecutive bars: liquidate and skip the bar.
    3. Z-score below ``long_threshold``: long Nasdaq / short Bitcoin;
       above ``short_threshold``: short Nasdaq / long Bitcoin. Each leg is sized
       ``cash * trade_amount_percent / (1 + |z|)``.
    4. ``|z| < exit_zscore_threshold`` with an open position: liquidate.

    Bars skipped in steps 1-2 are not appended to the equity curve.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``close_nasdaq`` and ``close_bitcoin``; index entries must
        provide ``weekday()`` and ``hour``.
    long_threshold, short_threshold, exit_zscore_threshold : float
        Z-score levels for entries and exit.
    window : int
        Rolling window (rows) for the spread mean / std.
    trade_amount_percent : float
        Base fraction of cash committed per entry, before the ``1 / (1 + |z|)`` scaling.
    extended_zscore_threshold : float
        Level below which a bar counts towards the prolonged-negative counter.
    extended_zscore_duration : float
        Length of the prolonged-negative limit, multiplied by 24 to get a bar count.

    Returns
    -------
    (portfolio_value, trades, dates, sharpe_ratio)
        ``portfolio_value`` and ``dates`` are parallel lists. ``trades`` is a list
        of ``(date, action, portfolio value at the start of that bar)``.
        ``sharpe_ratio`` is the mean/std of per-bar returns (not annualised),
        or 0 if undefined.
    """
    # Work on a private copy: the columns added and the rows dropped below must not
    # leak into the caller's frame.
    data = data.copy()

    cash = 5000
    nasdaq_position = 0
    bitcoin_position = 0
    extended_negative_counter = 0  # too long highly negative z score = bad performance
    # NOTE(review): assumes 24 bars per day; a frame restricted to bars where both
    # assets traded may have fewer bars per day -- confirm.
    max_extended_zscore_duration = extended_zscore_duration * 24  # Convert days to hours
    #not sure if needed
    data['normalized_nasdaq'] = data['close_nasdaq'] / data['close_nasdaq'].iloc[0]
    data['normalized_bitcoin'] = data['close_bitcoin'] / data['close_bitcoin'].iloc[0]

    data['spread'] = data['normalized_nasdaq'] - data['normalized_bitcoin']
    data['mean_spread'] = data['spread'].rolling(window=window).mean()
    data['std_spread'] = data['spread'].rolling(window=window).std()
    data['zscore'] = (data['spread'] - data['mean_spread']) / data['std_spread']

    # Drop warm-up rows (and any row with an undefined z-score) from the private copy.
    data.dropna(subset=['zscore'], inplace=True)

    data['long_signal'] = data['zscore'] < long_threshold
    data['short_signal'] = data['zscore'] > short_threshold
    data['exit_signal'] = abs(data['zscore']) < exit_zscore_threshold

    portfolio_value = []
    trades = []
    dates = []

    for i in range(1, len(data)):
        date = data.index[i]
        price_nasdaq = data['close_nasdaq'].iloc[i]
        price_bitcoin = data['close_bitcoin'].iloc[i]
        zscore_now = data['zscore'].iloc[i]
        # Valued before any trade on this bar; trades execute at the same prices,
        # so they do not change it.
        portfolio_value_current = cash + (nasdaq_position * price_nasdaq) + (bitcoin_position * price_bitcoin)

        # NOTE(review): weekday/hour are read in the index's own timezone; if the bars
        # are in UTC this is not 3pm exchange time -- confirm.
        if date.weekday() == 4 and date.hour >= 15:  # friday @ 3
            if nasdaq_position != 0 or bitcoin_position != 0:
                cash += nasdaq_position * price_nasdaq
                cash += bitcoin_position * price_bitcoin
                nasdaq_position = 0
                bitcoin_position = 0
                trades.append((date, "Exit positions (before weekend)", portfolio_value_current))
            continue

        if zscore_now < extended_zscore_threshold:
            extended_negative_counter += 1
        else:
            extended_negative_counter = 0

        if extended_negative_counter >= max_extended_zscore_duration:
            if nasdaq_position != 0 or bitcoin_position != 0:
                cash += nasdaq_position * price_nasdaq
                cash += bitcoin_position * price_bitcoin
                nasdaq_position = 0
                bitcoin_position = 0
                trades.append((date, "Exit positions (prolonged negative z-score)", portfolio_value_current))
            extended_negative_counter = 0
            continue

        # dynamic sizing: commit less the further the z-score is from zero
        adjusted_trade_amount_percent = trade_amount_percent / (1 + abs(zscore_now))

        if data['long_signal'].iloc[i] and cash > 0:
            trade_amount = cash * adjusted_trade_amount_percent
            # Dollar-neutral: the long leg's cost is offset by the short leg's proceeds.
            nasdaq_position += trade_amount / price_nasdaq
            cash -= trade_amount
            bitcoin_position -= trade_amount / price_bitcoin
            cash += trade_amount
            trades.append((date, "Long Nasdaq, Short Bitcoin", portfolio_value_current))

        elif data['short_signal'].iloc[i] and cash > 0:
            trade_amount = cash * adjusted_trade_amount_percent
            nasdaq_position -= trade_amount / price_nasdaq
            cash += trade_amount
            bitcoin_position += trade_amount / price_bitcoin
            cash -= trade_amount
            trades.append((date, "Short Nasdaq, Long Bitcoin", portfolio_value_current))

        if data['exit_signal'].iloc[i] and (nasdaq_position != 0 or bitcoin_position != 0):
            cash += nasdaq_position * price_nasdaq
            cash += bitcoin_position * price_bitcoin
            nasdaq_position = 0
            bitcoin_position = 0
            trades.append((date, "Exit positions (z-score normalization)", portfolio_value_current))

        portfolio_value.append(portfolio_value_current)
        dates.append(date)

    if len(portfolio_value) > 1:
        returns = np.diff(portfolio_value) / portfolio_value[:-1]
        risk = np.std(returns)
        avg_return = np.mean(returns)
        sharpe_ratio = avg_return / risk if risk != 0 else 0
    else:
        sharpe_ratio = 0

    return portfolio_value, trades, dates, sharpe_ratio

# grid search (curr values found to be best)
long_thresholds = [-1]
short_thresholds = [1]
exit_zscore_thresholds = [0.50]
windows = [180]
trade_amount_percents = [0.40]
extended_zscore_thresholds = [-2.0]
extended_zscore_durations = [3]

results = []

# Evaluate every parameter combination on a fresh copy of the merged hourly data.
for (long_th, short_th, exit_zscore_th, window, trade_amount_percent, extended_zscore_th, extended_zscore_duration) in product(
        long_thresholds, short_thresholds, exit_zscore_thresholds, windows, trade_amount_percents, extended_zscore_thresholds, extended_zscore_durations):
    portfolio_value, trades, dates, sharpe_ratio = backtest_strategy(
        merged_data.copy(), long_th, short_th, exit_zscore_th, window, trade_amount_percent, extended_zscore_th, extended_zscore_duration)
    final_portfolio_value = portfolio_value[-1] if portfolio_value else 0
    results.append({
        'long_threshold': long_th,
        'short_threshold': short_th,
        'exit_zscore_threshold': exit_zscore_th,
        'window': window,
        'trade_amount_percent': trade_amount_percent,
        'extended_zscore_threshold': extended_zscore_th,
        'extended_zscore_duration': extended_zscore_duration,
        'final_portfolio_value': final_portfolio_value,
        'sharpe_ratio': sharpe_ratio
    })

results_df = pd.DataFrame(results)

# sort by sharpe first, final portfolio value as tie-breaker
best_result = results_df.sort_values(by=['sharpe_ratio', 'final_portfolio_value'], ascending=False).iloc[0]

print(f"The best parameter combination is: {best_result}")

# Re-run the best combination to get its equity curve and trade log.
# The window is cast back to int because the selected row stores it as a float.
portfolio_value, trades, dates, _ = backtest_strategy(
    merged_data.copy(), best_result['long_threshold'], best_result['short_threshold'], best_result['exit_zscore_threshold'],
    int(best_result['window']), best_result['trade_amount_percent'], best_result['extended_zscore_threshold'], best_result['extended_zscore_duration'])

# Loop variables are deliberately NOT named `trade` / `portfolio_value`: those names
# would shadow the trade() function and clobber the equity-curve list above.
for trade_date, action, value_at_trade in trades:
    print(f"Date: {trade_date}, Action: {action}, Portfolio Value: {value_at_trade:.2f}")