# Grid search signal/price movement relationship

See [explore-signal.ipynb](./explore-signal.ipynb).

In [1]:
import datetime

import pandas as pd

from tradingstrategy.client import Client
from tradingstrategy.chain import ChainId
from tradingstrategy.timebucket import TimeBucket
from tradeexecutor.utils.default_strategies import get_default_strategies_path
from tradeexecutor.strategy.execution_context import notebook_execution_context
from tradeexecutor.utils.default_strategies import get_default_strategies_path
from tradeexecutor.strategy.strategy_module import read_strategy_module
from tradeexecutor.strategy.trading_strategy_universe import TradingStrategyUniverse, load_trading_and_lending_data
from tradeexecutor.strategy.universe_model import UniverseOptions


def create_trading_universe(
    ts: datetime.datetime,
    client: Client,
    execution_context,
    universe_options,
) -> TradingStrategyUniverse:
    
    dataset = load_trading_and_lending_data(
        client,
        execution_context=execution_context,
        universe_options=universe_options,
        # Ask for all Polygon data
        chain_id=ChainId.polygon,
        exchange_slugs={"uniswap-v3"},
        reserve_asset_symbols={"USDC"},
        asset_symbols={"LINK", "WMATIC", "WETH"},
        trading_fee=0.0005,  # Only use pairs at 0.05% fee tier
        time_bucket=TimeBucket.h1,
    )

    universe = TradingStrategyUniverse.create_from_dataset(dataset)

    return universe


client = Client.create_jupyter_client()

strategy_universe = create_trading_universe(
    datetime.datetime.utcnow(),
    client,
    notebook_execution_context,
    UniverseOptions(start_at=pd.Timestamp("2022-01-01"), end_at=pd.Timestamp("2023-11-01")),
)

data_universe = strategy_universe.data_universe

Started Trading Strategy in Jupyter notebook environment, configuration is stored in /Users/moo/.tradingstrategy


Downloading interest rate data for 4 assets:   0%|          | 0/8 [00:00<?, ?it/s]

## Explore signal vs price change

Create a function `calculate_signal_vs_profit` which calculates 
- Signal (naive momentum)
- Profit: the last and the best future price we can get
- Allows us to play around with different time windows
- Split between shorts and longs


In [2]:
import numpy as np
from pandas.tseries.frequencies import to_offset

from tradingstrategy.utils.forward_fill import forward_fill
from tradingstrategy.pair import DEXPair


def calculate_signal_vs_profit(
    df, 
    signal_window: pd.Timedelta, 
    profit_window: pd.Timedelta,
    data_time_bucket: pd.Timedelta,        
) -> pd.DataFrame:
    """Calculate signals and profits for all incoming candles."""
    
    # Create entries for past price to be used for signal
    # and future price (used for the price correlation)
    signal_offset = to_offset(signal_window)
    profit_offset = to_offset(profit_window)

    df["prev"] = df["close"].shift(freq=signal_offset)
    df["next"] = df["open"].shift(freq=-profit_offset)

    # Calculate signal from the past and price difference to the future
    df["signal"] = (df["prev"] - df["open"]) / df["open"]
    df["price_diff"] = (df["next"] - df["open"]) / df["open"]  # Get the profit on the final day of profit window

    # On negative signals, we go short.
    # On zero signal and lack of data set side to NA
    df["side"] = pd.NA
    df.loc[df["signal"] > 0, "side"] = "long"
    df.loc[df["signal"] < 0, "side"] = "short"

    number_of_candles = profit_window / data_time_bucket
    assert number_of_candles > 0 and number_of_candles.is_integer(), f"Could not calculate candle count that fits into profit window {profit_window} for data time frame {data_time_bucket}"
    number_of_candles = int(number_of_candles)

    # Max and min price wihtin the profit window will determine the profit for longs and shorts respective
    df["max_future_price"] = df["close"].rolling(number_of_candles).max().shift(-number_of_candles) # Get the max profit on the profit window, assuming take profit %
    df["min_future_price"] = df["close"].rolling(number_of_candles).min().shift(-number_of_candles) # Get the max profit on the profit window, assuming take profit %    
    
    # Calculate profit separately for longs and shorts
    # using Pandas Mask
    # https://stackoverflow.com/a/33770421/315168
    #
    # We calculate both profit after X time,
    # and also max take profit, assuming
    # we could do a perfect trailing stop loss
    #
    longs = (df["side"] == "long")
    shorts = (df["side"] == "short")
    df.loc[longs, "profit"] = df["price_diff"]
    df.loc[shorts, "profit"] = -df["price_diff"]
    df.loc[longs, "profit_max"] = (df["max_future_price"] - df["open"]) / df["open"]  # Get the profit based on max price
    df.loc[shorts, "profit_max"] = -(df["min_future_price"] - df["open"]) / df["open"]  # Get the profit based on max price

    df.loc[longs, "desc"] = df.agg('{0[pair]} long'.format, axis=1)
    df.loc[shorts, "desc"] = df.agg('{0[pair]} short'.format, axis=1)
    
    return df

Create a function `calculate_signal_vs_price_for_pair` which calculates 
- Calculates the signal vs. for certain trading pair
- Bundle few different pairs to the same `DataFrame` so we can examine them together


In [3]:
def calculate_signal_vs_price_for_pair(
    df: pd.DataFrame, 
    signal_window=pd.Timedelta(days=7),
    profit_window=pd.Timedelta(days=7),
) -> pd.DataFrame:
    """Calculate signal vs. profit ratio for an individual pair."""

    df = calculate_signal_vs_profit(
        df,
        signal_window,
        profit_window,
        data_time_bucket=data_universe.time_bucket.to_pandas_timedelta(),
    )

    return df

In [4]:
# Pull out trading pair details for pairs we are examining
pairs = data_universe.pairs
examined_pairs = [
    pairs.get_pair_by_human_description([ChainId.polygon, "uniswap-v3", "WETH", "USDC"]),
    pairs.get_pair_by_human_description([ChainId.polygon, "uniswap-v3", "LINK", "USDC"]),
    pairs.get_pair_by_human_description([ChainId.polygon, "uniswap-v3", "WMATIC", "USDC"]),
]

# Prepare trading pair data

In [5]:
pair_dfs = []
for pair in examined_pairs:
    df = data_universe.candles.get_candles_by_pair(pair)
    assert df is not None
    df = df.copy()      
    df = forward_fill(
        df, 
        freq=data_universe.time_bucket.to_frequency(), 
        columns=("open", "high", "low", "close")
    )
    df["pair"] = pair.get_ticker()
    pair_dfs.append(df)

print(f"Prepared trading pair data for {len(pair_dfs)} pairs")


Prepared trading pair data for 3 pairs


## Grid search correlative parameters

Grid search for the best lookback and look foward parametrs.

- Go from 4 hours to 14 days for both parameters
- We use a background worker pool to speed up the computation, by allocating background threads/workers for each task

In [6]:
import futureproof
import statsmodels.api as sm

from tqdm.auto import tqdm

look_backs = [pd.Timedelta(hours=hours) for hours in range(4, 30*24, 16)]
look_forwards = [pd.Timedelta(hours=hours) for hours in range(4, 30*24, 16)]

# look_backs = [pd.Timedelta(hours=hours) for hours in range(4, 12, 4)]
# look_forwards = [pd.Timedelta(hours=hours) for hours in range(4, 12, 4)]


def process_background_job(look_back: pd.Timedelta, look_forward: pd.Timedelta) -> tuple:
    # Create signal vs. price analysis for examined pairs and calculate correlation
    # Make a copy of DataFrame as it is mutated in-place
    signal_vs_profit = [calculate_signal_vs_price_for_pair(df.copy(), look_back, look_forward) for df in pair_dfs]
    
    # Calculate linear regression for signal vs. profit 
    df = pd.concat(signal_vs_profit)
    df = df.dropna()
    longs = df.loc[df["side"] == "long"]
    shorts = df.loc[df["side"] == "short"]

    # https://stackoverflow.com/a/54685349/315168
    #regression = sm.OLS(df["profit_max"], df["signal"]).fit()
    long_regression = sm.OLS(longs["profit_max"], longs["signal"]).fit()
    short_regression = sm.OLS(shorts["profit_max"], shorts["signal"]).fit()
    return look_back, look_forward, long_regression, short_regression
        


Run the grid search using a thread worker.

In [7]:
results = []

executor = futureproof.executors.ThreadPoolExecutor(max_workers=8)
with futureproof.TaskManager(executor, error_policy="log") as task_manager:
    
    # Send individual jobs to the multiprocess worker pool
    total_tasks = 0
    for look_back in look_backs:
        for look_forward in look_forwards:
            task_manager.submit(process_background_job, look_back, look_forward)
            total_tasks += 1

    print(f"Processing grid search {total_tasks} background jobs")

    # Run the background jobs and read back the results from the background worker
    # with a progress bar
    with tqdm(total=total_tasks) as progress_bar:
        for task in task_manager.as_completed():
            if isinstance(task.result, Exception):
                executor.join()
                raise RuntimeError(f"Could not complete task for args {task.args}") from task.result
            
            look_back, look_forward, long_regression, short_regression = task.result
            results.append([
                look_back,
                look_forward,
                long_regression.rsquared,
                short_regression.rsquared
            ])
            progress_bar.update()


# Display grid search results and give a background gradient colour based on 
# the performance of a grid combination
df = pd.DataFrame(results, columns=["Look back", "Look forward", "Regression long", "Regression short"])

value_cols = ["Regression long", "Regression short"]

formatted = df.style.background_gradient(
    axis = 0,
    subset = value_cols,
).highlight_min(
    color = 'pink',
    axis = 0,
    subset = value_cols,
).highlight_max(
    color = 'darkgreen',
    axis = 0,
    subset = value_cols,
)

pd.set_option('display.min_rows', 5000)

print("Max long correlation")
display(df[df["Regression long"] == df["Regression long"].max()])

print("Max short correlation")
display(df[df["Regression short"] == df["Regression short"].max()])

display(formatted)

Processing grid search 2025 background jobs


  0%|          | 0/2025 [00:00<?, ?it/s]

Max long correlation             Look back     Look forward  Regression long  Regression short
1349 19 days 12:00:00 29 days 12:00:00         0.468239          0.483737
Max short correlation             Look back     Look forward  Regression long  Regression short
2023 29 days 12:00:00 29 days 12:00:00         0.423188          0.540895


Unnamed: 0,Look back,Look forward,Regression long,Regression short
0,0 days 04:00:00,0 days 20:00:00,0.033335,0.049243
1,0 days 04:00:00,1 days 12:00:00,0.04711,0.067676
2,0 days 04:00:00,3 days 12:00:00,0.050875,0.075397
3,0 days 04:00:00,2 days 20:00:00,0.05057,0.07463
4,0 days 04:00:00,0 days 04:00:00,0.003418,0.002853
5,0 days 04:00:00,2 days 04:00:00,0.052192,0.070003
6,0 days 04:00:00,4 days 04:00:00,0.052356,0.074634
7,0 days 04:00:00,4 days 20:00:00,0.053728,0.076085
8,0 days 04:00:00,6 days 04:00:00,0.051629,0.082429
9,0 days 04:00:00,5 days 12:00:00,0.053273,0.078535
