# Trading with the trend strategy: long + short

In [1]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import artool
from artool import toy, analyze
from artool.toy.toy_simu import get_pnl_2side

# remove limits on number of rows and columns
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

import matplotlib

matplotlib.use("Agg")  # use to improve performance
import matplotlib.pyplot as plt
import seaborn as sns


## Data


In [2]:
# Location of the per-symbol feather files and the date range used to pick symbols.
data_dir = Path("/home/yangzhe/data/toy_data_2")
date_start = datetime.datetime(2022, 1, 1)
date_end = datetime.datetime(2022, 9, 1)
symbols = toy.toy_data.get_symbol_list(date_start, date_end, logic="and")
print(f"number of symbols: {len(symbols)}")

# Collect the per-symbol frames and concatenate once at the end: calling
# pd.concat inside the loop re-copies the accumulated frame on every
# iteration, which is quadratic in the number of symbols.
symbol_frames = []
for symbol in symbols:
    df_ = pd.read_feather(data_dir / f"{symbol}.feather")
    # remove head/tail 5 rows
    symbol_frames.append(df_.iloc[5:-5])
# pd.concat raises on an empty list, so keep the original "empty frame" result.
df = pd.concat(symbol_frames, axis=0) if symbol_frames else pd.DataFrame()
df = df.reset_index(drop=True)


number of symbols: 129


In [3]:
# Feature used as the trading signal (x) and the forward-looking target (y).
x_feature, y_feature = "funding_rate_expcumsum_5", "funding_rate_future_5"

# Chronological split: rows stamped before train_end_date form the training
# set, everything else is the test set. The cut-off is scaled by 1e6 before
# being compared with "funding_timestamp".
# NOTE(review): train_end_date is a naive datetime, so .timestamp() interprets
# it in the machine's local timezone - confirm this matches the data.
train_end_date = datetime.datetime(2022, 6, 1)
train_idx = df["funding_timestamp"].lt(train_end_date.timestamp() * 1e6)
test_idx = ~train_idx
df_train, df_test = df.loc[train_idx], df.loc[test_idx]


In [4]:
# Short-side thresholds; the scans below keep these fixed and only vary the
# long-side thresholds.
best_buy_short, best_sell_short = 0.0002, -0.0001

# Grids scanned for the long-side buy / sell thresholds.
buy_dim = np.linspace(start=-5e-4, stop=0, num=21)
sell_dim = np.linspace(start=-1e-4, stop=3e-4, num=41)


## Method 0

Use all symbols available.

In [5]:
# Directory that receives the figures saved by the Method 0 cells.
method_0_dir = Path("./method_0")
method_0_dir.mkdir(exist_ok=True)


Toy run: fixed long thresholds evaluated on the test set

In [6]:
# Evaluate one fixed set of thresholds on the test split.
# Short side reuses the notebook-level best_* values; long side uses literals.
buythres_short = best_buy_short
sellthres_short = best_sell_short
buythres_long = -1e-4
sellthres_long = 0
total_pnl = 0
for symbol in symbols:
    # Filter this symbol's rows once and reuse the slice for every column
    # (the original rebuilt the same boolean mask for each column and also
    # extracted an unused y array).
    df_symbol = df_test.loc[df_test["symbol"] == symbol]
    x = df_symbol[x_feature].values
    fr = df_symbol["funding_rate"].values
    cur_pnl = get_pnl_2side(
        x,
        fr,
        buythres_short,
        sellthres_short,
        buythres_long,
        sellthres_long,
        verbose=True,
    )
    total_pnl += cur_pnl
# Average PnL per symbol.
pnl_rate = total_pnl / len(symbols)
print(f"total_pnl: {total_pnl}, n_symbols: {len(symbols)}")
# The factor 4 presumably annualises a 3-month test window
# (train_end_date 2022-06-01 to date_end 2022-09-01) - TODO confirm.
print(f"pnl_rate (y): {pnl_rate * 4 * 100} %")


Position len 276
Number of non-zero short: 117
Number of non-zero long: 137
Position len 276
Number of non-zero short: 126
Number of non-zero long: 126
Position len 276
Number of non-zero short: 94
Number of non-zero long: 129
Position len 276
Number of non-zero short: 106
Number of non-zero long: 135
Position len 276
Number of non-zero short: 192
Number of non-zero long: 56
Position len 276
Number of non-zero short: 184
Number of non-zero long: 74
Position len 276
Number of non-zero short: 209
Number of non-zero long: 53
Position len 276
Number of non-zero short: 137
Number of non-zero long: 124
Position len 276
Number of non-zero short: 172
Number of non-zero long: 78
Position len 276
Number of non-zero short: 113
Number of non-zero long: 138
Position len 276
Number of non-zero short: 242
Number of non-zero long: 24
Position len 276
Number of non-zero short: 183
Number of non-zero long: 74
Position len 276
Number of non-zero short: 71
Number of non-zero long: 169
Position len 276
Num

Scan train to get the optimal thresholds

In [12]:
# Grid-scan the long-side thresholds on the training split.
# The short-side thresholds stay fixed at best_buy_short / best_sell_short.
dims = {"buythres": buy_dim, "sellthres": sell_dim}


def obj_train(buythres, sellthres):
    """Score one (buythres, sellthres) pair on the training split.

    Sums ``get_pnl_2side`` over every symbol in ``symbols`` using that
    symbol's ``x_feature`` and ``funding_rate`` columns from ``df_train``,
    then averages over the number of symbols.

    Args:
        buythres: long-side buy threshold forwarded to ``get_pnl_2side``.
        sellthres: long-side sell threshold forwarded to ``get_pnl_2side``.

    Returns:
        Mean per-symbol PnL scaled by 12 / 5 (presumably annualising a
        5-month training window - TODO confirm).
    """
    total_pnl = 0
    for symbol in symbols:
        # Filter this symbol's rows once instead of once per column.
        df_symbol = df_train.loc[df_train["symbol"] == symbol]
        x = df_symbol[x_feature].values
        fr = df_symbol["funding_rate"].values
        cur_pnl = get_pnl_2side(
            x, fr, best_buy_short, best_sell_short, buythres, sellthres
        )
        total_pnl += cur_pnl
    pnl_rate = total_pnl / len(symbols)
    return pnl_rate * 12 / 5


df_scan = artool.analyze.scan.grid_scan(dims, obj_train)

# find optimal buythres and sellthres
df_tmp = df_scan.sort_values(by="score", ascending=False)
best_buy = df_tmp.iloc[0]["buythres"]
best_sell = df_tmp.iloc[0]["sellthres"]
max_agr = df_tmp.iloc[0]["score"]  # max annual growth rate
print(f"best_buy: {best_buy:.6f}, best_sell: {best_sell:.6f}")
print(f"max profit rate per year: {max_agr * 100:.4f} %")
# Kept so the train optimum can later be evaluated on the test split.
opt_train = {
    "buythres": best_buy,
    "sellthres": best_sell,
    "max_agr": max_agr,
}

# Plot heat map
fig, ax = plt.subplots(figsize=(8, 8))
# DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the keyword
# form also works on older versions. round(6) tidies the float axis labels.
df_tmp = df_scan.round(6).pivot(
    index="buythres", columns="sellthres", values="score"
)
sns.heatmap(df_tmp, ax=ax)
# add max_agr
ax.set_title(f"max_agr: {max_agr * 100:.4f} %")
fig.savefig(method_0_dir / "trend_sig.train.png")
plt.close(fig)


100%|██████████| 861/861 [00:38<00:00, 22.59it/s]


best_buy: -0.000275, best_sell: 0.000180
max profit rate per year: 4.6199 %


Scan test for validation

In [14]:
# Grid-scan the long-side thresholds on the test split (validation only).
# The short-side thresholds stay fixed at best_buy_short / best_sell_short.
dims = {"buythres": buy_dim, "sellthres": sell_dim}


def obj_test(buythres, sellthres):
    """Score one (buythres, sellthres) pair on the test split.

    Sums ``get_pnl_2side`` over every symbol in ``symbols`` using that
    symbol's ``x_feature`` and ``funding_rate`` columns from ``df_test``,
    then averages over the number of symbols.

    Args:
        buythres: long-side buy threshold forwarded to ``get_pnl_2side``.
        sellthres: long-side sell threshold forwarded to ``get_pnl_2side``.

    Returns:
        Mean per-symbol PnL scaled by 4 (presumably annualising a 3-month
        test window - TODO confirm).
    """
    total_pnl = 0
    for symbol in symbols:
        # Filter this symbol's rows once instead of once per column.
        df_symbol = df_test.loc[df_test["symbol"] == symbol]
        x = df_symbol[x_feature].values
        fr = df_symbol["funding_rate"].values
        cur_pnl = get_pnl_2side(
            x, fr, best_buy_short, best_sell_short, buythres, sellthres
        )
        total_pnl += cur_pnl
    pnl_rate = total_pnl / len(symbols)
    return pnl_rate * 4


df_scan = artool.analyze.scan.grid_scan(dims, obj_test)

# find optimal buythres and sellthres
df_tmp = df_scan.sort_values(by="score", ascending=False)
best_buy = df_tmp.iloc[0]["buythres"]
best_sell = df_tmp.iloc[0]["sellthres"]
max_agr = df_tmp.iloc[0]["score"]  # max annual growth rate
print(f"best_buy: {best_buy:.6f}, best_sell: {best_sell:.6f}")
print(f"max profit rate per year: {max_agr * 100:.4f} %")
# Best score reachable on the test split itself; used as the reference when
# the train optimum is evaluated on test.
opt_test = {
    "buythres": best_buy,
    "sellthres": best_sell,
    "max_agr": max_agr,
}

# Plot heat map
fig, ax = plt.subplots(figsize=(8, 8))
# DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the keyword
# form also works on older versions. round(6) tidies the float axis labels.
df_tmp = df_scan.round(6).pivot(
    index="buythres", columns="sellthres", values="score"
)
sns.heatmap(df_tmp, ax=ax)
ax.set_title(f"max_agr: {max_agr * 100:.4f} %")
fig.savefig(method_0_dir / "trend_sig.test.png")
plt.close(fig)


100%|██████████| 861/861 [00:25<00:00, 34.41it/s]


best_buy: -0.000175, best_sell: 0.000160
max profit rate per year: 4.4903 %


Check the train-optimal thresholds on the test set

In [15]:
# Score the thresholds that were optimal on train against the test split.
train_best_buy, train_best_sell = opt_train["buythres"], opt_train["sellthres"]
agr_tr_opt = obj_test(train_best_buy, train_best_sell)

# Gap relative to the best score the test scan itself found.
test_best_agr = opt_test["max_agr"]
agr_diff = (agr_tr_opt - test_best_agr) / test_best_agr
print(f"agr_diff: {agr_diff * 100:.4f} %")


agr_diff: -0.6933 %


## Method 1

Only use symbols with high correlation

In [16]:
# Directory that receives the figures saved by the Method 1 cells.
method_1_dir = Path("./method_1")
method_1_dir.mkdir(exist_ok=True)


Check correlations

In [17]:
# Per-symbol correlation coefficient between the signal (x_feature) and the
# target (y_feature), computed on the training split only.
corr_dict = {}
for symbol in symbols:
    # Filter this symbol's rows once instead of once per column.
    df_symbol = df_train.loc[df_train["symbol"] == symbol]
    x = df_symbol[x_feature].values
    y = df_symbol[y_feature].values
    # Off-diagonal entry of the 2x2 correlation matrix.
    corr_dict[symbol] = np.corrcoef(x, y)[0, 1]
# Plot correlation_distribution
fig, ax = plt.subplots()
sns.kdeplot(list(corr_dict.values()), ax=ax)
ax.set_title("x vs y correlation")
fig.savefig(method_1_dir / "corr_dist.png")
# Release the figure once it is on disk, as the scan cells do.
plt.close(fig)


Scan train and find optimal threshold

In [18]:
# For each correlation cut, keep only the symbols whose train correlation
# exceeds it, grid-scan the long-side thresholds on the training split and
# record the best result per cut in agr_dict_tr.
agr_dict_tr = {}
# The scan grid does not depend on the correlation cut, so build it once.
dims = {"buythres": buy_dim, "sellthres": sell_dim}
for corr_thre in [0.3, 0.5, 0.7, 0.72, 0.73, 0.74, 0.75, 0.76]:
    symbols_high_corr = [k for k, v in corr_dict.items() if v > corr_thre]
    print(f"# num of symbols with corr > {corr_thre}: {len(symbols_high_corr)}")
    if len(symbols_high_corr) == 0:
        continue

    # NOTE(review): this rebinds the notebook-level obj_train defined by the
    # Method 0 train scan; after this cell it refers to the last cut's symbols.
    def obj_train(buythres, sellthres):
        """Score one (buythres, sellthres) pair on train for the current cut.

        Averages ``get_pnl_2side`` over ``symbols_high_corr`` and scales the
        mean by 12 / 5 (presumably annualising a 5-month training window -
        TODO confirm).
        """
        total_pnl = 0
        for symbol in symbols_high_corr:
            # Filter this symbol's rows once instead of once per column.
            df_symbol = df_train.loc[df_train["symbol"] == symbol]
            x = df_symbol[x_feature].values
            fr = df_symbol["funding_rate"].values
            cur_pnl = get_pnl_2side(
                x, fr, best_buy_short, best_sell_short, buythres, sellthres
            )
            total_pnl += cur_pnl
        pnl_rate = total_pnl / len(symbols_high_corr)
        return pnl_rate * 12 / 5

    df_scan = artool.analyze.scan.grid_scan(dims, obj_train)

    # find optimal buythres and sellthres
    df_tmp = df_scan.sort_values(by="score", ascending=False)
    best_buy = df_tmp.iloc[0]["buythres"]
    best_sell = df_tmp.iloc[0]["sellthres"]
    max_agr = df_tmp.iloc[0]["score"]  # max annual growth rate
    print(f"best_buy: {best_buy:.6f}, best_sell: {best_sell:.6f}")
    print(f"max profit rate per year: {max_agr * 100:.4f} %")
    agr_dict_tr[corr_thre] = {
        "buythres": best_buy,
        "sellthres": best_sell,
        "max_agr": max_agr,
    }

    # Plot heat map
    fig, ax = plt.subplots(figsize=(8, 8))
    # DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the
    # keyword form also works on older versions.
    df_tmp = df_scan.round(6).pivot(
        index="buythres", columns="sellthres", values="score"
    )
    sns.heatmap(df_tmp, ax=ax)
    # add max_agr
    ax.set_title(f"max_agr: {max_agr * 100:.4f} %")
    fig.savefig(method_1_dir / f"trend_sig_corr_{corr_thre:.2f}.train.png")
    plt.close(fig)


# num of symbols with corr > 0.3: 117


100%|██████████| 861/861 [00:34<00:00, 24.90it/s]


best_buy: -0.000275, best_sell: 0.000180
max profit rate per year: 4.7283 %
# num of symbols with corr > 0.5: 79


100%|██████████| 861/861 [00:23<00:00, 36.85it/s]


best_buy: -0.000325, best_sell: 0.000290
max profit rate per year: 4.8788 %
# num of symbols with corr > 0.7: 11


100%|██████████| 861/861 [00:03<00:00, 259.62it/s]


best_buy: -0.000350, best_sell: 0.000140
max profit rate per year: 5.5023 %
# num of symbols with corr > 0.72: 9


100%|██████████| 861/861 [00:02<00:00, 313.70it/s]


best_buy: -0.000350, best_sell: 0.000150
max profit rate per year: 5.7712 %
# num of symbols with corr > 0.73: 7


100%|██████████| 861/861 [00:02<00:00, 406.90it/s]


best_buy: -0.000350, best_sell: 0.000150
max profit rate per year: 6.1731 %
# num of symbols with corr > 0.74: 5


100%|██████████| 861/861 [00:01<00:00, 549.60it/s]


best_buy: -0.000350, best_sell: 0.000140
max profit rate per year: 6.3527 %
# num of symbols with corr > 0.75: 3


100%|██████████| 861/861 [00:01<00:00, 857.88it/s] 


best_buy: -0.000375, best_sell: 0.000270
max profit rate per year: 6.6136 %
# num of symbols with corr > 0.76: 1


100%|██████████| 861/861 [00:00<00:00, 1532.12it/s]


best_buy: -0.000125, best_sell: 0.000060
max profit rate per year: 10.8689 %


In [19]:
# Plot max_agr vs corr_thre
fig, ax = plt.subplots()
# x / y must be passed by keyword: seaborn >= 0.12 accepts only `data`
# positionally, so the two-positional-argument form raises a TypeError.
sns.lineplot(
    x=list(agr_dict_tr.keys()),
    y=[v["max_agr"] for v in agr_dict_tr.values()],
    ax=ax,
)
ax.set_title("max_agr vs corr_thre")
fig.savefig(method_1_dir / "max_agr_vs_corr_thre.train.png")
# Release the figure once it is on disk, as the scan cells do.
plt.close(fig)




Scan test

In [20]:
# Same correlation cuts as the train scan, but scored on the test split.
# Symbol selection still uses corr_dict, which was computed on train only.
agr_dict_te = {}
# The scan grid does not depend on the correlation cut, so build it once.
dims = {"buythres": buy_dim, "sellthres": sell_dim}
for corr_thre in [0.3, 0.5, 0.7, 0.72, 0.73, 0.74, 0.75, 0.76]:
    symbols_high_corr = [k for k, v in corr_dict.items() if v > corr_thre]
    print(f"# num of symbols with corr > {corr_thre}: {len(symbols_high_corr)}")
    if len(symbols_high_corr) == 0:
        continue

    # NOTE(review): this rebinds the notebook-level obj_test defined by the
    # Method 0 test scan; re-running the "train optimum on test" cell after
    # this one would silently use the last cut's symbols.
    def obj_test(buythres, sellthres):
        """Score one (buythres, sellthres) pair on test for the current cut.

        Averages ``get_pnl_2side`` over ``symbols_high_corr`` and scales the
        mean by 4 (presumably annualising a 3-month test window - TODO
        confirm).
        """
        total_pnl = 0
        for symbol in symbols_high_corr:
            # Filter this symbol's rows once instead of once per column.
            df_symbol = df_test.loc[df_test["symbol"] == symbol]
            x = df_symbol[x_feature].values
            fr = df_symbol["funding_rate"].values
            cur_pnl = get_pnl_2side(
                x, fr, best_buy_short, best_sell_short, buythres, sellthres
            )
            total_pnl += cur_pnl
        pnl_rate = total_pnl / len(symbols_high_corr)
        return pnl_rate * 4

    df_scan = artool.analyze.scan.grid_scan(dims, obj_test)

    # find optimal buythres and sellthres
    df_tmp = df_scan.sort_values(by="score", ascending=False)
    best_buy = df_tmp.iloc[0]["buythres"]
    best_sell = df_tmp.iloc[0]["sellthres"]
    max_agr = df_tmp.iloc[0]["score"]  # max annual growth rate
    print(f"best_buy: {best_buy:.6f}, best_sell: {best_sell:.6f}")
    print(f"max profit rate per year: {max_agr * 100:.4f} %")
    agr_dict_te[corr_thre] = {
        "buythres": best_buy,
        "sellthres": best_sell,
        "max_agr": max_agr,
    }

    # Plot heat map
    fig, ax = plt.subplots(figsize=(8, 8))
    # DataFrame.pivot takes keyword-only arguments in pandas >= 2.0; the
    # keyword form also works on older versions.
    df_tmp = df_scan.round(6).pivot(
        index="buythres", columns="sellthres", values="score"
    )
    sns.heatmap(df_tmp, ax=ax)
    # add max_agr
    ax.set_title(f"max_agr: {max_agr * 100:.4f} %")
    fig.savefig(method_1_dir / f"trend_sig_corr_{corr_thre:.2f}.test.png")
    plt.close(fig)


# num of symbols with corr > 0.3: 117


100%|██████████| 861/861 [00:22<00:00, 38.67it/s]


best_buy: -0.000150, best_sell: 0.000130
max profit rate per year: 4.5721 %
# num of symbols with corr > 0.5: 79


100%|██████████| 861/861 [00:15<00:00, 56.55it/s]


best_buy: -0.000200, best_sell: 0.000240
max profit rate per year: 4.3189 %
# num of symbols with corr > 0.7: 11


100%|██████████| 861/861 [00:02<00:00, 418.52it/s]


best_buy: -0.000350, best_sell: 0.000240
max profit rate per year: 4.5800 %
# num of symbols with corr > 0.72: 9


100%|██████████| 861/861 [00:01<00:00, 484.60it/s]


best_buy: -0.000225, best_sell: 0.000130
max profit rate per year: 4.8142 %
# num of symbols with corr > 0.73: 7


100%|██████████| 861/861 [00:01<00:00, 604.64it/s]


best_buy: -0.000350, best_sell: 0.000280
max profit rate per year: 5.5324 %
# num of symbols with corr > 0.74: 5


100%|██████████| 861/861 [00:01<00:00, 508.86it/s]


best_buy: -0.000350, best_sell: 0.000280
max profit rate per year: 6.2073 %
# num of symbols with corr > 0.75: 3


100%|██████████| 861/861 [00:00<00:00, 1621.63it/s]


best_buy: -0.000350, best_sell: 0.000280
max profit rate per year: 4.6487 %
# num of symbols with corr > 0.76: 1


100%|██████████| 861/861 [00:00<00:00, 2130.77it/s]


best_buy: -0.000350, best_sell: 0.000230
max profit rate per year: 3.5299 %


In [21]:
# Plot max_agr vs corr_thre
# Overlay the best train score and the best test score per correlation cut.
fig, ax = plt.subplots()
ax.plot(
    list(agr_dict_tr.keys()),
    [v["max_agr"] for v in agr_dict_tr.values()],
    label="train",
)
ax.plot(
    list(agr_dict_te.keys()), [v["max_agr"] for v in agr_dict_te.values()], label="test"
)
ax.legend()
ax.set_title("max_agr vs corr_thre")
fig.savefig(method_1_dir / "max_agr_vs_corr_thre.png")
# Release the figure once it is on disk, as the scan cells do.
plt.close(fig)


In [None]:
# Preview the first rows of the test split.
df_test.head()


In [None]:
# Load a single ticker CSV for ad-hoc inspection.
# NOTE(review): this rebinds df_tmp, which the scan cells above use for
# sorted / pivoted scan results.
df_tmp = pd.read_csv(
    "/home/shared/coin/tardis_derivative_ticker/2022-09-30/BTCUSDT.csv"
)


In [None]:
# Show 10 randomly chosen rows; no random_state is set, so the rows differ
# between runs.
df_tmp.sample(10)


In [None]:
# Summary statistics of the loaded frame via DataFrame.describe().
df_tmp.describe()
