In [1]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
import artool
from artool import toy, analyze
from artool.toy.toy_simu import get_pnl_2side

# Remove pandas' display limits so frames are rendered with every row and column.
# NOTE(review): with no row limit, displaying a large frame directly prints all of
# it; prefer .head()/.sample() when inspecting the full data set.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

import matplotlib

# Select the Agg backend before pyplot is imported. Agg is a non-interactive,
# file-rendering backend; the plotting cell below writes its figure with savefig.
matplotlib.use("Agg")  # original note: chosen to improve performance
import matplotlib.pyplot as plt
import seaborn as sns


## Data

In [2]:
# NOTE(review): absolute, user-specific path; consider making it configurable so
# the notebook can be re-run on another machine.
data_dir = Path("/home/yangzhe/data/toy_data_2")
date_start = datetime.datetime(2022, 1, 1)
date_end = datetime.datetime(2022, 9, 1)
symbols = toy.toy_data.get_symbol_list(date_start, date_end, logic="and")
print(f"number of symbols: {len(symbols)}")

# Read one feather file per symbol and drop the first and last 5 rows of each.
# The frames are collected in a list and concatenated once: calling pd.concat
# inside the loop would re-copy the accumulated frame on every iteration.
symbol_frames = [
    pd.read_feather(data_dir / f"{symbol}.feather").iloc[5:-5]
    for symbol in symbols
]

# ignore_index=True yields a fresh 0..n-1 index, same as reset_index(drop=True).
# pd.concat raises on an empty list, so fall back to an empty frame in that case.
if symbol_frames:
    df = pd.concat(symbol_frames, axis=0, ignore_index=True)
else:
    df = pd.DataFrame()


number of symbols: 129


In [3]:
# Preview the first rows of the combined frame (all columns are shown because
# the display.max_columns limit was removed in the setup cell).
df.head()

Unnamed: 0,symbol,funding_timestamp,funding_rate,index_price,mark_price,funding_rate_expcumsum_0.5,funding_rate_expcumsum_1,funding_rate_expcumsum_2,funding_rate_expcumsum_3,funding_rate_expcumsum_5,funding_rate_expcumsum_10,funding_rate_expcumsum_20,funding_rate_expcumsum_50,funding_rate_rol_mean_3,funding_rate_rol_std_3,funding_rate_rol_max_3,funding_rate_rol_min_3,funding_rate_rol_skew_3,funding_rate_rol_mean_5,funding_rate_rol_std_5,funding_rate_rol_max_5,funding_rate_rol_min_5,funding_rate_rol_skew_5,funding_rate_rol_kurt_5,funding_rate_rol_mean_10,funding_rate_rol_std_10,funding_rate_rol_max_10,funding_rate_rol_min_10,funding_rate_rol_skew_10,funding_rate_rol_kurt_10,funding_rate_rol_mean_20,funding_rate_rol_std_20,funding_rate_rol_max_20,funding_rate_rol_min_20,funding_rate_rol_skew_20,funding_rate_rol_kurt_20,funding_rate_rol_mean_50,funding_rate_rol_std_50,funding_rate_rol_max_50,funding_rate_rol_min_50,funding_rate_rol_skew_50,funding_rate_rol_kurt_50,funding_rate_future_1,funding_rate_future_3,funding_rate_future_5,funding_rate_future_10,index_price_expcumsum_0.5,index_price_expcumsum_1,index_price_expcumsum_2,index_price_expcumsum_3,index_price_expcumsum_5,index_price_expcumsum_10,index_price_expcumsum_20,index_price_expcumsum_50,index_price_rol_mean_3,index_price_rol_std_3,index_price_rol_max_3,index_price_rol_min_3,index_price_rol_skew_3,index_price_rol_mean_5,index_price_rol_std_5,index_price_rol_max_5,index_price_rol_min_5,index_price_rol_skew_5,index_price_rol_kurt_5,index_price_rol_mean_10,index_price_rol_std_10,index_price_rol_max_10,index_price_rol_min_10,index_price_rol_skew_10,index_price_rol_kurt_10,index_price_rol_mean_20,index_price_rol_std_20,index_price_rol_max_20,index_price_rol_min_20,index_price_rol_skew_20,index_price_rol_kurt_20,index_price_rol_mean_50,index_price_rol_std_50,index_price_rol_max_50,index_price_rol_min_50,index_price_rol_skew_50,index_price_rol_kurt_50,mark_price_expcumsum_0.5,mark_price_expcumsum_1,mark_price_expcumsum
_2,mark_price_expcumsum_3,mark_price_expcumsum_5,mark_price_expcumsum_10,mark_price_expcumsum_20,mark_price_expcumsum_50,mark_price_rol_mean_3,mark_price_rol_std_3,mark_price_rol_max_3,mark_price_rol_min_3,mark_price_rol_skew_3,mark_price_rol_mean_5,mark_price_rol_std_5,mark_price_rol_max_5,mark_price_rol_min_5,mark_price_rol_skew_5,mark_price_rol_kurt_5,mark_price_rol_mean_10,mark_price_rol_std_10,mark_price_rol_max_10,mark_price_rol_min_10,mark_price_rol_skew_10,mark_price_rol_kurt_10,mark_price_rol_mean_20,mark_price_rol_std_20,mark_price_rol_max_20,mark_price_rol_min_20,mark_price_rol_skew_20,mark_price_rol_kurt_20,mark_price_rol_mean_50,mark_price_rol_std_50,mark_price_rol_max_50,mark_price_rol_min_50,mark_price_rol_skew_50,mark_price_rol_kurt_50
0,1INCHUSDT,1641168000000000.0,0.0001,2.5558,2.55369,1.6e-05,5.7e-05,0.000127,0.000169,0.00021,0.000243,0.000258,0.000266,0.0001,0.0,0.0001,0.0001,,8e-05,4.381799e-05,0.0001,2e-06,-2.236068,5.0,4.5e-05,9.5e-05,0.0001,-0.000131,-1.722778,2.449991,4.5e-05,9.5e-05,0.0001,-0.000131,-1.722778,2.449991,4.5e-05,9.5e-05,0.0001,-0.000131,-1.722778,2.449991,0.0001,0.0003,0.0005,0.000649,0.398767,1.471091,3.684687,5.475949,7.876804,10.671087,12.552391,13.884777,2.526944,0.032202,2.5558,2.492206,-0.794579,2.496316,0.048747,2.5558,2.436312,0.038232,-1.736673,2.478711,0.061325,2.5558,2.390683,-0.18495,-0.942066,2.478711,0.061325,2.5558,2.390683,-0.18495,-0.942066,2.478711,0.061325,2.5558,2.390683,-0.18495,-0.942066,0.398449,1.470015,3.682155,5.472192,7.871317,10.663479,12.543303,13.874619,2.525463,0.032064,2.55369,2.4906,-0.891507,2.494815,0.04874,2.55369,2.434683,0.022356,-1.787568,2.476884,0.061883,2.55369,2.387231,-0.220803,-0.906635,2.476884,0.061883,2.55369,2.387231,-0.220803,-0.906635,2.476884,0.061883,2.55369,2.387231,-0.220803,-0.906635
1,1INCHUSDT,1641197000000000.0,0.0001,2.552671,2.552706,1.6e-05,5.8e-05,0.000138,0.000193,0.000254,0.00031,0.000341,0.000359,0.0001,0.0,0.0001,0.0001,,0.0001,1.438037e-12,0.0001,0.0001,,,5.3e-05,8.9e-05,0.0001,-0.000131,-1.936286,3.371,5.3e-05,8.9e-05,0.0001,-0.000131,-1.936286,3.371,5.3e-05,8.9e-05,0.0001,-0.000131,-1.936286,3.371,0.0001,0.0003,0.0005,0.000354,0.399434,1.480259,3.783149,5.752758,8.538932,11.965351,14.368379,16.111965,2.533559,0.035847,2.5558,2.492206,-1.717217,2.519588,0.039914,2.5558,2.464436,-0.670142,-1.714559,2.489276,0.062573,2.5558,2.390683,-0.467154,-1.075154,2.489276,0.062573,2.5558,2.390683,-0.467154,-1.075154,2.489276,0.062573,2.5558,2.390683,-0.467154,-1.075154,0.399395,1.479876,3.781634,5.75009,8.534468,11.958499,14.359767,16.102042,2.532332,0.036144,2.55369,2.4906,-1.730606,2.518419,0.040162,2.55369,2.463,-0.675302,-1.755936,2.487716,0.063345,2.55369,2.387231,-0.486312,-1.026122,2.487716,0.063345,2.55369,2.387231,-0.486312,-1.026122,2.487716,0.063345,2.55369,2.387231,-0.486312,-1.026122
2,1INCHUSDT,1641226000000000.0,0.0001,2.572775,2.5721,1.6e-05,5.8e-05,0.000144,0.00021,0.00029,0.000371,0.000419,0.00045,0.0001,0.0,0.0001,0.0001,,0.0001,1.438037e-12,0.0001,0.0001,,,5.9e-05,8.4e-05,0.0001,-0.000131,-2.123013,4.230293,5.9e-05,8.4e-05,0.0001,-0.000131,-2.123013,4.230293,5.9e-05,8.4e-05,0.0001,-0.000131,-2.123013,4.230293,0.0001,0.0003,0.0005,0.000253,0.402245,1.491028,3.855063,5.965505,9.097497,13.154641,16.114925,18.314757,2.560415,0.010818,2.572775,2.552671,1.570463,2.541256,0.030872,2.572775,2.492206,-1.171559,1.415686,2.499714,0.06502,2.572775,2.390683,-0.5849,-0.938841,2.499714,0.06502,2.572775,2.390683,-0.5849,-0.938841,2.499714,0.06502,2.572775,2.390683,-0.5849,-0.938841,0.402148,1.490639,3.853735,5.96311,9.093289,13.147829,16.106091,18.304369,2.559499,0.010924,2.5721,2.552706,1.716251,2.540239,0.031152,2.5721,2.4906,-1.187237,1.507926,2.498264,0.065798,2.5721,2.387231,-0.59995,-0.891632,2.498264,0.065798,2.5721,2.387231,-0.59995,-0.891632,2.498264,0.065798,2.5721,2.387231,-0.59995,-0.891632
3,1INCHUSDT,1641254000000000.0,0.0001,2.618719,2.618981,1.6e-05,5.8e-05,0.000148,0.000222,0.000319,0.000426,0.000494,0.000539,0.0001,0.0,0.0001,0.0001,,0.0001,1.438037e-12,0.0001,0.0001,,,6.3e-05,8e-05,0.0001,-0.000131,-2.291526,5.05495,6.3e-05,8e-05,0.0001,-0.000131,-2.291526,5.05495,6.3e-05,8e-05,0.0001,-0.000131,-2.291526,5.05495,0.0001,0.0003,0.0005,0.0002,0.408843,1.511892,3.926548,6.150866,9.592426,14.272327,17.819994,20.518966,2.581389,0.033856,2.618719,2.552671,1.070751,2.558434,0.04546,2.618719,2.492206,-0.304507,1.509558,2.512937,0.072613,2.618719,2.390683,-0.357957,-0.644019,2.512937,0.072613,2.618719,2.390683,-0.357957,-0.644019,2.512937,0.072613,2.618719,2.390683,-0.357957,-0.644019,0.408865,1.511845,3.925901,6.149337,9.589196,14.2664,17.811839,20.509041,2.581262,0.034074,2.618981,2.552706,1.12253,2.557615,0.046114,2.618981,2.4906,-0.284769,1.50749,2.511677,0.073535,2.618981,2.387231,-0.363964,-0.606498,2.511677,0.073535,2.618981,2.387231,-0.363964,-0.606498,2.511677,0.073535,2.618981,2.387231,-0.363964,-0.606498
4,1INCHUSDT,1641283000000000.0,0.0001,2.56189,2.5618,1.6e-05,5.8e-05,0.00015,0.000231,0.000343,0.000476,0.000565,0.000626,0.0001,0.0,0.0001,0.0001,,0.0001,1.438037e-12,0.0001,0.0001,,,6.7e-05,7.6e-05,0.0001,-0.000131,-2.446604,5.85827,6.7e-05,7.6e-05,0.0001,-0.000131,-2.446604,5.85827,6.7e-05,7.6e-05,0.0001,-0.000131,-2.446604,5.85827,0.0001,0.0003,0.0005,-1.2e-05,0.402045,1.49866,3.935436,6.242962,9.951112,15.232229,19.387847,22.623824,2.584461,0.030163,2.618719,2.56189,1.481736,2.572371,0.027023,2.618719,2.552671,1.820542,3.375939,2.517832,0.070189,2.618719,2.390683,-0.557416,-0.434953,2.517832,0.070189,2.618719,2.390683,-0.557416,-0.434953,2.517832,0.070189,2.618719,2.390683,-0.557416,-0.434953,0.402036,1.49861,3.934989,6.241802,9.948394,15.226785,19.380005,22.614007,2.584294,0.030479,2.618981,2.5618,1.51218,2.571855,0.027471,2.618981,2.552706,1.82138,3.381495,2.516689,0.071118,2.618981,2.387231,-0.563863,-0.401516,2.516689,0.071118,2.618981,2.387231,-0.563863,-0.401516,2.516689,0.071118,2.618981,2.387231,-0.563863,-0.401516


# Check correlation

In [4]:
x_feature = "funding_rate_expcumsum_5"
y_feature = "funding_rate_future_5"

# Pearson correlation between x_feature and y_feature, computed separately for
# each symbol. The row mask is built once per symbol and reused for both columns.
corr_dict = {}
for symbol in symbols:
    symbol_rows = df.loc[df["symbol"] == symbol, [x_feature, y_feature]]
    x = symbol_rows[x_feature].values
    y = symbol_rows[y_feature].values
    # corrcoef returns the 2x2 correlation matrix; [0, 1] is corr(x, y).
    corr_dict[symbol] = np.corrcoef(x, y)[0, 1]

# Plot the distribution of the per-symbol correlations and write it to disk.
fig, ax = plt.subplots()
sns.kdeplot(list(corr_dict.values()), ax=ax)
ax.set_title("x vs y correlation")
ax.set_xlabel(f"corr({x_feature}, {y_feature})")
fig.savefig("corr_dist.png")
# pyplot keeps every figure registered until it is closed; release it once saved.
plt.close(fig)

In [5]:
# Rank the symbols by correlation in ascending order and keep the five weakest.
corr_dict_sorted = sorted(corr_dict.items(), key=lambda pair: pair[1])
low_cor_symbols = corr_dict_sorted[:5]
print(low_cor_symbols)

[('UNFIUSDT', 0.17267762271829357), ('STMXUSDT', 0.1739725080768929), ('ZILUSDT', 0.19360660482574152), ('CVCUSDT', 0.2110023644855364), ('OCEANUSDT', 0.22586015942273072)]


In [8]:
# For each of the weakest symbols, scan the candidate columns and report the one
# whose Pearson correlation with y_feature is largest.
# Candidates exclude the identifier columns and every column whose name contains
# "future" (presumably the forward-looking targets, including y_feature itself).
# The column set is the same for every symbol, so the filter is computed once.
feature_candidates = [
    column
    for column in df.columns
    if column not in ("symbol", "funding_timestamp") and "future" not in column
]

for symb, _ in low_cor_symbols:
    print(f"## symbol: {symb}")
    df_tmp = df.loc[df["symbol"] == symb]
    y = df_tmp[y_feature].values

    # NOTE(review): starting from 0 means only positive correlations can win; a
    # strongly negative feature is never reported. A NaN correlation (e.g. a
    # column containing NaN or with zero variance) never compares greater, so
    # such features are silently skipped. Confirm this is the intended ranking.
    max_corr = 0
    max_corr_feature = ""
    for feature in feature_candidates:
        x = df_tmp[feature].values
        corr = np.corrcoef(x, y)[0, 1]
        if corr > max_corr:
            max_corr = corr
            max_corr_feature = feature
    print(f"max_corr: {max_corr}, max_corr_feature: {max_corr_feature}")


## symbol: UNFIUSDT
max_corr: 0.23371708761093968, max_corr_feature: funding_rate_expcumsum_1
## symbol: STMXUSDT
max_corr: 0.228767401399035, max_corr_feature: funding_rate_expcumsum_50
## symbol: ZILUSDT
max_corr: 0.3662137445962372, max_corr_feature: funding_rate
## symbol: CVCUSDT
max_corr: 0.3092344492665794, max_corr_feature: funding_rate_rol_max_3
## symbol: OCEANUSDT
max_corr: 0.30708753899576674, max_corr_feature: funding_rate_expcumsum_0.5
