# Import Modules

In [1]:
import os
import random
import pickle
import importlib
import argparse

import numpy as np
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import torch

In [2]:
import config
import model
import utility

if utility.is_notebook():
    from tqdm.notebook import tqdm
else:
    from tqdm import tqdm

# Set Random Seed

In [3]:
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same integer is handed to Python's ``random`` module, NumPy's legacy
    global generator, and PyTorch (``torch.manual_seed`` and
    ``torch.cuda.manual_seed_all``).

    Args:
        seed: Integer seed applied to all generators.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)


# Fix the seed once, before any configuration or data handling takes place.
set_seed(42)

# Parse configs

In [4]:
# Start from the defaults in config.py; command-line flags may override them.
configs = config.configs
parser = argparse.ArgumentParser(description='Setting configs')
# NOTE(review): presumably registers one flag per (dotted) config key —
# confirm against utility.add_arguments.
utility.add_arguments(configs, parser)

# parse_known_args() returns unrecognised arguments separately instead of
# aborting, so extra argv entries (e.g. from a notebook kernel) are tolerated.
args, unknown = parser.parse_known_args()
for arg, value in vars(args).items():
    if value is None:
        # Flag not supplied: leave the default from config.py untouched.
        continue
    # A dotted argument name is treated as a key path into the nested dict.
    keys = arg.split(".")
    utility.nested_dict_update(value, configs, keys, indent=0)

# Asset Pool
- Original Pool: Among the top twenty cryptocurrencies by market capitalization, excluding CEX coins, stablecoins, and coins without perpetual contracts on Binance.
- Selected Pool: Top ten cryptocurrencies by market capitalization in the original pool.  
- Pools Update Frequency: Every Monday at 00:00 UTC+0.
- Trading Target: Perpetual contracts on Binance for cryptocurrencies in the selected pool.
- Data Resolution: **1 minute**

In [5]:
# Load data.
# binance_swaps.csv is read with its first column as the index; that index is
# parsed to timestamps on the return frame only.
binance_swaps = pd.read_csv(os.path.join("data", "binance_swaps.csv"), index_col=0)

# Simple one-step return: x_t / x_{t-1} - 1. The first row has no predecessor
# and is therefore dropped.
previous_row = binance_swaps.shift(1)
binance_swaps_return = binance_swaps.div(previous_row).sub(1).iloc[1:]
binance_swaps_return.index = pd.to_datetime(binance_swaps_return.index)

# NOTE(review): pickle.load can execute arbitrary code; only load this file
# if it comes from a trusted source.
with open(os.path.join("data", "weekly_crypto_top20.pkl"), 'rb') as pool_file:
    weekly_crypto_top20 = pickle.load(pool_file)

# Backtesting Method
- Rolling basis (walk forward optimization)
- Training Period: t-40320 ~ t-10081 (30240 minutes or 21 days in total)
- Validation Period: t-10080 ~ t-1 (10080 minutes or 7 days in total)
- Testing Period: t ~ t+10079 (10080 minutes or 7 days in total)
- t is the start of each week, i.e., 00:01 on Monday.

![image.png](image/bt_flow.png)

In [6]:
# Prepare training, validation, and testing periods and tickers for each week.
#
# Each entry appended to `setting_list` is
#     [training_period, validation_period, testing_period, tickers]
# where every period is an inclusive (start, end) pair of timestamps and
# `tickers` holds at most ten column names of `binance_swaps_return`.
stables = {"BUSD", "USDC", "UST", "DAI", "TUSD", "USDT"}
cexs = {"FTT", "BNB", "HT", "OKB", "CRO", "LEO"}
not_listed = {"WBTC", "TON"}
# Pool symbol -> symbol under which the contract appears in the return data.
name_changed = {"SHIB": "1000SHIB"}
remove = stables | cexs | not_listed
setting_list = []
start_date = "2023-01-01"
start_ts = pd.to_datetime(start_date)  # parsed once instead of on every iteration

for k, v in weekly_crypto_top20.items():
    # t is one minute after the pool timestamp stored in the key.
    t = pd.to_datetime(k) + pd.Timedelta("1min")
    # Skip early weeks BEFORE touching the return data: the lookup below raises
    # KeyError when t is missing from the index, which must not happen for
    # weeks that are discarded anyway.
    if t <= start_ts:
        continue

    valid_tickers = set(v) - remove
    # Check whether a swap is listed on Binance at the start of the week.
    listed = set(binance_swaps_return.loc[t].dropna().index)

    valid_tickers_top10 = []
    # NOTE(review): assumes v is ordered by market capitalization, so that the
    # truncation below keeps the largest assets — confirm against the pickle.
    for ticker in v:
        if ticker not in valid_tickers:
            continue
        # Resolve the contract symbol first and test THAT against the listed
        # columns. The symbol appended here is the one later used to select
        # columns, so it is the one that has to exist in the return data.
        swap_ticker = name_changed.get(ticker, ticker)
        if swap_ticker in listed:
            valid_tickers_top10.append(swap_ticker)
    valid_tickers_top10 = valid_tickers_top10[:10]

    # 7 days of testing from t, the 7 days before t for validation, and the
    # 21 days before that for training (all bounds inclusive, in minutes).
    testing_period = (t, t + pd.Timedelta("10079min"))
    validation_period = (t - pd.Timedelta("10080min"), t - pd.Timedelta("1min"))
    training_period = (t - pd.Timedelta("40320min"), t - pd.Timedelta("10081min"))

    setting_list.append([training_period, validation_period, testing_period, valid_tickers_top10])

# Training

In [7]:
# Create folder for saving
# The path is read from configs["setting"]["save_dir"]; every run folder
# created further down is placed underneath it.
# NOTE(review): presumably a no-op when the folder already exists — confirm
# against utility.create_dir.
utility.create_dir(configs["setting"]["save_dir"])

## TCN TODO: regen main.py, clean model.py, testing
### Structure
- Given $N$ assets, at timestamp $\tau$ we take the return time series ($N \times L$) from $\tau - L + 1$ to $\tau$ as the model input, use the model to predict the optimal asset weights $w_{\tau}$ ($N \times 1$), and hold (long) or short the assets with these weights until timestamp $\tau + 1$.
- Here, we focus on a long-short portfolio that is dollar-neutral. Therefore, the predicted weights need to satisfy the following constraints:
    - $\sum_{i=1}^{N} |w_{\tau i}| = 1, -1 \le w_{\tau i} \le 1$
    - $\sum_{i=1}^{N} \max(w_{\tau i}, 0) + \sum_{i=1}^{N} \min(w_{\tau i}, 0) = 0$

![image.png](image/model.png)

### Loss Function
- The model aims to predict the weight of each crypto swap so as to maximize the Sharpe ratio over future timestamps. The loss function used to update the model for this purpose is defined as follows:
    - $R_{t} = \sum_{i=1}^{N} w_{(t-1) i} R_{it}$
    - $E(R) = \frac{1}{T}\sum_{t=1}^{T}R_t$
    - $Std(R) = \sqrt{\frac{1}{T-1}\sum_{t=1}^{T}[R_t - E(R)]^2}$
    - $Loss = -\frac{E(R)}{Std(R)}$

### Training Procedure
- For a given hyperparameter set, the model with the best validation loss will be saved during training.
- The hyperparameters are also tuned based on the best validation loss.

In [8]:
# Create folder for saving model training results.
# Each execution gets its own folder "<model>_<YYYY-mm-dd_HH-MM-SS>" under the
# configured save directory.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
run_folder = configs["portfolio_config"]["model"] + f"_{timestamp}"
model_out_dir_root = os.path.join(configs["setting"]["save_dir"], run_folder)
utility.create_dir(model_out_dir_root)

In [None]:
# Walk-forward training: one network is fitted per weekly setting.
input_length = configs["portfolio_config"]["input_length"]
input_td = pd.Timedelta(f"{input_length}min")


def _slice_with_lookback(returns, period, lookback):
    """Return `returns` over `period`, extended backwards by `lookback`, NaNs set to 0."""
    first, last = period
    return returns.loc[first - lookback:last].fillna(0)


for training_period, validation_period, testing_period, tickers in setting_list:
    # Prepare data for each week: restrict to this week's tickers once, then
    # cut the three windows out of the same frame.
    weekly_return = binance_swaps_return[tickers]
    training_return = _slice_with_lookback(weekly_return, training_period, input_td)
    validation_return = _slice_with_lookback(weekly_return, validation_period, input_td)
    # Built alongside the other two windows; not consumed in this cell.
    testing_return = _slice_with_lookback(weekly_return, testing_period, input_td)
    data_list = [training_return, validation_return]

    # Folder name is "<test start>_<test end>" in ISO format with ':' removed.
    week_folder = "_".join(bound.isoformat().replace(":", "") for bound in testing_period)
    model_out_dir = os.path.join(model_out_dir_root, week_folder)
    utility.create_dir(model_out_dir)

    nn_model = model.NNModel(data_list, configs, "TCN", model_out_dir)
    nn_model.training()

TRAINING:  2022-12-04 00:01:00 ~ 2022-12-25 00:00:00
VALIDATION:  2022-12-25 00:01:00 ~ 2023-01-01 00:00:00


  0%|          | 0/256 [00:00<?, ?it/s]

TRAINING:  2022-12-11 00:01:00 ~ 2023-01-01 00:00:00
VALIDATION:  2023-01-01 00:01:00 ~ 2023-01-08 00:00:00


  0%|          | 0/256 [00:00<?, ?it/s]

TRAINING:  2022-12-18 00:01:00 ~ 2023-01-08 00:00:00
VALIDATION:  2023-01-08 00:01:00 ~ 2023-01-15 00:00:00


## Modern Portfolio Theory (MPT)

In [None]:
# Create folder for saving model optimization results.
# Each execution gets its own folder "MPT_<YYYY-mm-dd_HH-MM-SS>" under the
# configured save directory.
timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
run_folder = "MPT" + f"_{timestamp}"
model_out_dir_root = os.path.join(configs["setting"]["save_dir"], run_folder)
utility.create_dir(model_out_dir_root)

input_length = configs["portfolio_config"]["input_length"]
input_td = pd.Timedelta(f"{input_length}min")

for training_period, validation_period, testing_period, tickers in setting_list:
    # Prepare data for each week. The window runs from the start of the
    # training period (minus the input length) through the END of the
    # validation period, i.e. the validation week is part of the fitted data.
    window_start = training_period[0] - input_td
    window_end = validation_period[1]
    training_return = binance_swaps_return[tickers].loc[window_start:window_end].fillna(0)
    validation_return = None  # Validation is optional for traditional approaches.
    data_list = [training_return, validation_return]

    # Folder name is "<test start>_<test end>" in ISO format with ':' removed.
    week_folder = "_".join(bound.isoformat().replace(":", "") for bound in testing_period)
    model_out_dir = os.path.join(model_out_dir_root, week_folder)
    utility.create_dir(model_out_dir)

    mpt_model = model.Markowitz(data_list, configs, model_out_dir)
    mpt_model.training()