# Import Statements

In [123]:
import pandas as pd
import warnings
import zipfile
import requests
import numpy as np
from plotly.io import show
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from skfolio import Population, RiskMeasure
from skfolio.optimization import MeanRisk, ObjectiveFunction
warnings.filterwarnings("ignore")

# Data Preprocessing

To avoid some missing data, we start our sample in 2023. If a value is missing, we forward fill it. We filter the crypto pairs to select already established cryptos that have consequent volume (this bears a risk as it encompasses survivorship bias).

In [124]:
# Path to the ZIP file
zip_file_path = 'Data/data_60min.csv.zip'

# Specify the name of the CSV file inside the ZIP archive
csv_file_name = 'data_60min.csv'

# Open the ZIP file and read the CSV file directly into pandas
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    # Read the CSV file from the zip without extracting it to the disk
    with zip_ref.open(csv_file_name) as file:
        # Read the CSV content into a pandas DataFrame
        data = pd.read_csv(file).drop_duplicates()

data = data.set_index("time", drop=True)
data = data["2023":]
data = data.ffill()
crypto_symbols = sorted(set([col.split("_")[0] for col in data.columns]))
cryptos = ["ADAUSD", "ALGOUSD", "ATOMUSD", "DOTUSD",  "ETHUSD", "FLOWUSD", "FLRUSD",  "MATICUSD", "MINAUSD", "SCRTUSD", "SEIUSD", "SOLUSD", "TIAUSD", "TRXUSD", "XTZUSD", "1INCHUSD", "AAVEUSD",  "COMPUSD", "LTCUSD", "XRPUSD", "ZRXUSD"]
crypto_symbols = [crypto for crypto in crypto_symbols if crypto in cryptos]
data = data[[crypto + "_spot" for crypto in crypto_symbols] + [crypto + "_perp" for crypto in crypto_symbols]]
data.head()

Unnamed: 0_level_0,1INCHUSD_spot,AAVEUSD_spot,ADAUSD_spot,ALGOUSD_spot,ATOMUSD_spot,COMPUSD_spot,DOTUSD_spot,ETHUSD_spot,FLOWUSD_spot,FLRUSD_spot,...,MATICUSD_perp,MINAUSD_perp,SCRTUSD_perp,SEIUSD_perp,SOLUSD_perp,TIAUSD_perp,TRXUSD_perp,XRPUSD_perp,XTZUSD_perp,ZRXUSD_perp
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-01 00:00:00,0.383,51.75,0.244274,0.17165,9.3826,31.3,4.283,1194.14,0.648,,...,0.7517,0.4372,,,9.97,,0.054501,0.33836,0.7176,
2023-01-01 01:00:00,0.383,51.68,0.24498,0.17182,9.3021,31.3,4.2957,1195.0,0.65,,...,0.7538,0.4372,,,10.01,,0.054694,0.33892,0.7176,
2023-01-01 02:00:00,0.384,51.9,0.245571,0.1712,9.3577,31.19,4.299,1194.75,0.651,,...,0.7526,0.43,,,10.0,,0.054652,0.33756,0.7176,
2023-01-01 03:00:00,0.383,51.68,0.2451,0.17287,9.3691,31.12,4.2951,1192.72,0.65,,...,0.7512,0.43,,,9.95,,0.054699,0.33824,0.7176,
2023-01-01 04:00:00,0.382,51.12,0.244368,0.17148,9.3088,31.12,4.2782,1192.12,0.646,,...,0.7473,0.43,,,9.82,,0.054652,0.33584,0.7176,


# Strategy

In [125]:

for crypto in crypto_symbols:
    data[crypto+"_spread"] = data[crypto + "_perp"] - data[crypto + "_spot"]
    data[crypto+"_signal"] = data[crypto+"_spread"] / data[crypto+"_spread"].rolling(5*24).std()



# # Plot the spreads using Plotly
# fig = go.Figure()
# for crypto in crypto_symbols:

#     fig.add_trace(go.Scatter(x=data.index, 
#                              y=data[crypto + "_signal"], 
#                              mode='lines', 
#                              name=f"{crypto}"))

# fig.update_layout(
#     title="Rolling Z-score for Spread between Spot and Perpetual Futures",
#     xaxis_title="Time",
#     yaxis_title="Z-score Spread (USD)",
#     legend_title="Cryptocurrency",
#     template="plotly_white"
# )

# fig.show()

After computing the spreads between the spot price and the perpetual future price, we build a strategy that consists in :
If the spread is larger (in absolute) than the cost of taking a position times a threshold, we open a position until the spread reaches zero. As the spread is mean reverting (funding fees force perpetual futures to converge to spot price over time), such trades are supposed to be arbitrages. Some risks still remain : first, the data we are using are hourly close, meaning it is likely that these prices are not the prices we can get executed at (bid-ask spread, slippage, market impact). As we are trading futures, there are liquidation risks in case of high volatility, especially on kraken platform where holding some spot cryptos is not necesarilly considered liquidity margin for the same crypto future. Another approximation we do not account for is the fact that we are subject to a funding fee for holding perpetual futures. However, by the nature of the strategy, we are sure that this fee is going to be granted to us rather than paid by us. 

To account for this, we build time series that replicates the portfolio value of executing the strategy for each crypto, by accounting for the fees and bid-ask spread. Initial values for the cost are the maximum fees that Kraken Platform can charge.

In [126]:
threshold = 2.2


def get_mean_recent_spread(crypto_pair):
    """
    Fetches the mean spread for the given cryptocurrency pair from Kraken's API.
    
    Args:
    - crypto_pair (str): The cryptocurrency pair (e.g., 'BTCUSD', 'ETHUSD').

    Returns:
    - float: The mean spread for the given pair, or None if data is unavailable.
    """
    # Make the API request to Kraken for the specified crypto pair
    # Kraken API URL for the "Get Recent Spreads" endpoint
    kraken_url = 'https://api.kraken.com/0/public/Spread'

    params = {'pair': crypto_pair}
    response = requests.get(kraken_url, params=params)
    
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
    

        spread_data = list(data['result'].values())[0]
        
        # Calculate the spread for each trade (ask - bid)
        spreads = [(float(spread[2]) - float(spread[1])) / float(spread[1]) for spread in spread_data]
        
        # Compute the mean spread
        mean_spread = np.mean(spreads)
        return mean_spread
      

def calculate_pnl(df, crypto, threshold = 5, future_fees = 0.0005, spot_fees = 0.004, bid_ask_spread = 0.005):
    """
    Compute P&L for the trading strategy where trades are entered
    based on z-score thresholds and exited when the spread returns to zero.
    """
    # Initialize variables
    position = 0  # Current position: 1 for long, -1 for short, 0 for no position
    entry_price = 0
    positions = []
    pnl = []
    previous = 0
    for i in df.index:
        spread = df.loc[i][crypto+"_spread"]
        fees = df.loc[i][crypto+"_spot"] * (spot_fees + bid_ask_spread / 2) + df.loc[i][crypto+"_perp"] * (future_fees + bid_ask_spread / 2)
        zscore = df.loc[i][crypto+"_signal"] 
        
        pnl_ = position * (spread - previous)

        # Check entry conditions
        if position == 0:  # No open position
            if abs(spread) - threshold *fees > 0:
                if zscore > 0:
                    # Open a short position
                    position = -1
                    entry_price = spread 
                    previous = entry_price
                    pnl_ = pnl_ -fees
                    
                elif zscore < 0:
                    # Open a long position
                    position = 1
                    entry_price = spread 
                    previous= entry_price
                    pnl_ = pnl_ -fees

            


        # Check exit condition
        elif position != 0 and zscore * position >= 0:
            pnl_ = position * (spread - entry_price)
            position = 0  # Reset position
            previous = spread
            pnl_ = pnl_ -fees

        else:
            previous = spread
            
        
       # Calculate P&L
        positions.append(position)
        pnl.append(pnl_)
    df[crypto+"_position"] = positions
    df[crypto+"_pnl"] = pnl

    return df


for crypto in crypto_symbols:
    data = calculate_pnl(data, crypto, threshold = threshold, bid_ask_spread=get_mean_recent_spread(crypto))
    data[crypto+"_portfolio_value"] = data[crypto+"_pnl"].cumsum() 


# Plot portfolio values using Plotly
portfolio_columns = [col for col in data.columns if "portfolio" in col]

# fig_portfolio = go.Figure()

# for col in portfolio_columns:
#     fig_portfolio.add_trace(go.Scatter(x=data.index, 
#                                        y=data[col], 
#                                        mode='lines', 
#                                        name=col))

# fig_portfolio.update_layout(
#     title="Portfolio Value Over Time",
#     xaxis_title="Time",
#     yaxis_title="Portfolio Value (USD)",
#     legend_title="Portfolio",
#     template="plotly_white"
# )

# fig_portfolio.show()

# Strategy Weights

We use skfolio module to compute the weights of the different crypto allocations by maximising Sharpe Ratio on in sample 2023, and test on 2024.

In [127]:
X = data[portfolio_columns].ffill().fillna(0)
X = 1 + X
X.columns = [col.split("_")[0] for col in X.columns]



X = X.pct_change().dropna()
X_train, X_test = train_test_split(X, test_size=0.5, shuffle=False)
X_train = X_train[[col for col in X_train.columns if X_train[col].std() > 0]]
X_test = X_test[X_train.columns]
model = MeanRisk(
    risk_measure=RiskMeasure.STANDARD_DEVIATION,
    objective_function=ObjectiveFunction.MAXIMIZE_RATIO,
    portfolio_params=dict(name="Max Sharpe"),
)
model.fit(X_train)
model.weights_

pred_model = model.predict(X_test)

population = Population([pred_model])

fig = population.plot_composition()
show(fig)
fig = population.plot_cumulative_returns()
show(fig)

population.summary()

Unnamed: 0,Max Sharpe
Mean,0.0023%
Annualized Mean,0.57%
Variance,0.000001%
Annualized Variance,0.00034%
Semi-Variance,0.000000%
Annualized Semi-Variance,0.000042%
Standard Deviation,0.012%
Annualized Standard Deviation,0.18%
Semi-Deviation,0.0041%
Annualized Semi-Deviation,0.064%
