# Statistical Arbitrage Strategy Implementation

In [None]:
import pandas as pd
from datetime import datetime, timedelta
import numpy as np
from src.trading_strategy import TradingStrategy 
from src.back_testing import BackTesting
from src.config import tokens_price_path, tokens_largest_cap_path
from src.utility import Utility
import warnings
# Install an "ignore" filter with no category/message/module restriction, so
# every warning raised from here on is suppressed for the whole session.
# NOTE(review): this also hides pandas deprecation and chained-assignment
# warnings coming from the cells below -- confirm that is intended.
warnings.filterwarnings("ignore")
# Marker output showing that the import cell ran to completion
print("Modules Imported")

Read in and clean dataset

In [None]:
# Load the price table and the largest-cap token table from their CSV files
tokens_price = pd.read_csv(tokens_price_path)
tokens_largest_cap = pd.read_csv(tokens_largest_cap_path)

# For each table: parse 'startTime' into datetimes, then promote it to the
# index (both steps modify the frame in place)
for frame in (tokens_price, tokens_largest_cap):
    frame['startTime'] = pd.to_datetime(frame['startTime'])
    frame.set_index('startTime', inplace=True)

# Show the largest-cap table as the cell output
tokens_largest_cap

## Testing Signal Generation

In [None]:
# Smoke test: check that trading_strategy.py can generate signals for a single
# timestamp. Any start time around 2021 can be used here.
start_time = datetime.fromisoformat('2021-10-20 03:00:00+00:00')
# M is handed to TradingStrategy here and to BackTesting further down
# (presumably an estimation-window length -- confirm against src/)
M = 240
test = TradingStrategy(tokens_price, tokens_largest_cap, start_time, M)
signals = test.generate_signals()
signals

## Run a testing period from 2021-09-26 00:00:00 to 2022-09-25 05:00:00.

In [None]:
# Parse both bounds of the test period with Utility.parse_date
start_time = Utility.parse_date('2021-09-26 00:00:00+00:00')
end_time = Utility.parse_date('2022-09-25 05:00:00+00:00')

# Label-slice the BTC price column down to that window and display it
prices_BTC = tokens_price['BTC']
filtered_BTC = prices_BTC.loc[start_time:end_time]
filtered_BTC

In [None]:
# The back-test bounds are passed to BackTesting as plain strings (unparsed)
start_time, end_time = '2021-09-26 00:00:00+00:00', '2022-09-25 05:00:00+00:00'
backtest = BackTesting(start_time, end_time, M, tokens_largest_cap, tokens_price)

# Task1a

In [None]:
# Long-format table used below: a 'time' column, a 'common_tokens' column and
# one column per eigenvector / eigenportfolio.
portfolios_df = backtest.eigen_vectors_portfolios_df

# Move 'time' into the index only while it is still a column. set_index with
# inplace=True mutates the frame itself, so if `backtest` hands back the same
# object on every access, re-running this cell without the guard raises
# KeyError: 'time'.
if 'time' in portfolios_df.columns:
    portfolios_df['time'] = pd.to_datetime(portfolios_df['time'])
    portfolios_df.set_index('time', inplace=True)

# Reshape to wide format (rows: time, columns: token) and export to CSV
eigenportfolio1_df = portfolios_df.pivot(columns='common_tokens', values='eigenportfolio1')
eigenportfolio2_df = portfolios_df.pivot(columns='common_tokens', values='eigenportfolio2')
eigenportfolio1_df.to_csv("eigenportfolio1.csv")
eigenportfolio2_df.to_csv("eigenportfolio2.csv")

In [None]:
# Wide tables (rows: time, columns: token) for the two eigenvector columns
eigenvector1_df, eigenvector2_df = [
    portfolios_df.pivot(columns='common_tokens', values=column)
    for column in ('eigenvectors1', 'eigenvectors2')
]

# Export both tables to CSV
eigenvector1_df.to_csv("eigenvector1.csv")
eigenvector2_df.to_csv("eigenvector2.csv")

In [None]:
# Row-over-row percentage change of every price column; entries that come out
# as NaN (e.g. the first row) are set to 0
all_returns = tokens_price.pct_change().fillna(0)
all_returns

In [None]:

# Tokens without a weight at a timestamp count as a zero position
eigenportfolio1_df.fillna(0, inplace=True)

# Signed sum of the weights in each row
# NOTE(review): this is NOT the sum of absolute weights; if normalising by
# absolute weights was intended, .abs() is missing here -- confirm intent
row_sums = eigenportfolio1_df.sum(axis=1)

# Rescale every row by its own sum
eigenportfolio1_scaled = eigenportfolio1_df.div(row_sums, axis=0)

# Token returns restricted to the portfolio's tokens and timestamps
token_name = eigenportfolio1_scaled.columns
eigenportfolio1_returns = all_returns[token_name].loc[eigenportfolio1_scaled.index.tolist()]

# Weighted return per timestamp
returns1 = (eigenportfolio1_scaled * eigenportfolio1_returns).sum(axis=1)

# A zero row sum yields inf/NaN in the step above; count those as a 0 return
returns1 = returns1.replace([np.inf, -np.inf], np.nan).fillna(0)

# Compound the per-period returns into a cumulative return series
cum_return1 = ((1 + returns1).cumprod() - 1).rename('eigenportfolio1')
cum_return1

In [None]:
# Tokens without a weight at a timestamp count as a zero position
eigenportfolio2_df.fillna(0, inplace=True)

# Signed sum of the weights in each row
# NOTE(review): this is NOT the sum of absolute weights; if normalising by
# absolute weights was intended, .abs() is missing here -- confirm intent
row_sums = eigenportfolio2_df.sum(axis=1)

# Rescale every row by its own sum
eigenportfolio2_scaled = eigenportfolio2_df.div(row_sums, axis=0)

# Token returns restricted to the portfolio's tokens and timestamps
token_name = eigenportfolio2_scaled.columns
eigenportfolio2_returns = all_returns[token_name].loc[eigenportfolio2_scaled.index.tolist()]

# Weighted return per timestamp
returns2 = (eigenportfolio2_scaled * eigenportfolio2_returns).sum(axis=1)

# A zero row sum yields inf/NaN in the step above; count those as a 0 return
returns2 = returns2.replace([np.inf, -np.inf], np.nan).fillna(0)

# Compound the per-period returns into a cumulative return series and plot it
cum_return2 = ((1 + returns2).cumprod() - 1).rename('eigenportfolio2')
cum_return2.plot()

In [None]:
# Returns of the two benchmark tokens at the eigenportfolio2 timestamps
benchmark_times = eigenportfolio2_scaled.index.tolist()
btc_return = all_returns.loc[benchmark_times, 'BTC']
eth_return = all_returns.loc[benchmark_times, 'ETH']

# Compound each series into a cumulative return
btc_cum_ret = btc_return.add(1).cumprod().sub(1)
eth_cum_ret = eth_return.add(1).cumprod().sub(1)

In [None]:
# Put eigenportfolio1 next to the BTC and ETH benchmarks, one column each,
# and plot the three curves together
result = pd.concat([cum_return1, btc_cum_ret, eth_cum_ret], axis='columns')
result.plot()

## Eigenportfolio1's Graph

In [None]:
import matplotlib.pyplot as plt

# portfolios_df is expected to be indexed by 'time' already at this point, so
# the datetime conversion / set_index step is not repeated here.

# The two timestamps whose weights are plotted
time1 = '2021-09-26T12:00:00+00:00'
time2 = '2022-04-15T20:00:00+00:00'

# For each timestamp: take its rows, key them by token, and order them by
# eigenportfolio1 weight from largest to smallest
portfolio_time1, portfolio_time2 = [
    portfolios_df.loc[stamp]
    .set_index('common_tokens')
    .sort_values(by='eigenportfolio1', ascending=False)
    for stamp in (time1, time2)
]

# Define a function that draws a line plot of eigenportfolio weights
def plot_eigenportfolio_weights(portfolio, title, column='eigenportfolio1'):
    """Plot one eigenportfolio's weights against the token labels.

    Args:
        portfolio: DataFrame whose index holds the token labels and which
            contains the weight column named by ``column``.
        title: Title placed above the figure.
        column: Name of the weight column to plot. Defaults to
            'eigenportfolio1', so existing two-argument calls are unchanged.

    A new figure is created on every call but not shown; call ``plt.show()``
    afterwards to render it.
    """
    plt.figure(figsize=(8, 4))
    plt.plot(portfolio.index, portfolio[column])
    plt.title(title)
    plt.xlabel('Tokens')
    plt.ylabel('Eigenportfolio Weights')
    # Token labels are drawn vertically
    plt.xticks(rotation=90)
    plt.grid(True)
    plt.tight_layout()

# Figure titles name the timestamp each snapshot belongs to
title1 = f'Eigenportfolio1 Weights at {time1}'
title2 = f'Eigenportfolio1 Weights at {time2}'

# One figure per timestamp, first timestamp first
for weights, heading in zip((portfolio_time1, portfolio_time2), (title1, title2)):
    plot_eigenportfolio_weights(weights, heading)

# Render the figures
plt.show()


## Eigenportfolio2's Graph

In [None]:

portfolios_df = backtest.eigen_vectors_portfolios_df

# The timestamp lookups below need 'time' as the index. If it is still an
# ordinary column (for example when this cell runs before the frame has been
# re-indexed), build a time-indexed copy instead of failing inside .loc.
if 'time' in portfolios_df.columns:
    portfolios_df = portfolios_df.assign(
        time=pd.to_datetime(portfolios_df['time'])
    ).set_index('time')

# Extract the specific timestamps for plotting
time1 = '2021-09-26T12:00:00+00:00'
time2 = '2022-04-15T20:00:00+00:00'

# Slice the DataFrame for the two specified timestamps, key the rows by token
# and order them by eigenportfolio2 weight from largest to smallest
portfolio_time1 = portfolios_df.loc[time1].set_index('common_tokens').sort_values(by='eigenportfolio2', ascending=False)
portfolio_time2 = portfolios_df.loc[time2].set_index('common_tokens').sort_values(by='eigenportfolio2', ascending=False)

# Define a function that draws a line plot of eigenportfolio weights
def plot_eigenportfolio_weights(portfolio, title, column='eigenportfolio2'):
    """Plot one eigenportfolio's weights against the token labels.

    Args:
        portfolio: DataFrame whose index holds the token labels and which
            contains the weight column named by ``column``.
        title: Title placed above the figure.
        column: Name of the weight column to plot. Defaults to
            'eigenportfolio2', so existing two-argument calls are unchanged.

    A new figure is created on every call but not shown; call ``plt.show()``
    afterwards to render it.
    """
    plt.figure(figsize=(8, 4))
    plt.plot(portfolio.index, portfolio[column])
    plt.title(title)
    plt.xlabel('Tokens')
    plt.ylabel('Eigenportfolio Weights')
    # Token labels are drawn vertically
    plt.xticks(rotation=90)
    plt.grid(True)
    plt.tight_layout()

# Figure titles name the timestamp each snapshot belongs to
title1 = f'Eigenportfolio2 Weights at {time1}'
title2 = f'Eigenportfolio2 Weights at {time2}'

# One figure per timestamp, first timestamp first
for weights, heading in zip((portfolio_time1, portfolio_time2), (title1, title2)):
    plot_eigenportfolio_weights(weights, heading)

# Render the figures
plt.show()

# BTC and ETH Visualization

In [None]:
df = backtest.signals_df

# Rows whose 'time' lies inside the plotted window (both bounds inclusive)
in_window = (df['time'] >= '2021-09-26 00:00:00') & (df['time'] <= '2021-10-25 23:00:00')

# Per-token subsets of that window
btc_df = df[(df['Token'] == 'BTC') & in_window]
eth_df = df[(df['Token'] == 'ETH') & in_window]

In [None]:
import matplotlib.pyplot as plt

# s_score of BTC over the selected window, drawn as a blue line with markers
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(btc_df['time'], btc_df['s_score'], marker='o', linestyle='-', color='b')
ax.set_title('Evolution of s_score for BTC from 2021-09-26 to 2021-10-25')
ax.set_xlabel('Time')
ax.set_ylabel('s_score')
# pyplot call: operates on the current axes, which is `ax`
plt.xticks(rotation=45)
ax.grid(True)
fig.tight_layout()

# Render the figure
plt.show()

In [None]:
# s_score of ETH over the selected window, drawn as a blue line with markers
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(eth_df['time'], eth_df['s_score'], marker='o', linestyle='-', color='b')
ax.set_title('Evolution of s_score for ETH from 2021-09-26 to 2021-10-25')
ax.set_xlabel('Time')
ax.set_ylabel('s_score')
# pyplot call: operates on the current axes, which is `ax`
plt.xticks(rotation=45)
ax.grid(True)
fig.tight_layout()

# Render the figure
plt.show()

In [None]:
# Wide table of trading signals (one row per time, one column per token),
# written out as CSV
pivot_df = df.pivot(values='trading_signal', columns='Token', index='time')
pivot_df.to_csv('trading_signals.csv')

In [None]:
# Drop incomplete rows and take an explicit copy, so that adding a column
# below is an unambiguous write to our own frame (no SettingWithCopyWarning)
returns_df = backtest.portfolio_value.dropna().copy()

# Calculate the cumulative return by compounding the per-period returns
returns_df['CumulativeReturn'] = (1 + returns_df['Return']).cumprod() - 1

# Plotting the cumulative return curve
plt.figure(figsize=(12, 6))
plt.plot(returns_df['Time'], returns_df['CumulativeReturn'], color='blue')
plt.title('Cumulative Return Curve of the Strategy')
plt.xlabel('Time')
plt.ylabel('Cumulative Return')
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()

# Show the plot, as every other plotting cell in this notebook does
plt.show()

In [None]:
# Distribution of the per-period strategy returns, 50 bins
fig, ax = plt.subplots(figsize=(12, 6))
ax.hist(returns_df['Return'], bins=50, color='green')
ax.set_title('Histogram of Hourly Returns')
ax.set_xlabel('Hourly Return')
ax.set_ylabel('Frequency')
ax.grid(True)
fig.tight_layout()

# Render the figure
plt.show()

In [None]:
# Print a label, then the value returned by the back-test's Sharpe-ratio
# calculation. The label is printed before the calculation runs.
print("Here is the Sharpe ratio")
sharpe_ratio = backtest.calculate_sharpe_ratio()
print(sharpe_ratio)

In [None]:
# Print a label, then the value returned by the back-test's maximum-drawdown
# calculation. The label is printed before the calculation runs.
print("Here is the maximum drawdown")
maximum_drawdown = backtest.calculate_maximum_drawdown()
print(maximum_drawdown)