In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
# import statsmodels as stat
import matplotlib.pyplot as plt
import seaborn as sns
from utils import Variable, read_values

from sklearn.metrics import classification_report

In [3]:
values = read_values()
jan_2023 = values[2023][1]

In [4]:
def compute_lags(df):
    df['open_l1'] = df['open'].shift(1)
    df['open_l2'] = df['open'].shift(2)
    df['open_l3'] = df['open'].shift(3)
    df['open_l4'] = df['open'].shift(4)

def compute_diff(df):
    df['open_diff'] =  df['open'] - df['open'].shift(1)

def compute_rw(df):
    df['rw_4'] = df['open_diff'].rolling(window=4).mean()

def preproc(df):
    compute_lags(df)
    compute_diff(df)
    compute_rw(df)
    df.set_index('open_date', inplace=True)
    return df

In [7]:
jan_2023.loc['2023-01-01 01:00:00':'2023-01-02 01:00:00']

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_assets_volume,number_of_trades,taker_buy_base_asset_vol,taker_buy_quote_asset_vol,ignore,open_date,diff,dow


In [None]:
plt.figure(figsize=(10,10))
df = jan_2023
df = preproc(df)
sns.lineplot(df['open_diff'])
sns.lineplot(df['rw_4'])

In [27]:
df.head()

Unnamed: 0_level_0,open_time,open,high,low,close,volume,close_time,quote_assets_volume,number_of_trades,taker_buy_base_asset_vol,taker_buy_quote_asset_vol,ignore,diff,dow,open_l1,open_l2,open_l3,open_l4,open_diff,rw_4
open_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1


In [11]:

buy = np.where(jan_2023['rw_4'] < 0, 1, 0)
should_buy = jan_2023['open'].shift(-1) > jan_2023['open']
print(classification_report(should_buy, buy))

              precision    recall  f1-score   support

       False       0.49      0.62      0.55       336
        True       0.60      0.48      0.53       407

    accuracy                           0.54       743
   macro avg       0.55      0.55      0.54       743
weighted avg       0.55      0.54      0.54       743



In [12]:
def trading_sim(df):
    initial_investment = 10000  # Starting amount in cash
    cash = initial_investment
    holdings = 0
    returns = []
    # Simulate the trading bot's performance
    for index, row in df.iterrows():
        if row['buy']:
            if holdings == 0:  # Buy only if not already holding the commodity
                holdings = cash / row['price']
                cash = 0
        else:
            if holdings > 0:  # Sell only if holding the commodity
                cash = holdings * row['price']
                holdings = 0
        returns.append(cash if cash > 0 else holdings * row['price'])

    # Add the performance (returns) column to the DataFrame
    df['performance'] = returns

    # Display the DataFrame with the new 'performance' column
    # print(df[['price', 'buy', 'performance']])

    # Final performance result
    final_value = cash + (holdings * df.iloc[-1]['price'] if holdings > 0 else 0)
    print(f"Final portfolio value: {final_value}")
    print(f"Total return: {final_value - initial_investment}")

def trading_sim_preproc(df):
    df_proc = pd.DataFrame()
    buy = np.where(df['rw_4'] < 0, 1, 0)
    df_proc['price'] = df['open']
    df_proc['buy'] = buy
    return df_proc

trading_sim(trading_sim_preproc(jan_2023))

Final portfolio value: 12119.594835720036
Total return: 2119.594835720036


In [13]:
df = pd.DataFrame()
df['price'] = jan_2023['open']
df['buy'] = should_buy
trading_sim(df)


Final portfolio value: 32728.4622474298
Total return: 22728.4622474298


In [14]:
for i in range(1,13):
    df = preproc(values[2023][i])
    trading_sim(trading_sim_preproc(df))

KeyError: "None of ['open_date'] are in the columns"

In [27]:
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

# Custom Transformer: Ensures only the first '1' remains, the rest turn to '0'
class FirstBuyOnly(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self
    
    def transform(self, X):
        X = X.copy()
        X['target'] = 0  # Reset all to 0
        
        X.at[X.index[0], 'target'] = 1
        return X

# Updated Scoring Function: Computes Sharpe Ratio
def sharpe_ratio_scorer(y_true, y_pred, X, risk_free_rate=0):
    buy_prices = X.loc[X['target'] == 1, 'open']  # Get all buy prices
    daily_returns = X['open'].pct_change().dropna()  # Compute daily returns

    if buy_prices.empty or daily_returns.empty:
        print("No trades")
        return 0  # No trades or not enough data

    excess_returns = daily_returns - risk_free_rate
    sharpe_ratio = (np.mean(excess_returns) / np.std(excess_returns)) * np.sqrt(252)  # Annualized

    return sharpe_ratio

# Create the Pipeline
pipeline = Pipeline([
    ('first_buy_only', FirstBuyOnly()),  # Transform target column
])

df = jan_2023
df['target'] = 0
# Transform the Data
transformed_df = pipeline.fit_transform(df)

# Compute the Sharpe Ratio
strategy_sharpe = sharpe_ratio_scorer(df['target'], transformed_df['target'], transformed_df, 0.04/np.sqrt(252))

# Display Results
print("Transformed Target Column:\n", transformed_df[['target']])
print("\nStrategy Sharpe Ratio: {:.2f}".format(strategy_sharpe))

Transformed Target Column:
                      target
open_date                  
2023-01-01 01:00:00       1
2023-01-01 02:00:00       0
2023-01-01 03:00:00       0
2023-01-01 04:00:00       0
2023-01-01 05:00:00       0
...                     ...
2023-01-31 19:00:00       0
2023-01-31 20:00:00       0
2023-01-31 21:00:00       0
2023-01-31 22:00:00       0
2023-01-31 23:00:00       0

[743 rows x 1 columns]

Strategy Sharpe Ratio: -6.62


In [35]:
spent = (transformed_df['target'] * transformed_df['open']).sum()
valuation = transformed_df['target'].sum() * transformed_df['open'].iloc[-1]
return = valuation / spent

np.float64(1.402443133798237)