In [107]:
import pandas as pd
import numpy as np
from backtesting import Backtest, Strategy
from data_storage import create_connection
from stockstats import StockDataFrame
from risk_metrics import Risk_Metrics

In [108]:
connection = create_connection("../database/crypto_billionairs.db")

In [109]:
def momentum(df, lag):
    return df.pct_change(periods=lag)

In [110]:
class Momentum_AR_raw_long_short(Strategy):
   
    
    def init(self):
        # compute the rsi and stochastic oscillator with stockstats and return the buy signal of the current row
        
        self.init_long_signal = self.I(init_buy_signal, self.data.df)
        self.init_close_long_signal = self.I(init_close_long_signal, self.data.df)
        self.init_short_signal = self.I(init_short_signal, self.data.df)
        self.init_close_short_signal = self.I(init_close_short_signal, self.data.df)

        self.data.df.drop(self.data.df.columns.difference(['Open', 'High', 'Low', 'Close', "Volume"]), 1, inplace=True)
        
       
    
    def next(self):
        
        if self.init_long_signal[-1] == 1 and self.position.is_long is False:
            self.position.close()
            self.buy()
            
        elif self.init_close_long_signal[-1] == 1 and self.position.is_long:
             self.position.close()
             
        elif self.init_short_signal[-1] == -1 and self.position.is_short:
            self.position.close()
            self.sell()
            
        elif self.init_close_short_signal[-1] == -1 and self.position.is_short:
            self.position.close()
            

In [111]:
def init_buy_signal(trash):
    return df_temp['buy_indicator']

def init_short_signal(trash):
    return df_temp['short_indicator']

def init_close_long_signal(df):
    return df_temp['close_buy_indicator']

def init_close_short_signal(df):
    return df_temp['close_short_indicator']

In [112]:
def run_backtesting_raw(db_connection):
    
    global df_temp
    
    table_names = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;", db_connection)
    
    table_names_list = table_names['name'].tolist()

    filtered_table_names = [name for name in table_names_list if "_1day_features" in name and 'trades' not in name and not 'equity_curve' in name and not "_pooling" in name]
    print(filtered_table_names)
    df_risk = pd.DataFrame(columns = range(11))
    df_risk.columns = ["table_name", "timehorizon", "return", "buy and hold return", "annualized_return", "sharpe_ratio_annualized", "sortino_ratio_annualized", "maximum_drawdown", "calmar_ratio_annualized", "trades_count", "win_rate"]
    
    for table in filtered_table_names:
        
        df_temp = pd.read_sql_query(f"select * from {table}", db_connection)
        df_temp = df_temp[-365:]
        df_backtesting = pd.DataFrame()
        df_backtesting[['Open', 'High', 'Low', 'Close', "Volume"]] = df_temp[['open', 'high', 'low', 'close', 'volume']]
        
        
        bt = Backtest(df_backtesting, Momentum_AR_raw_long_short, cash=100_000, commission=.001)
        stats = bt.run()
        trades = pd.DataFrame(stats['_trades'])
        trades.to_sql(f"trades_{table}", db_connection, if_exists="replace")
        
        equity_curve = pd.DataFrame(stats["_equity_curve"])
        equity_curve.to_sql(f"{table}_equity_curve", db_connection, if_exists="replace")
        
        risk = Risk_Metrics(trades, df_temp, 0, True, stats)
        risk_metrics_list = [f"{table}", len(df_backtesting), stats["Return [%]"], stats["Buy & Hold Return [%]"], risk._annualize(stats["Return [%]"]),
                             risk.sharpe_ratio("return"), risk.sortino_ratio("return"),
                             risk.max_drawdown(), risk.calmar_ratio(stats["Return [%]"]), len(trades), stats["Win Rate [%]"]]
        print(risk_metrics_list)
        #df_risk.append(risk_metrics_list)
        df_risk.loc[len(df_risk)] = risk_metrics_list
        
    
    df_risk.to_sql("cryptocurrencies_risk_metrics_1m_abnormal_day", db_connection, if_exists="replace")

In [113]:
run_backtesting_raw(connection)

['ADA_1min_complete_1day_preprocessed_1day_features', 'BCH_1min_complete_1day_preprocessed_1day_features', 'BTC_1min_complete_1day_preprocessed_1day_features', 'DOGE_1min_complete_1day_preprocessed_1day_features', 'ETH_1min_complete_1day_preprocessed_1day_features', 'LINK_1min_complete_1day_preprocessed_1day_features', 'LTC_1min_complete_1day_preprocessed_1day_features', 'TRX_1min_complete_1day_preprocessed_1day_features']
['ADA_1min_complete_1day_preprocessed_1day_features', 365, 89.0631874838907, -4.8451539339218845, 89.0631874838907, 15.368459796776953, 1.809056176414122, 0.1371261513330252, 6.494981928544865, 44, 52.27272727272727]


  bt = Backtest(df_backtesting, Momentum_AR_raw_long_short, cash=100_000, commission=.001)


['BCH_1min_complete_1day_preprocessed_1day_features', 365, 136.22344902462095, -34.76161310594152, 136.22344902462095, 24.570105443461806, 2.8825293913510732, 0.07573333888926118, 17.987249872055642, 34, 64.70588235294117]
['BTC_1min_complete_1day_preprocessed_1day_features', 365, 32.7791582862371, -23.15393273463302, 32.7791582862371, 11.703040237858092, 0.6800280962620895, 0.04698090067296857, 6.977124281718435, 40, 62.5]
['DOGE_1min_complete_1day_preprocessed_1day_features', 365, 724.0276396024174, 135.67779393382514, 724.0276396024174, 35.40416810868232, 14.727582954708643, 0.07122321602796335, 101.65612843404219, 38, 57.89473684210527]
['ETH_1min_complete_1day_preprocessed_1day_features', 365, 85.61771405732893, 63.42980342826808, 85.61771405732893, 24.87883612252383, 1.9065583744524892, 0.0, 0, 41, 70.73170731707317]
['LINK_1min_complete_1day_preprocessed_1day_features', 365, 122.71105999790602, -45.553785480872236, 122.71105999790602, 24.98820582407941, 2.580401586158408, 0.0414

In [114]:
class Momentum_AR_long_short(Strategy):
   
    
    def init(self):
        # compute the rsi and stochastic oscillator with stockstats and return the buy signal of the current row
        
        self.init_long_short_signal = self.I(init_long_short_signal, self.data.df)
        self.init_close_long_short_signal = self.I(init_close_long_short_signal, self.data.df)
       

        self.data.df.drop(self.data.df.columns.difference(['Open', 'High', 'Low', 'Close', "Volume"]), 1, inplace=True)
        
       
    
    def next(self):
        
        if int(float(self.init_long_short_signal[-1])) == 1 and self.position.is_long is False:
            self.position.close()
            self.buy()
            
        elif int(float(self.init_close_long_short_signal[-1])) == 1 and self.position.is_long:
             self.position.close()
            
        elif int(float(self.init_long_short_signal[-1])) == -1 and self.position.is_short is False:
            self.position.close()
            self.sell()
            
        elif int(float(self.init_close_long_short_signal[-1])) == -1 and self.position.is_short:
            self.position.close()

In [115]:
def init_long_short_signal(trash):
    return df_temp_ml['buy_short_indicator']

def init_close_long_short_signal(trash):
    return df_temp_ml['close_buy_short_indicator']

In [116]:
def run_backtesting_ml(db_connection):
    
    global df_temp_ml
    
    table_names = pd.read_sql_query("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;", db_connection)
    
    table_names_list = table_names['name'].tolist()

    filtered_table_names = [name for name in table_names_list if ("_ensemble" in name or "_svc" in name or "_knn" in name or "_random_forest" in name or "_logistic_regression" in name or "_mlp" in name) and 'trades' not in name and not 'equity_curve' in name]
    print(filtered_table_names)
    df_risk = pd.DataFrame(columns = range(11))
    df_risk.columns = ["table_name", "timehorizon", "return", "buy and hold return", "annualized_return", "sharpe_ratio_annualized", "sortino_ratio_annualized", "maximum_drawdown", "calmar_ratio_annualized", "trades_count", "win_rate"]
    
    for table in filtered_table_names:
        
        df_temp_ml = pd.read_sql_query(f"select * from {table}", db_connection)
        df_temp_ml["return"] = momentum(df_temp_ml["close"], 1) + 1
        
        #df_temp_ml = df_temp_ml[-356:]
        
        df_backtesting = pd.DataFrame()
        df_backtesting[['Open', 'High', 'Low', 'Close', "Volume"]] = df_temp_ml[['open', 'high', 'low', 'close', 'volume']]
        
        
        bt = Backtest(df_backtesting, Momentum_AR_long_short, cash=100_000, commission=.001)
        stats = bt.run()
        trades = pd.DataFrame(stats['_trades'])
        trades.to_sql(f"trades_{table}", db_connection, if_exists="replace")
        
        equity_curve = pd.DataFrame(stats["_equity_curve"])
        equity_curve.to_sql(f"{table}_equity_curve", db_connection, if_exists="replace")
        
        risk = Risk_Metrics(trades, df_temp_ml, 0, True, stats)
        
        try:
            risk_metrics_list = [f"{table}", len(df_backtesting), stats["Return [%]"], stats["Buy & Hold Return [%]"], risk._annualize(stats["Return [%]"]),
                             risk.sharpe_ratio("return"), risk.sortino_ratio("return"),
                             risk.max_drawdown(), risk.calmar_ratio(stats["Return [%]"]), len(trades), stats["Win Rate [%]"]]
            print(risk_metrics_list)
            df_risk.loc[len(df_risk)] = risk_metrics_list
        
        except:
            print(table, len(trades))
            pass
        
    
    df_risk.to_sql("cryptocurrencies_risk_metrics_1m_abnormal_day_ml", db_connection, if_exists="replace")

In [117]:
run_backtesting_ml(connection)

['ADA_1min_complete_1day_preprocessed_1day_features_ensemble_pooling', 'ADA_1min_complete_1day_preprocessed_1day_features_knn_pooling', 'ADA_1min_complete_1day_preprocessed_1day_features_logistic_regression_pooling', 'ADA_1min_complete_1day_preprocessed_1day_features_mlp_classifier_pooling', 'ADA_1min_complete_1day_preprocessed_1day_features_random_forest_pooling', 'ADA__knn', 'ADA__knn_resampled', 'ADA__logistic_regression', 'ADA__logistic_regression_resampled', 'ADA__mlp', 'ADA__mlp_resampled', 'ADA__random_forest', 'ADA__random_forest_resampled', 'ADA__svc', 'ADA__svc_resampled', 'BCH_1min_complete_1day_preprocessed_1day_features_ensemble_pooling', 'BCH_1min_complete_1day_preprocessed_1day_features_knn_pooling', 'BCH_1min_complete_1day_preprocessed_1day_features_logistic_regression_pooling', 'BCH_1min_complete_1day_preprocessed_1day_features_mlp_classifier_pooling', 'BCH_1min_complete_1day_preprocessed_1day_features_random_forest_pooling', 'BCH__knn', 'BCH__knn_resampled', 'BCH__log

  bt = Backtest(df_backtesting, Momentum_AR_long_short, cash=100_000, commission=.001)


['ADA_1min_complete_1day_preprocessed_1day_features_knn_pooling', 365, -38.686304675906094, -4.8451539339218845, -38.686304675906094, -8.250874837362248, -0.799567338673276, 0.6042518793731176, -0.6402347431015238, 183, 45.90163934426229]
['ADA_1min_complete_1day_preprocessed_1day_features_logistic_regression_pooling', 365, -35.41355847441906, -4.8451539339218845, -35.41355847441906, -7.435422641757007, -0.7317885012470011, 0.6171462740363557, -0.57382763154028, 144, 49.30555555555556]
['ADA_1min_complete_1day_preprocessed_1day_features_mlp_classifier_pooling', 365, -63.16119503578275, -4.8451539339218845, -63.16119503578275, -15.11537914331272, -1.3253632632444843, 0.7120537910787719, -0.8870284215479369, 184, 39.67391304347826]
['ADA_1min_complete_1day_preprocessed_1day_features_random_forest_pooling', 365, -44.02606477075013, -4.8451539339218845, -44.02606477075013, -9.738596638712732, -0.9147862795265549, 0.5523532367478011, -0.7970635789150265, 84, 42.857142857142854]
ADA__knn 0
[