In [1]:
import pandas as pd
import numpy as np
import datetime as dt
from datetime import datetime, timedelta, timezone
import matplotlib as plot
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import itertools as it
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
#from keras.utils import to_categorical
#from keras.models import Sequential
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.preprocessing import LabelEncoder
import json

enc = LabelEncoder()

In [3]:
BTC_df = pd.read_csv('20250202-20170908_BTC-USDT_1D_okx_ohlc_M.csv')
BNB_df = pd.read_csv('20250127-20170907_BNB-USDT_1H_ohlc_M.csv')
DOGE_df = pd.read_csv('20250202-20170908_DOGE-USDT_1D_okx_ohlc_M.csv')
XRP_df = pd.read_csv('20250202-20170908_XRP-USDT_1D_okx_ohlc_M.csv')


In [51]:
#RSI CALC
def get_up_or_down(df, period):
    for i in range(len(df)):
        if i > 0:
            if df.iloc[i]['close'] >= df.iloc[i-1]['close']:
                df.at[i, 'gain_'+str(period)] = df.iloc[i]['close'] - df.iloc[i-1]['close']
                df.at[i, 'loss_'+str(period)] = 0
            elif df.iloc[i]['close'] < df.iloc[i-1]['close']:
                df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
                df.at[i, 'gain_'+str(period)] = 0
            else:
                df.at[i, 'gain_'+str(period)] = 0
                df.at[i, 'loss_'+str(period)] = 0
    return df

def get_up_or_down_bin(df, offset):
    for i in range(len(df)):
        if i > 0:
            if df.iloc[i]['close'] >= df.iloc[i-offset]['close']:
                df.at[i, 'updown_'+str(offset)] = 1
            elif df.iloc[i]['close'] < df.iloc[i-offset]['close']:
                df.at[i, 'updown_'+str(offset)] = -1                
            else:
                df.at[i, 'updown_'+str(offset)] = 0
    return df
  
def get_relative_strength_index(df, period):
    df['Date'] = pd.to_datetime(df['timestamp'])
    df.set_index(df['Date'])
    df = get_up_or_down(df, period)
    return df

def get_average_gains(df, period):
    for i in range(len(df)):
        n, up, down = 0, 0, 0
        if i == period:
            while n < period:
                if df.iloc[i-n]['gain_'+str(period)] > 0:
                    up += df.iloc[i-n]['gain_'+str(period)]
                elif df.iloc[i-n]['loss_'+str(period)] > 0:
                    down += df.iloc[i-n]['loss_'+str(period)]
                else:
                    up += 0
                    down += 0
                n += 1
            df.at[i, 'ag_'+str(period)] = up/period
            df.at[i, 'al_'+str(period)] = down/period
        elif i > period:
            df.at[i, 'ag_'+str(period)] = (df.iloc[i-1]['ag_'+str(period)] * (period - 1) + df.iloc[i]['gain_'+str(period)])/period
            df.at[i, 'al_'+str(period)] = (df.iloc[i-1]['al_'+str(period)] * (period - 1) + df.iloc[i]['loss_'+str(period)])/period
            df['ag_'+str(period)] = df['ag_'+str(period)].fillna(0)
            df['al_'+str(period)] = df['al_'+str(period)].fillna(0)
    return df

def get_relative_strength(df, period):
    df = get_relative_strength_index(df,period)
    df = get_average_gains(df, period)
    for i in range(len(df)):
        if i >= period:
            df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
            df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
    return df

##MONEY FLOW
def get_typical_price(high, low, close):
    typical_price = (high+low+close/3)
    return typical_price

def get_raw_money_flow(typical_price, volume):
    money_flow = typical_price * volume
    return money_flow

def get_money_flow_ratio(money_flow, window=14):
    signal = np.where(money_flow > money_flow.shift(1), 1, np.where(money_flow < money_flow.shift(1), -1, 0))
    money_flow_s = money_flow * signal
    
    money_flow_positive = money_flow_s.rolling(window).apply(lambda x: np.sum(np.where(x >= 0.0, x, 0.0)), raw=True)
    money_flow_negative = abs(money_flow_s.rolling(window).apply(lambda x: np.sum(np.where(x < 0.0, x, 0.0)), raw=True))
    
    money_flow_ratio = money_flow_positive / money_flow_negative
    
    return money_flow_ratio

def get_money_flow_index(money_flow_ratio):
    money_flow_index = 100. - 100./(1. + money_flow_ratio)
    return money_flow_index

def money_flow_index(high, low, close, volume, window=14):
    mfr = get_money_flow_ratio((high+low+close/3) * volume, window)
    mfi = 100. - 100./(1. + mfr)
    return mfi

#Choppiness index
def get_ci(high, low, close, lookback):
    tr1 = pd.DataFrame(high - low).rename(columns = {0:'tr1'})
    tr2 = pd.DataFrame(abs(high - close.shift(1))).rename(columns = {0:'tr2'})
    tr3 = pd.DataFrame(abs(low - close.shift(1))).rename(columns = {0:'tr3'})
    frames = [tr1, tr2, tr3]
    tr = pd.concat(frames, axis = 1, join = 'inner').dropna().max(axis = 1)
    atr = tr.rolling(1).mean()
    highh = high.rolling(lookback).max()
    lowl = low.rolling(lookback).min()
    ci = 100 * np.log10((atr.rolling(lookback).sum()) / (highh - lowl)) / np.log10(lookback)
    return ci

#Feature Extraction
def feature_extraction(df, time_steps_arr):
    df['range'] = df['high'] - df['low']
    df['range%'] = df['range']/df['close']
    df['obv'] = (np.sign(df['close'].diff()) * df['volume_ccy']).fillna(0).cumsum()    
    df['return'] = df['close'].pct_change() 
    
    features_list = ['range', 'range%', 'obv', 'return']
    for time in time_steps_arr:
        #df['return_'+str(time)] = (df.close / df.close.shift(time)) - 1
        df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
        df = get_relative_strength(df, time)
        df = get_up_or_down_bin(df, time)
        df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
        df['ma_'+str(time)] = df['close'].rolling(time).mean()
        df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
        df['ma_'+str(time)] = df['close'].rolling(time).mean()
        df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
        df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
        df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
        df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
        
        features_list.extend([
                    'return_' + str(time), 'rs_' + str(time), 'updown_' + str(time),
                    'std_' + str(time), 'ma_' + str(time), 'mfi_' + str(time),
                    'avgvolm_' + str(time), 'avgvolty_' + str(time), 'rtrend_' + str(time),
                    'ci_' + str(time)
                ])
    return df, features_list

def training_states_shift(df, time_steps_arr):
    for time in time_steps_arr:
        df['mStateSt_'+str(time)] = df.markov_st.shift(time)
        df['mStateMid_'+str(time)] = df.markov_mt.shift(time)
        df['mStateLt_'+str(time)] = df.markov_lt.shift(time)
        
        training_list = []
        training_list.append('mStateSt_'+str(time))
        training_list.append('mStateMid_'+str(time))
        training_list.append('mStateLt_'+str(time))
    return df, training_list

In [52]:
BTC_df.columns = BTC_df.columns.str.strip().str.lower()

BTC_df['markov_mt'] = BTC_df['m_mt'].astype('str') 
BTC_df['markov_mt'] = enc.fit_transform(BTC_df['m_mt'])

BTC_df['markov_st'] = BTC_df['m_st'].astype('str')
BTC_df['markov_st'] = enc.fit_transform(BTC_df['m_st'])

BTC_df['markov_lt'] = BTC_df['m_lt'].astype('str')
BTC_df['markov_lt'] = enc.fit_transform(BTC_df['m_lt'])


XRP_df['markov_mt'] = XRP_df['m_mt'].astype('str') 
XRP_df['markov_mt'] = enc.fit_transform(XRP_df['m_mt'])

XRP_df['markov_st'] = XRP_df['m_st'].astype('str')
XRP_df['markov_st'] = enc.fit_transform(XRP_df['m_st'])

XRP_df['markov_lt'] = XRP_df['m_lt'].astype('str')
XRP_df['markov_lt'] = enc.fit_transform(XRP_df['m_lt'])


BNB_df['markov_mt'] = BNB_df['m_mt'].astype('str') 
BNB_df['markov_mt'] = enc.fit_transform(BNB_df['m_mt'])

BNB_df['markov_st'] = BNB_df['m_st'].astype('str')
BNB_df['markov_st'] = enc.fit_transform(BNB_df['m_st'])

BNB_df['markov_lt'] = BNB_df['m_lt'].astype('str')
BNB_df['markov_lt'] = enc.fit_transform(BNB_df['m_lt'])


DOGE_df['markov_mt'] = DOGE_df['m_mt'].astype('str') 
DOGE_df['markov_mt'] = enc.fit_transform(DOGE_df['m_mt'])

DOGE_df['markov_st'] = DOGE_df['m_st'].astype('str')
DOGE_df['markov_st'] = enc.fit_transform(DOGE_df['m_st'])

DOGE_df['markov_lt'] = DOGE_df['m_lt'].astype('str')
DOGE_df['markov_lt'] = enc.fit_transform(DOGE_df['m_lt'])

time_periods = (2,3,4,5,6,7,8,9,10,12,15,17,20,25,30,35,40,45,50,60,70,80,90,100,150,200)

BTC_df, features_list = feature_extraction(BTC_df, time_periods)
BTC_df, training_list = training_states_shift(BTC_df, time_periods)

XRP_df = feature_extraction(XRP_df, time_periods)[0]
XRP_df = training_states_shift(XRP_df, time_periods)[0]

BNB_df = feature_extraction(BNB_df, time_periods)[0]
BNB_df = training_states_shift(BNB_df, time_periods)[0]

DOGE_df = feature_extraction(DOGE_df, time_periods)[0]
DOGE_df = training_states_shift(DOGE_df, time_periods)[0]


stack_df = BTC_df.append(XRP_df)
stack_df = stack_df.append(BNB_df)
stack_df = stack_df.append(DOGE_df)

2


  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]


3


  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]


4
5
6
7
8
9
10
12
15
17
20
25
30
35
40
45
50
60
70
80
90
100
150
200
2
3
4
5
6
7
8


  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0
  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['r

9


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


10


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


12


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


15


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


17


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


20


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1


25


  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0
  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at

30


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


35


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


40


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


45


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1


50


  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0
  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1


60


  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0
  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = -1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at

70


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = 1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


80


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = 1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


90


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = 1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0


100


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = 1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1


150


  df.at[i, 'loss_'+str(period)] = df.iloc[i-1]['close'] - df.iloc[i]['close']
  df.at[i, 'gain_'+str(period)] = 0
  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = 1
  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['return_'+str(time)] = (df['close'] / df['close'].shift(time)) - 1
  df.at[

200


  df.at[i, 'ag_'+str(period)] = up/period
  df.at[i, 'al_'+str(period)] = down/period
  df.at[i, 'rs_'+str(period)] = df.iloc[i]['ag_'+str(period)]/df.iloc[i]['al_'+str(period)]
  df.at[i, 'rsi_'+str(period)] = (100-(100/(1+df.iloc[i]['rs_'+str(period)])))
  df.at[i, 'updown_'+str(offset)] = 1


2


  df['std_'+str(time)] = df['return_'+str(time)].rolling(time).std()
  df['ma_'+str(time)] = df['close'].rolling(time).mean()
  df['mfi_'+str(time)]= money_flow_index(df['high'], df['low'], df['close'], df['volume_ccy'], time)
  df['avgvolm_'+str(time)] = df['volume_ccy'].rolling(time).mean()
  df['avgvolty_'+str(time)] = df['std_'+str(time)].rolling(time).mean()
  df['rtrend_'+str(time)] = df['updown_'+str(time)].rolling(time).sum()
  df['ci_'+str(time)] = get_ci(df['high'], df['low'], df['close'], time)
  df['mStateSt_'+str(time)] = df.markov_st.shift(time)
  df['mStateMid_'+str(time)] = df.markov_mt.shift(time)
  df['mStateLt_'+str(time)] = df.markov_lt.shift(time)
  df['mStateSt_'+str(time)] = df.markov_st.shift(time)
  df['mStateMid_'+str(time)] = df.markov_mt.shift(time)
  df['mStateLt_'+str(time)] = df.markov_lt.shift(time)
  df['mStateSt_'+str(time)] = df.markov_st.shift(time)
  df['mStateMid_'+str(time)] = df.markov_mt.shift(time)
  df['mStateLt_'+str(time)] = df.markov_lt.shi

ParserError: hour must be in 0..23: 59:23.6

In [37]:
"""
next steps
-merge the columns/format for all training data
-label more data
-add in funding rates, implied vols, spx, gold, qqq, silver, copper, tbill rate, fed decision day, money supply (M2)
-stack the training data
-set this up as training, validation, prediction
-try LR, RF, and NN
-research how to identify rangebound markets' peaks and troughs
-figure out how to re-apply predicted markov states to be fed into training
-run this three times: try both 
    1) predict short term, then 2) mid term then 3) long term
    1) predict long term, then 2) mid term then 3) short term
    and see which predicts better
-apply Kelly Criterion
-research stop losses

"""
print(list(BTC_df.columns))


['timestamp', 'open', 'high', 'low', 'close', 'volume', 'volume_ccy', 'volCcyQuote', 'm_mt', 'm_st', 'm_lt', 'markov_mt', 'markov_st', 'markov_lt', 'range', 'range%', 'obv', 'return', 'return_2', 'Date', 'gain', 'loss', 'ag', 'al', 'rs', 'rsi', 'gain_2', 'loss_2', 'ag_2', 'al_2', 'rs_2', 'rsi_2', 'updown_2', 'std_2', 'ma_2', 'mfi_2', 'avgvolm_2', 'avgvolty_2', 'rtrend_2']


In [68]:
dataCleaned = stack_df.dropna()
y = dataCleaned['markov_mt'] # Target variable

all_cols = BTC_df.columns.tolist()
drop_list = [item for item in all_cols if item is not in features_list]

X = stack_df.drop(drop_list, axis=1)


In [84]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
model = LogisticRegression(random_state=42, max_iter=99999) # Initialize the model
model.fit(X_train, y_train) # Train the model

LogisticRegression(max_iter=99999, random_state=42)

In [None]:
#classification algorithms 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))