In [15]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Load your dataset
df = pd.read_csv(r"BTC_2019_2023_1h.csv")

# Preprocess the dataset
def preprocess_data(df):
    df['datetime'] = pd.to_datetime(df['datetime'])
    
    # Custom RSI calculation
    def calculate_rsi(data, timeperiod=14):
        delta = data.diff()
        gain = delta.where(delta > 0, 0).rolling(window=timeperiod).mean()
        loss = -delta.where(delta < 0, 0).rolling(window=timeperiod).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))
    
    df['RSI'] = calculate_rsi(df['close'], timeperiod=14)
    
    # Custom EMA calculation
    def calculate_ema(data, timeperiod):
        return data.ewm(span=timeperiod, adjust=False).mean()
    
    df['EMA_20'] = calculate_ema(df['close'], timeperiod=20)
    df['EMA_50'] = calculate_ema(df['close'], timeperiod=50)
    
    # Custom ATR calculation
    def calculate_atr(high, low, close, timeperiod=14):
        high_low = high - low
        high_close = (high - close.shift()).abs()
        low_close = (low - close.shift()).abs()
        true_range = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
        return true_range.rolling(window=timeperiod).mean()
    
    df['ATR'] = calculate_atr(df['high'], df['low'], df['close'], timeperiod=14)
    
    # Custom MACD calculation
    def calculate_macd(data, fastperiod=12, slowperiod=26, signalperiod=9):
        ema_fast = calculate_ema(data, fastperiod)
        ema_slow = calculate_ema(data, slowperiod)
        macd = ema_fast - ema_slow
        signal = macd.ewm(span=signalperiod, adjust=False).mean()
        return macd, signal
    
    df['MACD'], df['MACD_signal'] = calculate_macd(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)
    
    # Custom Bollinger Bands calculation
    def calculate_bbands(data, timeperiod=20, num_std_dev=2):
        sma = data.rolling(window=timeperiod).mean()
        std_dev = data.rolling(window=timeperiod).std()
        upper_band = sma + (std_dev * num_std_dev)
        lower_band = sma - (std_dev * num_std_dev)
        return upper_band, sma, lower_band
    
    df['BB_upper'], df['BB_middle'], df['BB_lower'] = calculate_bbands(df['close'], timeperiod=20)
    
    # Custom OBV calculation
    def calculate_obv(close, volume):
        obv = volume.copy()
        obv[1:] = np.where(close[1:] > close[:-1].values, volume[1:], 
                           np.where(close[1:] < close[:-1].values, -volume[1:], 0))
        return obv.cumsum()
    
    df['OBV'] = calculate_obv(df['close'], df['volume'])
    
    # Custom Stochastic Oscillator calculation
    def calculate_stochastic(high, low, close, timeperiod=14):
        lowest_low = low.rolling(window=timeperiod).min()
        highest_high = high.rolling(window=timeperiod).max()
        stochastic_k = 100 * ((close - lowest_low) / (highest_high - lowest_low))
        stochastic_d = stochastic_k.rolling(window=3).mean()  # %D is typically a 3-period SMA of %K
        return stochastic_k, stochastic_d
    
    df['Stochastic_k'], df['Stochastic_d'] = calculate_stochastic(df['high'], df['low'], df['close'])
    
    # Drop rows with NaN values (caused by indicators calculation)
    df = df.dropna()
    
    # Normalize the data
    scaler = MinMaxScaler()
    df[['close', 'volume', 'RSI', 'EMA_20', 'EMA_50', 'ATR', 'MACD', 'MACD_signal', 'OBV', 'Stochastic_k', 'Stochastic_d']] = scaler.fit_transform(
        df[['close', 'volume', 'RSI', 'EMA_20', 'EMA_50', 'ATR', 'MACD', 'MACD_signal', 'OBV', 'Stochastic_k', 'Stochastic_d']])
    
    return df

def label_market_regimes(df):
    # Calculate returns and volatility
    df['returns'] = df['close'].pct_change()
    df['volatility'] = df['returns'].rolling(window=20).std() * np.sqrt(20)  # Annualized volatility

    # Define moving averages
    df['SMA_short'] = df['close'].rolling(window=50).mean()
    df['SMA_long'] = df['close'].rolling(window=70).mean()

    # Initialize labels
    regimes = []

    for i in range(len(df)):
        if i < 70:  # Skip initial rows due to rolling calculations
            regimes.append(None)
            continue

        # Classify market regime
        if df['SMA_short'].iloc[i] < df['SMA_long'].iloc[i] and df['volatility'].iloc[i] > df['volatility'].quantile(0.75):
            regimes.append(0.0)  # Strong Bear Market
        elif df['SMA_short'].iloc[i] < df['SMA_long'].iloc[i] and df['volatility'].iloc[i] > df['volatility'].quantile(0.5):
            regimes.append(0.25)  # Bear Market
        elif abs(df['SMA_short'].iloc[i] - df['SMA_long'].iloc[i]) < df['volatility'].quantile(0.25):
            regimes.append(0.5)  # Sideways Market
        elif df['SMA_short'].iloc[i] > df['SMA_long'].iloc[i] and df['volatility'].iloc[i] < df['volatility'].quantile(0.5):
            regimes.append(0.75)  # Weak Bull Market
        else:
            regimes.append(1.0)  # Strong Bull Market

    df['Market Regime Label'] = regimes
    df.dropna(inplace=True)  # Drop rows with NaN values

    return df

# Function to create sequences of a given window size
def create_sequences(data, window_size):
    sequences = []
    for i in range(len(data) - window_size):
        sequences.append(data[i:i + window_size])
    return np.array(sequences)

# LSTM Model for market regime classification
def create_lstm_model(input_shape):
    model = tf.keras.models.Sequential([
        tf.keras.layers.LSTM(128, return_sequences=False, input_shape=input_shape),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')  # Predict market regime between 0 and 1
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# Assign market regime based on prediction
def assign_market_regime(prediction):
    if prediction <= 0.2:
        return "Strong Bear Market"
    elif 0.2 < prediction <= 0.4:
        return "Bear Market"
    elif 0.4 < prediction <= 0.6:
        return "Sideways Market"
    elif 0.6 < prediction <= 0.8:
        return "Weak Bull Market"
    else:
        return "Strong Bull Market"

# Generate trading signals based on market regime
def generate_trade_signals(df, regime):
    signals = []
    for i in range(1, len(df)):
        if regime == "Strong Bear Market":
            if df['RSI'].iloc[i] < 25 and df['MACD'].iloc[i] < df['MACD_signal'].iloc[i]:
                signals.append("Buy")
            else:
                signals.append("Hold")
        elif regime == "Bear Market":
            if df['RSI'].iloc[i] < 30 and df['MACD'].iloc[i] > df['MACD_signal'].iloc[i]:
                signals.append("Buy")
            else:
                signals.append("Hold")
        elif regime == "Sideways Market":
            if df['close'].iloc[i] < df['BB_lower'].iloc[i]:
                signals.append("Buy")
            elif df['close'].iloc[i] > df['BB_upper'].iloc[i]:
                signals.append("Sell")
            else:
                signals.append("Hold")
        elif regime == "Weak Bull Market":
            if df['RSI'].iloc[i] > 50 and df['MACD'].iloc[i] > df['MACD_signal'].iloc[i]:
                signals.append("Buy")
            else:
                signals.append("Hold")
        elif regime == "Strong Bull Market":
            if df['RSI'].iloc[i] > 60:
                signals.append("Buy")
            else:
                signals.append("Hold")
    return signals

# Simulate trading based on generated signals
def simulate_trading(signals, df):
    position = None
    trades = []
    for i, signal in enumerate(signals):
        if signal == "Buy" and position is None:
            position = df['close'].iloc[i]
            trades.append(('Buy', df['close'].iloc[i]))
        elif signal == "Sell" and position is not None:
            trades.append(('Sell', df['close'].iloc[i]))
            position = None
    return trades

In [16]:
# Step 1: Preprocess data and generate labels
df = preprocess_data(df)
df = label_market_regimes(df)
window_size = 60  # Look-back window size

# Step 2: Create sequences and labels for LSTM model
X = create_sequences(df[['close', 'volume', 'RSI', 'EMA_20', 'EMA_50', 'ATR', 'MACD', 'MACD_signal', 'OBV', 'Stochastic_k', 'Stochastic_d']].values, window_size)
y = create_sequences(df['Market Regime Label'].values, window_size)  # Use labels for training

# Step 3: Split data for training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 4: Train model
model = create_lstm_model((window_size, X.shape[2]))
model.fit(X_train, y_train, epochs=20, batch_size=64, validation_data=(X_test, y_test))

# Step 5: Predict market regimes
market_regime_predictions = model.predict(X_test)
market_regime_labels = [assign_market_regime(pred) for pred in market_regime_predictions]

# Step 6: Assign predictions to DataFrame
df['Market Regime Prediction'] = [None] * (len(df) - len(market_regime_labels)) + market_regime_labels


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[['close', 'volume', 'RSI', 'EMA_20', 'EMA_50', 'ATR', 'MACD', 'MACD_signal', 'OBV', 'Stochastic_k', 'Stochastic_d']] = scaler.fit_transform(


Epoch 1/20


  super().__init__(**kwargs)


[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 19ms/step - loss: 0.0313 - val_loss: 0.0061
Epoch 2/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - loss: 0.0235 - val_loss: 0.0061
Epoch 3/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - loss: 0.0222 - val_loss: 0.0057
Epoch 4/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - loss: 0.0219 - val_loss: 0.0060
Epoch 5/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - loss: 0.0218 - val_loss: 0.0059
Epoch 6/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 19ms/step - loss: 0.0209 - val_loss: 0.0060
Epoch 7/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 18ms/step - loss: 0.0210 - val_loss: 0.0057
Epoch 8/20
[1m471/471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 18ms/step - loss: 0.0207 - val_loss: 0.0061
Epoch 9/20
[1m471/471[0m [32m━━━━━━━━━━

In [17]:
valid_predictions = df[df['Market Regime Prediction'].notna()]
trade_signals = generate_trade_signals(valid_predictions, valid_predictions['Market Regime Prediction'].iloc[-1])

# Step 2: Assign trade signals back to the main DataFrame
# Pad the beginning with 'Hold' to match the original DataFrame length
df['Trade Signal'] = ['Hold'] * (len(df) - len(trade_signals)) + trade_signals

In [18]:
df

Unnamed: 0.1,Unnamed: 0,datetime,open,high,low,close,volume,RSI,EMA_20,EMA_50,...,OBV,Stochastic_k,Stochastic_d,returns,volatility,SMA_short,SMA_long,Market Regime Label,Market Regime Prediction,Trade Signal
89,89,2019-09-12 10:00:00,10123.64,10177.88,10122.75,0.094213,0.001782,0.576880,0.081843,0.080672,...,0.659742,0.684707,0.605840,0.005333,0.022520,0.093554,0.094358,0.5,,Hold
90,90,2019-09-12 11:00:00,10152.45,10187.90,10137.45,0.094476,0.001726,0.514373,0.081940,0.080704,...,0.659817,0.788600,0.656486,0.002786,0.021148,0.093530,0.094324,0.5,,Hold
91,91,2019-09-12 12:00:00,10170.00,10239.21,10170.00,0.095532,0.001690,0.637061,0.082132,0.080779,...,0.659889,1.000000,0.836267,0.011179,0.022074,0.093529,0.094302,0.5,,Hold
92,92,2019-09-12 13:00:00,10235.34,10360.13,10219.15,0.097150,0.001760,0.715047,0.082466,0.080917,...,0.659965,0.948775,0.928792,0.016941,0.026874,0.093557,0.094309,0.5,,Hold
93,93,2019-09-12 14:00:00,10337.06,10357.05,10272.77,0.096551,0.001778,0.661255,0.082708,0.081025,...,0.659889,0.826861,0.942197,-0.006173,0.028203,0.093572,0.094320,0.5,,Hold
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37803,37803,2023-12-31 20:00:00,42659.90,42724.50,42543.30,0.596056,0.013286,0.609386,0.600164,0.604050,...,0.580759,0.471622,0.574641,-0.001575,0.014158,0.590941,0.592184,0.5,Sideways Market,Buy
37804,37804,2023-12-31 21:00:00,42599.20,42717.00,42558.20,0.595435,0.010649,0.494095,0.600232,0.604130,...,0.580307,0.312105,0.454064,-0.001043,0.013218,0.591066,0.592175,0.5,Sideways Market,Buy
37805,37805,2023-12-31 22:00:00,42559.00,42629.50,42111.90,0.591350,0.033613,0.416721,0.599891,0.604040,...,0.578885,0.217972,0.320643,-0.006860,0.014877,0.591135,0.592141,0.5,Sideways Market,Buy
37806,37806,2023-12-31 23:00:00,42294.80,42380.10,42083.10,0.591647,0.032837,0.287470,0.599612,0.603966,...,0.580274,0.313340,0.265184,0.000502,0.014713,0.591240,0.592127,0.5,Sideways Market,Buy


In [20]:
 # Run the trading simulation
trades = simulate_trading(df['Trade Signal'].values, df)

# Calculate returns for backtesting
def calculate_backtest_returns(trades, initial_balance=100000):
    balance = initial_balance
    position = 0  # Number of units held
    for action, price in trades:
        if action == "Buy" and position == 0:
            position = balance / price  # Buy all units with available balance
            balance = 0
        elif action == "Sell" and position > 0:
            balance = position * price  # Sell all units
            position = 0

    # Calculate final balance (assuming all positions are sold at the last close price)
    final_balance = balance + position * df['close'].iloc[-1] if position > 0 else balance
    return final_balance, (final_balance - initial_balance) / initial_balance * 100  # Final balance and percentage return

# Execute backtesting and print results
final_balance, percentage_return = calculate_backtest_returns(trades)
print(f"Final Balance: {final_balance}")
print(f"Total Return: {percentage_return:.2f}%")


Final Balance: 186272.95726886697
Total Return: 86.27%
