# Neurog Internship: (Task 12)

# All Libraries

In [1]:
# For Vizualization & Plotting
import matplotlib.pyplot as plt

from ta.volume import OnBalanceVolumeIndicator

# Other Necessary Libraries
import pandas as pd
import numpy as np
import copy
import os

# All Declarations

In [2]:
# Define the relative path to the CSV file
csv_filename = "BTCUSDT_1_Minute_From_2020_Till_Now.csv"

# Settig Up The Directory From The Directory Hierarchy
# Get the notebook's current directory
notebook_dir = os.getcwd()

# Go up one level to the parent directory
parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))

# Create the full path to the output CSV file in the 'ohlc' directory within 'data'
data_dir = os.path.join(parent_dir, "data")
ohlc_dir = os.path.join(data_dir, "ohlc")
full_csv_path = os.path.join(ohlc_dir, csv_filename)

# All Functions

## Utility Functions

In [3]:
# Function to return the timeframe of the dataframe's index
def get_timeframe(df):
    """
    Returns the inferred timeframe (frequency) of the datetime index of a DataFrame.
    
    Parameters:
        df (pd.DataFrame): DataFrame with datetime index.
    
    Returns:
        str: Timeframe (frequency) of the datetime index, e.g., '4H', '1D'.
    """
    # Check if the index is datetime
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        raise ValueError("DataFrame index must be datetime.")
    
    # Infer the frequency of the datetime index
    frequency = pd.infer_freq(df.index)

    if len(frequency) == 1:
        frequency = '1' + frequency
    
    if frequency is None:
        raise ValueError("Unable to infer the frequency of the datetime index.")
    
    return frequency

# Function that extracts previous 'N' times data from the latest data
def get_past_data(df, period, timeframe):
    """
    Returns data from the past specified period and aligns the start and end dates with the given timeframe.
    
    Parameters:
        df (pd.DataFrame): DataFrame with datetime index.
        period (str): Period string, e.g., '1Y' for one year, '1M' for one month.
        timeframe (str): Timeframe string, e.g., '1M' for one month, '4H' for four hours, '15T' for fifteen minutes.
    
    Returns:
        pd.DataFrame: DataFrame filtered by the specified period and aligned with the timeframe.
    """
    # Check if the index is datetime
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        raise ValueError("DataFrame index must be datetime.")
    
    # Mapping period strings to DateOffset arguments
    period_mapping = {
        'Y': 'years',
        'M': 'months',
        'W': 'weeks',
        'D': 'days',
        'H': 'hours',
        'T': 'minutes'
    }
    
    # Extract the time unit and quantity for the period
    period_unit = period[-1]
    period_quantity = int(period[:-1])
    
    # Get the corresponding DateOffset argument for the period
    if period_unit not in period_mapping:
        raise ValueError("Invalid period format. Use formats like '1Y', '1M', '1W', '1D', etc.")
    
    period_offset_arg = period_mapping[period_unit]
    period_offset = pd.DateOffset(**{period_offset_arg: period_quantity})
    
    # Get the last date in the DataFrame
    last_date = df.index[-1]
    
    # Calculate the start date
    start_date = last_date - period_offset
    
    # Align start_date to the nearest preceding datetime divisible by the timeframe
    freq = pd.tseries.frequencies.to_offset(timeframe)
    aligned_start_date = start_date.floor(freq)
    
    # Align end_date to the nearest preceding datetime divisible by the timeframe
    aligned_end_date = last_date.floor(freq)
    
    # Filter the DataFrame for the desired date range
    filtered_df_last = df[(df.index >= aligned_start_date) & (df.index <= aligned_end_date)]
    filtered_df_first = df[(df.index < aligned_start_date)]
    
    return filtered_df_first, filtered_df_last

## (Function To Convert DF To Any Timeframe) - Taken From The Previous Day

In [4]:
# Functions converts the dataframe into any given time frame.
def convert_1m_to_any_timeframe(df: pd.DataFrame, timeframe: str) -> pd.DataFrame:
    """
    Convert a DataFrame of 1-minute OHLC data to any given timeframe.

    Args:
    - df (pd.DataFrame): DataFrame containing 1-minute OHLC data. 
      The DataFrame should have a datetime index & columns ['Open', 'High', 'Low', 'Close', 'Volume'].
    - timeframe (str): The desired timeframe to resample the data to (e.g., '1H' for 1 hour, '1D' for 1 day).

    Returns:
    - pd.DataFrame: Resampled DataFrame with OHLC data in the specified timeframe. The index will be renamed to
      reflect the new timeframe.

    Example:
    ```
    resampled_df = convert_1m_to_any_timeframe(ohlc_df, '1H')
    ```
    """
    # Ensure the DataFrame index is of datetime type
    df.index = pd.to_datetime(df.index)
    
    # Try resampling the data to the desired timeframe
    try:
        df_resampled = df.resample(timeframe).agg({
            'Open': 'first',  # Take the first 'Open' value in the timeframe
            'High': 'max',    # Take the maximum 'High' value in the timeframe
            'Low': 'min',     # Take the minimum 'Low' value in the timeframe
            'Close': 'last',  # Take the last 'Close' value in the timeframe
            'Volume': 'mean'  # Take the mean 'Volume' value in the timeframe
        })
    except Exception as e:
        print(f"An error occurred while resampling! Error message: {e}")
        return pd.DataFrame()  # Return an empty DataFrame in case of error

    # Rename the index to reflect the new timeframe
    df_resampled.index.rename(f'Open time ({timeframe})', inplace = True)
    
    return df_resampled

## OBV Indicator Function

In [5]:
def calculate_obv(df: pd.DataFrame, fillna: bool = False) -> pd.DataFrame:
    """
    Generate On-Balance Volume (OBV) Values.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' and 'Volume' columns.
    - fillna (bool): Parameter that specifies whether or not to fill NaN values.

    Returns:
    - df (pd.DataFrame): DataFrame with added 'OBV' column.
    """
    obv = OnBalanceVolumeIndicator(
        close = df['Close'],
        volume = df['Volume'],
        fillna = True
    )
    
    df['OBV'] = obv.on_balance_volume()

    return df


def generate_obv_signals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Generate trading signals based on On-Balance Volume (OBV) indicator.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' and 'Volume' columns.
    - fillna (bool): Parameter that specifies whether or not to fill NaN values.

    Returns:
    - df (pd.DataFrame): DataFrame with added 'OBV' and 'Signal' columns.
    """
    df['Signal'] = 0  # Initialize Signal column
    
    # Generate signals based on OBV
    df.loc[df['OBV'] > df['OBV'].shift(1), 'Signal'] = 1  # Buy signal
    df.loc[df['OBV'] < df['OBV'].shift(1), 'Signal'] = -1  # Sell signal

    return df

## Backtesting Function: Previous Week's Final

In [6]:
def generate_backtest_df(data, data_1m, initial_balance=1000, transaction_fee=0.01):
    """
    Generate a backtesting DataFrame based on MACD signals.

    Args:
        data (pd.DataFrame): DataFrame containing historical data with columns:
                             'Open time (4H)', 'Open', 'High', 'Low', 'Close', 'Volume', 'MACD_Signal'.
                             The DataFrame must have 'Open time (4H)' as a DateTime index.
        data_1m (pd.DataFrame): DataFrame containing 1-minute interval data with 'Open' prices.
        initial_balance (float): Initial balance for the backtest.
        transaction_fee (float): Transaction fee as a percentage of the current balance for each trade.

    Returns:
        pd.DataFrame: A DataFrame containing the backtesting results with columns:
                      'Open time (4H)', 'direction', 'entry price', 'close price', 'PNL', 'Balance'.
    """
    
    def find_first_change(signal):
        # Convert the list to a numpy array
        signal_array = np.array(signal)

        # Find indices of non-zero elements
        non_zero_indices = np.flatnonzero(signal_array != 0)

        # Find where the value changes
        changes = np.where(np.diff(signal_array[non_zero_indices]))[0] + 1
        changes = np.insert(changes, 0, 0)
        
        if len(non_zero_indices) < 1:
            return signal.index[0], signal.index[-1]

        if len(changes) == 1:
            # Get the indices of the first change
            first_change_start = non_zero_indices[changes[0]]
            first_change_end = len(signal) - 1
        else:
            # Get the indices of the first change
            first_change_start = non_zero_indices[changes[0]]
            first_change_end = non_zero_indices[changes[0 + 1]]
        
        # Get the starting and ending time of the direction change
        trade_start_time = signal.index[first_change_start]
        trade_end_time = signal.index[first_change_end]

        # Return the start and end time tuple
        return (trade_start_time, trade_end_time)
    
    # For index name
    index_name = data.index.name

    # For timeframe
    time_frame = pd.infer_freq(data.index)

    # If timeframe is like 'H' or 'D' or 'Y' then append 1 for specificity
    if len(time_frame) == 1:
        time_frame = '1' + time_frame

    # Extract necessary columns as numpy arrays
    high_prices_1m = data_1m['High']
    low_prices_1m = data_1m['Low']
    open_prices_1m = data_1m['Open']
    open_prices = data['Open']
    signals = data['Signal']
    
    # Initialize the exit indices for tp or sl hit (takes the lowest: which happened first)
    exit_index_tp = None
    exit_index_sl = None
    
    # Initialize the backtest results array
    backtest_data = []

    # Initialize trade parameters
    tp = 0.05  # 5% take profit
    sl = 0.03  # 3% stop loss
    
    # Initialize the balance
    balance = initial_balance
    
    # Get last date
    last_date = signals.index[-1]
    
    # Initializing List To Store Directions For The Trade
    directions = []
    
    # Iterate
    while(True):
        trade_start_time, trade_end_time = find_first_change(signals)
            
        direction_start = 'long' if signals[trade_start_time] == 1 else 'short'
        direction_end = 'long' if signals[trade_end_time] == 1 else 'short'
        entry_price = open_prices[trade_start_time]
        
        # Calculate take profit and stop loss prices
        if direction_start == 'long':
            tp_price = entry_price * (1 + tp)
            sl_price = entry_price * (1 - sl)
        else:
            tp_price = entry_price * (1 - tp)
            sl_price = entry_price * (1 + sl)
        
        # Find the exit point for the trade
        exit_index = None
        action = 'direction'  # Default action is direction change
        
        # getting to the closest time of that interval
        # Assuming trade_end_time is a datetime object
        # Basically doing this, so the tp and sl hit only checks and
        # compares from the (T + 1)th time till the trade end time.
        trade_start_time_matching = pd.to_datetime(trade_start_time)
        add_minute = pd.Timedelta('1m')
        trade_start_time_matching = trade_start_time_matching + add_minute
        
        if direction_start == 'long':
            tp_hit = np.where(high_prices_1m[trade_start_time_matching:trade_end_time] >= tp_price)[0]
            sl_hit = np.where(low_prices_1m[trade_start_time_matching:trade_end_time] <= sl_price)[0]
        else:
            tp_hit = np.where(low_prices_1m[trade_start_time_matching:trade_end_time] <= tp_price)[0]
            sl_hit = np.where(high_prices_1m[trade_start_time_matching:trade_end_time] >= sl_price)[0]

        if len(tp_hit) > 0:
            exit_index = tp_hit[0]
            action = 'tp'
        if len(sl_hit) > 0 and (len(tp_hit) == 0 or sl_hit[0] < tp_hit[0]):
            exit_index = sl_hit[0]
            action = 'sl'
            
        if action == 'direction':
            close_price = open_prices[trade_end_time]
        else:
            if action == 'tp':
                if direction_start == 'long':
                    trade_end_time = high_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = high_prices_1m[trade_end_time]
                else:
                    trade_end_time = low_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = low_prices_1m[trade_end_time]
            else:
                if direction_start == 'long':
                    trade_end_time = low_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = low_prices_1m[trade_end_time]
                else:
                    trade_end_time = high_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = high_prices_1m[trade_end_time]
                    
            if len(directions) != 0:
                direction_end = directions[-1]
                direction_start = directions[-1]
        
        # Record the trade entry and exit
        backtest_data.append([trade_start_time, direction_start, entry_price, 0, None])
        backtest_data.append([trade_end_time, direction_end, entry_price, close_price, action])
            
        # getting to the closest time of that interval
        # Assuming trade_end_time is a datetime object
        trade_end_time = pd.to_datetime(trade_end_time)

        # Define the time format
        time_format = pd.Timedelta(time_frame)

        # Calculate the remainder when trade_end_time is divided by time_format
        remainder = trade_end_time.to_numpy().astype('datetime64[ns]').astype(np.int64) % time_format.to_numpy().astype('timedelta64[ns]').astype(np.int64)

        # If remainder is not zero, round up to the next multiple of time_format
        if remainder != 0:
            trade_end_time = trade_end_time + (time_format - pd.Timedelta(remainder, unit='ns'))
            
        # This is the condition that would end the loop (else it would run infinitely)
        if trade_end_time >= last_date:
            break
            
        signals = signals[trade_end_time:]
        
        directions.append(direction_start)
        directions.append(direction_end)

    backtest_df = pd.DataFrame(backtest_data, columns=[index_name, 'direction', 'entry price', 'close price', 'action'])

    # Calculate PNL using vectorized operations
    entry_prices = backtest_df['entry price'][1::2].values
    close_prices = backtest_df['close price'][1::2].values
    directions = backtest_df['direction'][0:-1:2].values

    pnl = np.where(directions == 'long',
                   ((close_prices - entry_prices) / entry_prices) * 100,
                   ((entry_prices - close_prices) / entry_prices) * 100)

    # Insert the PNL values back into the DataFrame
    backtest_df.loc[1::2, 'PNL'] = pnl

    # Update balance considering PNL and transaction fees
    balances = [initial_balance]
    for pnl_value in pnl:
        transaction_cost = balances[-1] * (transaction_fee / 100)
        new_balance = balances[-1] + (np.abs(balances[-1]) * (pnl_value / 100)) - transaction_cost
        balances.append(new_balance)
    
    # Insert the balance values back into the DataFrame
    backtest_df['Balance'] = pd.Series(np.repeat(balances[1:], 2)[:len(backtest_df)])

    return backtest_df

# Main

### Reading The Previously Saved Data For BTCUSDT

In [7]:
BTCUSDT_Filtered_data_1M = pd.read_csv(full_csv_path, usecols = ['Open time (1M)', 'Open', 'High', 'Low', 'Close', 'Volume'])
BTCUSDT_Filtered_data_1M.set_index('Open time (1M)', inplace = True)

# Convert the index to datetime format
BTCUSDT_Filtered_data_1M.index = pd.to_datetime(BTCUSDT_Filtered_data_1M.index)

In [8]:
BTCUSDT_Filtered_data_1M

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,7169.71,7165.44,7167.83,3509.860
2020-01-01 00:01:00,7167.83,7168.28,7158.66,7159.95,3821.170
2020-01-01 00:02:00,7161.03,7165.40,7161.03,7162.46,3041.710
2020-01-01 00:03:00,7161.74,7164.27,7160.30,7161.03,3682.650
2020-01-01 00:04:00,7161.03,7164.25,7160.15,7160.15,2936.690
...,...,...,...,...,...
2024-07-18 03:43:00,64280.10,64280.10,64280.10,64280.10,0.000
2024-07-18 03:44:00,64280.10,64438.20,64280.10,64280.10,1.505
2024-07-18 03:45:00,64438.20,64438.20,64280.10,64438.20,0.053
2024-07-18 03:46:00,64280.10,64438.00,64280.10,64280.20,1.544


## Generate Different Timeframe

### Converting 1 Minute Interval Data to 4 Hour Interval Data

In [9]:
BTCUSDT_Filtered_data_4H = convert_1m_to_any_timeframe(copy.deepcopy(BTCUSDT_Filtered_data_1M), '4h')
BTCUSDT_Filtered_data_4H

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,7207.23,7156.65,7202.48,3428.062092
2020-01-01 04:00:00,7202.48,9592.00,6871.45,7241.63,285722.475587
2020-01-01 08:00:00,7241.63,7243.46,7215.94,7223.72,3629.004242
2020-01-01 12:00:00,7223.71,7233.33,7178.00,7201.02,3491.032767
2020-01-01 16:00:00,7201.56,7236.58,7187.86,7220.85,3125.298729
...,...,...,...,...,...
2024-07-17 08:00:00,66196.50,66998.80,63555.00,65000.10,4.510771
2024-07-17 12:00:00,65000.10,66000.00,63555.00,65503.90,5.291817
2024-07-17 16:00:00,65446.30,66000.00,63892.40,64689.00,5.464083
2024-07-17 20:00:00,64669.70,64999.00,64240.00,64603.20,9.204992


### Converting 1 Minute Interval Data to 8 Hour Interval Data

In [10]:
BTCUSDT_Filtered_data_8H = convert_1m_to_any_timeframe(copy.deepcopy(BTCUSDT_Filtered_data_1M), '8h')
BTCUSDT_Filtered_data_8H

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (8h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,9592.00,6871.45,7241.63,144575.268840
2020-01-01 08:00:00,7241.63,7243.46,7178.00,7201.02,3560.018504
2020-01-01 16:00:00,7201.56,7253.21,7187.86,7242.21,3283.091000
2020-01-02 00:00:00,7242.21,7243.41,7155.02,7169.51,3501.885115
2020-01-02 08:00:00,7169.51,7181.53,7105.17,7160.60,3373.464144
...,...,...,...,...,...
2024-07-16 16:00:00,63470.80,74499.00,56820.20,64647.40,34.055148
2024-07-17 00:00:00,64670.80,66999.90,64400.00,66196.50,7.613233
2024-07-17 08:00:00,66196.50,66998.80,63555.00,65503.90,4.901294
2024-07-17 16:00:00,65446.30,66000.00,63892.40,64603.20,7.334537


# Testing More DL Techniques For Signal Generation

### Splitting Data Into Train And Test And Pre-processing Data

In [11]:
train_data, test_data = get_past_data(copy.deepcopy(BTCUSDT_Filtered_data_4H), '1Y', get_timeframe(BTCUSDT_Filtered_data_4H))
train_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,7207.23,7156.65,7202.48,3428.062092
2020-01-01 04:00:00,7202.48,9592.00,6871.45,7241.63,285722.475587
2020-01-01 08:00:00,7241.63,7243.46,7215.94,7223.72,3629.004242
2020-01-01 12:00:00,7223.71,7233.33,7178.00,7201.02,3491.032767
2020-01-01 16:00:00,7201.56,7236.58,7187.86,7220.85,3125.298729
...,...,...,...,...,...
2023-07-17 04:00:00,30333.00,30596.50,30150.00,30376.50,67.483792
2023-07-17 08:00:00,30454.20,30596.50,30136.00,30312.50,69.958179
2023-07-17 12:00:00,30312.50,30498.90,30254.10,30254.10,68.812575
2023-07-17 16:00:00,30497.90,30564.60,30160.60,30300.10,64.068967


In [12]:
test_data

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-18 00:00:00,29657.9,30498.0,29657.8,30058.9,66.161904
2023-07-18 04:00:00,29944.1,30599.0,29919.9,30168.6,64.356150
2023-07-18 08:00:00,30548.4,30548.4,30155.0,30476.0,62.437558
2023-07-18 12:00:00,30476.0,30540.0,30155.0,30276.4,94.781367
2023-07-18 16:00:00,30410.0,30410.0,29960.0,30198.6,65.917963
...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992


### Gated Recurrent Unit (GRU - PyTorch) Model

In [31]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.optim as optim

# # Prepare the data
# df_2 = BTCUSDT_Filtered_data_4H[['Close']].reset_index()
# df_2.columns = ['ds', 'y']

# # Define train-test split
# train_data = df_2[df_2['ds'] < '2023-07-17 20:00:00']
# test_data = df_2[df_2['ds'] >= '2023-07-17 20:00:00']

# Scaling the data
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data)
test_scaled = scaler.transform(test_data)

# Create sequences
def create_sequences(data, seq_length, look_ahead=2):
    X, y = [], []
    for i in range(len(data) - seq_length - look_ahead + 1):
        X.append(data[i:i + seq_length])
        # y.append(data[i + seq_length + look_ahead - 1])
        y.append(data[i + seq_length + look_ahead - 1, -2])  # Predicting 'Close' value
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 24

# Create sequences for training and testing
X_train_data, y_train_data = create_sequences(train_scaled, seq_length, look_ahead=2)
X_test_data, y_test_data = create_sequences(test_scaled, seq_length, look_ahead=2)

# Convert to PyTorch tensors
X_train_data = torch.tensor(X_train_data, dtype=torch.float32)
y_train_data = torch.tensor(y_train_data, dtype=torch.float32)
X_test_data = torch.tensor(X_test_data, dtype=torch.float32)
y_test_data = torch.tensor(y_test_data, dtype=torch.float32)

X_train_data

tensor([[[4.7885e-02, 2.1173e-03, 8.0513e-02, 4.8221e-02, 1.1998e-02],
         [4.8221e-02, 4.5143e-03, 7.7304e-02, 4.8622e-02, 1.0000e+00],
         [4.8622e-02, 2.1538e-03, 8.1180e-02, 4.8439e-02, 1.2701e-02],
         ...,
         [4.9777e-02, 2.2803e-03, 8.2243e-02, 4.9407e-02, 1.1291e-02],
         [4.9373e-02, 2.2499e-03, 8.2021e-02, 4.9395e-02, 1.0812e-02],
         [4.9403e-02, 2.3111e-03, 8.1676e-02, 4.9279e-02, 1.0567e-02]],

        [[4.8221e-02, 4.5143e-03, 7.7304e-02, 4.8622e-02, 1.0000e+00],
         [4.8622e-02, 2.1538e-03, 8.1180e-02, 4.8439e-02, 1.2701e-02],
         [4.8439e-02, 2.1436e-03, 8.0753e-02, 4.8206e-02, 1.2218e-02],
         ...,
         [4.9373e-02, 2.2499e-03, 8.2021e-02, 4.9395e-02, 1.0812e-02],
         [4.9403e-02, 2.3111e-03, 8.1676e-02, 4.9279e-02, 1.0567e-02],
         [4.9279e-02, 2.2784e-03, 8.1928e-02, 4.9723e-02, 1.0210e-02]],

        [[4.8622e-02, 2.1538e-03, 8.1180e-02, 4.8439e-02, 1.2701e-02],
         [4.8439e-02, 2.1436e-03, 8.0753e-02,

In [32]:
y_train_data

tensor([0.0507, 0.0508, 0.0504,  ..., 0.2847, 0.2851, 0.2806])

In [35]:
from tqdm import tqdm

# Define the GRU model
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
    
    def forward(self, x):
        h0 = torch.zeros(1, x.size(0), self.hidden_size).to(x.device)
        out, _ = self.gru(x, h0)
        out = self.fc(out[:, -1, :])
        return out

# Initialize the model, loss function, and optimizer
input_size = 5
hidden_size = 50
output_size = 1
model = GRUModel(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model
num_epochs = 20
batch_size = 32

for epoch in tqdm(range(num_epochs)):
    model.train()
    for i in range(0, len(X_train_data), batch_size):
        X_batch = X_train_data[i:i+batch_size]
        y_batch = y_train_data[i:i+batch_size]

        # Ensure the target batch is reshaped to match the output shape
        y_batch = y_batch.view(-1, 1)
        
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

  5%|████▏                                                                              | 1/20 [00:02<00:52,  2.75s/it]

Epoch 1/20, Loss: 0.00014408896095119417


 10%|████████▎                                                                          | 2/20 [00:05<00:47,  2.63s/it]

Epoch 2/20, Loss: 1.152137338067405e-05


 15%|████████████▍                                                                      | 3/20 [00:07<00:44,  2.60s/it]

Epoch 3/20, Loss: 1.1117975191154983e-05


 20%|████████████████▌                                                                  | 4/20 [00:10<00:42,  2.63s/it]

Epoch 4/20, Loss: 1.54192530317232e-05


 25%|████████████████████▊                                                              | 5/20 [00:13<00:39,  2.62s/it]

Epoch 5/20, Loss: 1.984718255698681e-05


 30%|████████████████████████▉                                                          | 6/20 [00:15<00:37,  2.65s/it]

Epoch 6/20, Loss: 1.4182936865836382e-05


 35%|█████████████████████████████                                                      | 7/20 [00:18<00:34,  2.64s/it]

Epoch 7/20, Loss: 1.1061551049351692e-05


 40%|█████████████████████████████████▏                                                 | 8/20 [00:21<00:33,  2.80s/it]

Epoch 8/20, Loss: 1.1678093869704753e-05


 45%|█████████████████████████████████████▎                                             | 9/20 [00:24<00:30,  2.81s/it]

Epoch 9/20, Loss: 1.2718658581434283e-05


 50%|█████████████████████████████████████████                                         | 10/20 [00:27<00:29,  2.94s/it]

Epoch 10/20, Loss: 1.3023787687416188e-05


 55%|█████████████████████████████████████████████                                     | 11/20 [00:30<00:26,  2.95s/it]

Epoch 11/20, Loss: 1.2840381714340765e-05


 60%|█████████████████████████████████████████████████▏                                | 12/20 [00:33<00:24,  3.07s/it]

Epoch 12/20, Loss: 1.250615059689153e-05


 65%|█████████████████████████████████████████████████████▎                            | 13/20 [00:37<00:22,  3.18s/it]

Epoch 13/20, Loss: 1.2204352060507517e-05


 70%|█████████████████████████████████████████████████████████▍                        | 14/20 [00:40<00:19,  3.27s/it]

Epoch 14/20, Loss: 1.199162124976283e-05


 75%|█████████████████████████████████████████████████████████████▌                    | 15/20 [00:44<00:16,  3.33s/it]

Epoch 15/20, Loss: 1.1797291335824411e-05


 80%|█████████████████████████████████████████████████████████████████▌                | 16/20 [00:47<00:12,  3.22s/it]

Epoch 16/20, Loss: 1.166961737908423e-05


 85%|█████████████████████████████████████████████████████████████████████▋            | 17/20 [00:50<00:09,  3.11s/it]

Epoch 17/20, Loss: 1.151490505435504e-05


 90%|█████████████████████████████████████████████████████████████████████████▊        | 18/20 [00:52<00:05,  3.00s/it]

Epoch 18/20, Loss: 1.1459955203463323e-05


 95%|█████████████████████████████████████████████████████████████████████████████▉    | 19/20 [00:56<00:03,  3.20s/it]

Epoch 19/20, Loss: 1.1515256119309925e-05


100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:59<00:00,  2.99s/it]

Epoch 20/20, Loss: 1.1519100553414319e-05





In [39]:
# Option 1: Use a separate scaler for the target variable

# Fit a separate scaler on the target column
scaler_target = MinMaxScaler()
scaler_target.fit(np.array(train_data['Close']).reshape(-1, 1))

# Predict and inverse transform the predictions
model.eval()
with torch.no_grad():
    predictions_scaled = model(X_test_data).cpu().numpy()

# Inverse transform using the target scaler
predictions = scaler_target.inverse_transform(predictions_scaled)
actuals = scaler_target.inverse_transform(y_test_data.cpu().numpy().reshape(-1, 1))

# Convert predictions to a DataFrame
results = pd.DataFrame({'Actual': actuals.flatten(), 'Predicted': predictions.flatten()}, index=test_data.index[seq_length+1:])

# Display the results
results

Unnamed: 0_level_0,Actual,Predicted
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1
2023-07-22 04:00:00,29800.998047,30071.865234
2023-07-22 08:00:00,30109.798828,30074.125000
2023-07-22 12:00:00,29889.398438,30031.988281
2023-07-22 16:00:00,30090.000000,30074.136719
2023-07-22 20:00:00,29747.500000,30010.955078
...,...,...
2024-07-17 08:00:00,65000.101562,53043.718750
2024-07-17 12:00:00,65503.902344,53678.269531
2024-07-17 16:00:00,64689.000000,53562.722656
2024-07-17 20:00:00,64603.203125,53642.714844


In [40]:
# Assuming your results DataFrame is named 'results'
# and contains 'Actual' and 'Predicted' columns.

# Define the function to generate signals
def generate_signal(row):
    if row['Actual'] > row['Predicted']:
        return -1
    elif row['Actual'] < row['Predicted']:
        return 1
    else:
        return 0

# Apply the function to create the 'Signal' column
results['Signal'] = results.apply(generate_signal, axis=1)

# Display the updated DataFrame with the 'Signal' column
results

Unnamed: 0_level_0,Actual,Predicted,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-07-22 04:00:00,29800.998047,30071.865234,-1
2023-07-22 08:00:00,30109.798828,30074.125000,1
2023-07-22 12:00:00,29889.398438,30031.988281,-1
2023-07-22 16:00:00,30090.000000,30074.136719,1
2023-07-22 20:00:00,29747.500000,30010.955078,-1
...,...,...,...
2024-07-17 08:00:00,65000.101562,53043.718750,1
2024-07-17 12:00:00,65503.902344,53678.269531,1
2024-07-17 16:00:00,64689.000000,53562.722656,1
2024-07-17 20:00:00,64603.203125,53642.714844,1


In [41]:
df_4h_prev_1y = BTCUSDT_Filtered_data_4H[str(results.index[0]): str(results.index[-1])].copy()
df_4h_prev_1y

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-22 04:00:00,30000.0,30222.0,29801.0,29801.0,62.601533
2023-07-22 08:00:00,29856.8,30200.0,29800.0,30109.8,62.598546
2023-07-22 12:00:00,29830.0,30145.0,29800.3,29889.4,58.040142
2023-07-22 16:00:00,30040.0,30145.0,29710.0,30090.0,64.311046
2023-07-22 20:00:00,29796.1,30130.0,29740.0,29747.5,70.574133
...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992


In [42]:
# Use .loc to ensure proper assignment without the warning
df_4h_prev_1y.loc[:, 'Signal'] = results['Signal']
df_4h_prev_1y

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-22 04:00:00,30000.0,30222.0,29801.0,29801.0,62.601533,-1
2023-07-22 08:00:00,29856.8,30200.0,29800.0,30109.8,62.598546,1
2023-07-22 12:00:00,29830.0,30145.0,29800.3,29889.4,58.040142,-1
2023-07-22 16:00:00,30040.0,30145.0,29710.0,30090.0,64.311046,1
2023-07-22 20:00:00,29796.1,30130.0,29740.0,29747.5,70.574133,-1
...,...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771,1
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817,1
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083,1
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992,1


In [43]:
df_1m_prev_1y = BTCUSDT_Filtered_data_1M[str(results.index[0]): str(results.index[-1])].copy()
df_1m_prev_1y

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-22 04:00:00,30000.0,30098.6,29825.1,29829.6,97.225
2023-07-22 04:01:00,30098.6,30098.6,29830.4,29860.1,58.695
2023-07-22 04:02:00,30098.6,30098.6,29860.1,30098.6,54.673
2023-07-22 04:03:00,30097.7,30098.6,29901.0,29915.1,19.208
2023-07-22 04:04:00,30041.8,30098.6,29933.9,29937.7,158.855
...,...,...,...,...,...
2024-07-17 23:56:00,64637.9,64742.9,64242.9,64443.6,6.473
2024-07-17 23:57:00,64443.6,64742.9,64242.9,64700.0,2.546
2024-07-17 23:58:00,64700.0,64700.0,64603.2,64603.2,1.115
2024-07-17 23:59:00,64689.0,64700.0,64603.2,64603.2,0.079


In [44]:
backtest_df_gru_torch = generate_backtest_df(df_4h_prev_1y, df_1m_prev_1y)
backtest_df_gru_torch

Unnamed: 0,Open time (4h),direction,entry price,close price,action,PNL,Balance
0,2023-07-22 04:00:00,short,30000.0,0.0,,,1004.673333
1,2023-07-22 08:00:00,long,30000.0,29856.8,direction,0.477333,1004.673333
2,2023-07-22 08:00:00,long,29856.8,0.0,,,1003.671053
3,2023-07-22 12:00:00,short,29856.8,29830.0,direction,-0.089762,1003.671053
4,2023-07-22 12:00:00,short,29830.0,0.0,,,996.504950
...,...,...,...,...,...,...,...
461,2024-07-15 18:40:00,long,61889.4,59000.2,sl,-4.668328,2687.121839
462,2024-07-15 20:00:00,long,62625.8,0.0,,,2918.519760
463,2024-07-16 20:03:00,long,62625.8,68025.0,tp,8.621367,2918.519760
464,2024-07-17 00:00:00,long,64670.8,0.0,,,2918.895816


In [45]:
backtest_df_gru_torch.PNL.sum()

133.58102461348

### Gated Recurrent Unit (GRU - Tensorflow) Model

In [52]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Input, Dropout, BatchNormalization, RepeatVector, TimeDistributed
from sklearn.metrics import mean_squared_error

# # Prepare the data
# df_2 = BTCUSDT_Filtered_data_4H[['Open', 'High', 'Low', 'Close']].reset_index()
# df_2.columns = ['ds', 'Open', 'High', 'Low', 'Close']

# # Define train-test split
# train_data = df_2[df_2['ds'] < '2023-07-17 20:00:00']
# test_data = df_2[df_2['ds'] >= '2023-07-17 20:00:00']

# Scaling the data
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data[['Open', 'High', 'Low', 'Close']])
test_scaled = scaler.transform(test_data[['Open', 'High', 'Low', 'Close']])

# Create sequences
def create_sequences(data, seq_length, look_ahead=2):
    X, y = [], []
    for i in range(len(data) - seq_length - look_ahead + 1):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length + look_ahead - 1, -1])  # Predicting 'Close' value
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 74

# Create sequences for training and testing
X_train_data, y_train_data = create_sequences(train_scaled, seq_length, look_ahead=2)
X_test_data, y_test_data = create_sequences(test_scaled, seq_length, look_ahead=2)

# Reshape input to be [samples, time steps, features]
X_train_data = X_train_data.reshape((X_train_data.shape[0], X_train_data.shape[1], 4))
X_test_data = X_test_data.reshape((X_test_data.shape[0], X_test_data.shape[1], 4))

X_train_data

array([[[0.0478847 , 0.00211734, 0.08051301, 0.0482208 ],
        [0.0482208 , 0.00451434, 0.07730448, 0.04862234],
        [0.04862234, 0.00215376, 0.08118003, 0.04843865],
        ...,
        [0.05782623, 0.00310729, 0.09082172, 0.05761762],
        [0.05761772, 0.00307971, 0.09106427, 0.05799618],
        [0.05800777, 0.0031098 , 0.09123145, 0.05753372]],

       [[0.0482208 , 0.00451434, 0.07730448, 0.04862234],
        [0.04862234, 0.00215376, 0.08118003, 0.04843865],
        [0.04843855, 0.00214357, 0.0807532 , 0.04820583],
        ...,
        [0.05761772, 0.00307971, 0.09106427, 0.05799618],
        [0.05800777, 0.0031098 , 0.09123145, 0.05753372],
        [0.05755741, 0.00304054, 0.09066174, 0.05706551]],

       [[0.04862234, 0.00215376, 0.08118003, 0.04843865],
        [0.04843855, 0.00214357, 0.0807532 , 0.04820583],
        [0.04821137, 0.00214684, 0.08086413, 0.04840921],
        ...,
        [0.05800777, 0.0031098 , 0.09123145, 0.05753372],
        [0.05755741, 0.003040

In [53]:
y_train_data

array([0.05764951, 0.05731854, 0.05752151, ..., 0.2846501 , 0.2851219 ,
       0.28058442])

In [54]:
model = Sequential()
model.add(Input(shape=(seq_length, 4)))
model.add(GRU(128, activation='relu', return_sequences=True))
model.add(GRU(128, return_sequences=True))
model.add(Dropout(0.3))
model.add(GRU(64, activation='relu', return_sequences=True))
model.add(Dropout(0.2))
model.add(GRU(32, activation='relu', return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mse')
model.summary()

In [55]:
# Train the model
model.fit(X_train_data, y_train_data, epochs=20, batch_size=32, verbose=1)

Epoch 1/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 151ms/step - loss: 0.0147
Epoch 2/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 166ms/step - loss: 0.0031
Epoch 3/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 173ms/step - loss: 0.0032
Epoch 4/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 192ms/step - loss: 0.0025
Epoch 5/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 158ms/step - loss: 0.0022
Epoch 6/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 169ms/step - loss: 0.0028
Epoch 7/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 175ms/step - loss: 0.0019
Epoch 8/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 166ms/step - loss: 0.0023
Epoch 9/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 167ms/step - loss: 0.0026
Epoch 10/20
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[

<keras.src.callbacks.history.History at 0x2f3e3feaba0>

In [56]:
# # Predict
# predictions_scaled = model.predict(X_test_data)

# # Reconstruct scaled predictions for inverse transform
# reconstructed_scaled = np.zeros((predictions_scaled.shape[0], train_data.shape[1]))
# reconstructed_scaled[:, -1] = predictions_scaled.flatten()  # Place predictions in the 'Close' column index

# # Inverse transform to get the original scale
# predictions = scaler.inverse_transform(reconstructed_scaled)[:, -1]

# # Get actual values for comparison
# actuals_reconstructed = np.zeros((y_test_data.shape[0], train_data.shape[1]))
# actuals_reconstructed[:, -1] = y_test_data.flatten()
# actuals = scaler.inverse_transform(actuals_reconstructed)[:, -1]

# # Convert predictions to a DataFrame
# results = pd.DataFrame({'Actual': actuals.flatten(), 'Predicted': predictions.flatten()}, index=test_data.index[seq_length + 1:])

# # Display the results
# results


# Predict
predictions_scaled = model.predict(X_test_data)
predictions = scaler.inverse_transform(np.hstack((np.zeros((predictions_scaled.shape[0], 3)), predictions_scaled)))[:, -1]
actuals = scaler.inverse_transform(np.hstack((np.zeros((y_test_data.shape[0], 3)), y_test_data.reshape(-1, 1))))[:, -1]

# Convert predictions to a DataFrame
results = pd.DataFrame({'Actual': actuals.flatten(), 'Predicted': predictions.flatten()}, index=test_data.index[seq_length+1:])

# Display the results
results

[1m67/67[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 55ms/step


Unnamed: 0_level_0,Actual,Predicted
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1
2023-07-30 12:00:00,29469.5,30171.533497
2023-07-30 16:00:00,29402.1,30212.942580
2023-07-30 20:00:00,29500.0,30254.865971
2023-07-31 00:00:00,29140.6,30239.640121
2023-07-31 04:00:00,29434.7,30243.725530
...,...,...
2024-07-17 08:00:00,65000.1,61312.596182
2024-07-17 12:00:00,65503.9,61815.694308
2024-07-17 16:00:00,64689.0,62148.751067
2024-07-17 20:00:00,64603.2,62262.590450


In [57]:
# Assuming your results DataFrame is named 'results'
# and contains 'Actual' and 'Predicted' columns.

# Define the function to generate signals
def generate_signal(row):
    if row['Actual'] > row['Predicted']:
        return -1
    elif row['Actual'] < row['Predicted']:
        return 1
    else:
        return 0

# Apply the function to create the 'Signal' column
results['Signal'] = results.apply(generate_signal, axis=1)

# Display the updated DataFrame with the 'Signal' column
results

Unnamed: 0_level_0,Actual,Predicted,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-07-30 12:00:00,29469.5,30171.533497,-1
2023-07-30 16:00:00,29402.1,30212.942580,-1
2023-07-30 20:00:00,29500.0,30254.865971,-1
2023-07-31 00:00:00,29140.6,30239.640121,-1
2023-07-31 04:00:00,29434.7,30243.725530,-1
...,...,...,...
2024-07-17 08:00:00,65000.1,61312.596182,1
2024-07-17 12:00:00,65503.9,61815.694308,1
2024-07-17 16:00:00,64689.0,62148.751067,1
2024-07-17 20:00:00,64603.2,62262.590450,1


In [58]:
df_4h_prev_1y = BTCUSDT_Filtered_data_4H[str(results.index[0]): str(results.index[-1])].copy()
df_4h_prev_1y

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-30 12:00:00,29569.1,29580.0,29258.0,29469.5,74.570408
2023-07-30 16:00:00,29469.5,29586.7,29234.0,29402.1,70.032904
2023-07-30 20:00:00,29439.9,29560.0,29350.0,29500.0,71.621125
2023-07-31 00:00:00,29491.4,29515.8,29047.6,29140.6,75.299638
2023-07-31 04:00:00,29174.5,29515.0,29111.0,29434.7,72.914412
...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992


In [59]:
# Use .loc to ensure proper assignment without the warning
df_4h_prev_1y.loc[:, 'Signal'] = results['Signal']
df_4h_prev_1y

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-30 12:00:00,29569.1,29580.0,29258.0,29469.5,74.570408,-1
2023-07-30 16:00:00,29469.5,29586.7,29234.0,29402.1,70.032904,-1
2023-07-30 20:00:00,29439.9,29560.0,29350.0,29500.0,71.621125,-1
2023-07-31 00:00:00,29491.4,29515.8,29047.6,29140.6,75.299638,-1
2023-07-31 04:00:00,29174.5,29515.0,29111.0,29434.7,72.914412,-1
...,...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771,1
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817,1
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083,1
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992,1


In [60]:
df_1m_prev_1y = BTCUSDT_Filtered_data_1M[str(results.index[0]): str(results.index[-1])].copy()
df_1m_prev_1y

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-30 12:00:00,29569.1,29580.0,29258.0,29580.0,92.586
2023-07-30 12:01:00,29510.4,29580.0,29258.0,29258.0,87.162
2023-07-30 12:02:00,29333.4,29580.0,29258.0,29580.0,45.377
2023-07-30 12:03:00,29539.6,29580.0,29260.1,29260.1,70.815
2023-07-30 12:04:00,29275.7,29580.0,29258.0,29258.0,37.920
...,...,...,...,...,...
2024-07-17 23:56:00,64637.9,64742.9,64242.9,64443.6,6.473
2024-07-17 23:57:00,64443.6,64742.9,64242.9,64700.0,2.546
2024-07-17 23:58:00,64700.0,64700.0,64603.2,64603.2,1.115
2024-07-17 23:59:00,64689.0,64700.0,64603.2,64603.2,0.079


In [61]:
backtest_df_gru_tf = generate_backtest_df(df_4h_prev_1y, df_1m_prev_1y)
backtest_df_gru_tf

Unnamed: 0,Open time (4h),direction,entry price,close price,action,PNL,Balance
0,2023-07-30 12:00:00,short,29569.1,0.0,,,1049.982688
1,2023-08-17 21:01:00,long,29569.1,28088.2,tp,5.008269,1049.982688
2,2023-08-18 00:00:00,long,28339.0,0.0,,,970.626023
3,2023-08-18 02:43:00,long,28339.0,26200.0,tp,-7.547902,970.626023
4,2023-08-18 04:00:00,long,27775.3,0.0,,,921.370909
...,...,...,...,...,...,...,...
761,2024-07-15 18:40:00,long,61889.4,59000.2,sl,-4.668328,53916.390618
762,2024-07-15 20:00:00,long,62625.8,0.0,,,58559.328840
763,2024-07-16 20:03:00,long,62625.8,68025.0,tp,8.621367,58559.328840
764,2024-07-17 00:00:00,long,64670.8,0.0,,,58566.874289


In [62]:
backtest_df_gru_tf.PNL.sum()

436.5789750025367

### Transformers (Tensorflow) Model

In [65]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D, Add
from tensorflow.keras.optimizers import Adam

# # Prepare the data
# df_2 = BTCUSDT_Filtered_data_4H[['Open', 'High', 'Low', 'Close']].reset_index()
# df_2.columns = ['ds', 'Open', 'High', 'Low', 'Close']

# # Define train-test split
# train_data = df_2[df_2['ds'] < '2023-07-17 20:00:00']
# test_data = df_2[df_2['ds'] >= '2023-07-17 20:00:00']

# Scaling the data
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train_data[['Open', 'High', 'Low', 'Close']])
test_scaled = scaler.transform(test_data[['Open', 'High', 'Low', 'Close']])

# Create sequences
def create_sequences(data, seq_length, look_ahead=2):
    X, y = [], []
    for i in range(len(data) - seq_length - look_ahead + 1):
        X.append(data[i:i + seq_length])
        y.append(data[i + seq_length + look_ahead - 1, -1])  # Predicting 'Close' value
    return np.array(X), np.array(y)

# Define sequence length
seq_length = 37

# Create sequences for training and testing
X_train_data, y_train_data = create_sequences(train_scaled, seq_length, look_ahead=2)
X_test_data, y_test_data = create_sequences(test_scaled, seq_length, look_ahead=2)

# Reshape input to be [samples, time steps, features]
X_train_data = X_train_data.reshape((X_train_data.shape[0], X_train_data.shape[1], 4))
X_test_data = X_test_data.reshape((X_test_data.shape[0], X_test_data.shape[1], 4))

X_train_data

array([[[0.0478847 , 0.00211734, 0.08051301, 0.0482208 ],
        [0.0482208 , 0.00451434, 0.07730448, 0.04862234],
        [0.04862234, 0.00215376, 0.08118003, 0.04843865],
        ...,
        [0.05188843, 0.00251164, 0.08427876, 0.05175068],
        [0.05175068, 0.00247306, 0.08449105, 0.05159304],
        [0.05158832, 0.00266949, 0.08464473, 0.05344301]],

       [[0.0482208 , 0.00451434, 0.07730448, 0.04862234],
        [0.04862234, 0.00215376, 0.08118003, 0.04843865],
        [0.04843855, 0.00214357, 0.0807532 , 0.04820583],
        ...,
        [0.05175068, 0.00247306, 0.08449105, 0.05159304],
        [0.05158832, 0.00266949, 0.08464473, 0.05344301],
        [0.05344301, 0.00291316, 0.08674805, 0.05531605]],

       [[0.04862234, 0.00215376, 0.08118003, 0.04843865],
        [0.04843855, 0.00214357, 0.0807532 , 0.04820583],
        [0.04821137, 0.00214684, 0.08086413, 0.04840921],
        ...,
        [0.05158832, 0.00266949, 0.08464473, 0.05344301],
        [0.05344301, 0.002913

In [66]:
y_train_data

array([0.05507226, 0.05524159, 0.05477902, ..., 0.2846501 , 0.2851219 ,
       0.28058442])

In [76]:
# Define transformer model components
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = Dropout(dropout)(x)
    res = Add()([x, inputs])

    # Feed Forward Part
    x = LayerNormalization(epsilon=1e-6)(res)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    return Add()([x, res])

# Build the transformer model
input_layer = Input(shape=(seq_length, 4))
x = transformer_encoder(input_layer, head_size=256, num_heads=4, ff_dim=128, dropout=0.1)
x = transformer_encoder(x, head_size=128, num_heads=4, ff_dim=64, dropout=0.1)
x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=32, dropout=0.1)
x = GlobalAveragePooling1D(data_format="channels_first")(x)
x = Dropout(0.1)(x)
output_layer = Dense(1)(x)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer=Adam(learning_rate=1e-4), loss='mse')
model.summary()

In [77]:
# Train the model
model.fit(X_train_data, y_train_data, epochs=20, batch_size=32, verbose=1)

Epoch 1/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 59ms/step - loss: 0.0286
Epoch 2/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 59ms/step - loss: 0.0161
Epoch 3/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 81ms/step - loss: 0.0144
Epoch 4/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 82ms/step - loss: 0.0120
Epoch 5/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 83ms/step - loss: 0.0100
Epoch 6/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 81ms/step - loss: 0.0095
Epoch 7/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 103ms/step - loss: 0.0090
Epoch 8/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 61ms/step - loss: 0.0088
Epoch 9/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 67ms/step - loss: 0.0078
Epoch 10/20
[1m242/242[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1

<keras.src.callbacks.history.History at 0x2f388bf2c90>

In [78]:
# Predict
predictions_scaled = model.predict(X_test_data)
predictions = scaler.inverse_transform(np.hstack((np.zeros((predictions_scaled.shape[0], 3)), predictions_scaled)))[:, -1]
actuals = scaler.inverse_transform(np.hstack((np.zeros((y_test_data.shape[0], 3)), y_test_data.reshape(-1, 1))))[:, -1]

# Convert predictions to a DataFrame
results = pd.DataFrame({'Actual': actuals.flatten(), 'Predicted': predictions.flatten()}, index=test_data.index[seq_length+1:])

# Display the results
results

[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 31ms/step


Unnamed: 0_level_0,Actual,Predicted
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1
2023-07-24 08:00:00,29831.0,29338.060570
2023-07-24 12:00:00,29829.9,29406.332818
2023-07-24 16:00:00,29000.0,29610.681745
2023-07-24 20:00:00,29073.6,29293.998587
2023-07-25 00:00:00,29116.5,29389.581477
...,...,...
2024-07-17 08:00:00,65000.1,58631.004310
2024-07-17 12:00:00,65503.9,59012.127103
2024-07-17 16:00:00,64689.0,59218.797681
2024-07-17 20:00:00,64603.2,59736.040738


In [79]:
# Assuming your results DataFrame is named 'results'
# and contains 'Actual' and 'Predicted' columns.

# Define the function to generate signals
def generate_signal(row):
    if row['Actual'] > row['Predicted']:
        return -1
    elif row['Actual'] < row['Predicted']:
        return 1
    else:
        return 0

# Apply the function to create the 'Signal' column
results['Signal'] = results.apply(generate_signal, axis=1)

# Display the updated DataFrame with the 'Signal' column
results

Unnamed: 0_level_0,Actual,Predicted,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2023-07-24 08:00:00,29831.0,29338.060570,1
2023-07-24 12:00:00,29829.9,29406.332818,1
2023-07-24 16:00:00,29000.0,29610.681745,-1
2023-07-24 20:00:00,29073.6,29293.998587,-1
2023-07-25 00:00:00,29116.5,29389.581477,-1
...,...,...,...
2024-07-17 08:00:00,65000.1,58631.004310,1
2024-07-17 12:00:00,65503.9,59012.127103,1
2024-07-17 16:00:00,64689.0,59218.797681,1
2024-07-17 20:00:00,64603.2,59736.040738,1


In [80]:
BTCUSDT_Filtered_data_4H_Prev_1Y_With_XFormer = BTCUSDT_Filtered_data_4H['2023-07-24 04:00:00': '2024-07-18 00:00:00'].copy()
BTCUSDT_Filtered_data_4H_Prev_1Y_With_XFormer

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-24 04:00:00,30022.6,30180.0,29852.6,29860.6,64.682383
2023-07-24 08:00:00,29852.6,30358.2,29732.1,29831.0,66.823188
2023-07-24 12:00:00,29986.7,30329.0,29350.0,29829.9,120.010104
2023-07-24 16:00:00,29540.0,29829.9,29000.0,29000.0,123.569971
2023-07-24 20:00:00,29040.2,29280.8,28849.7,29073.6,113.160963
...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992


In [81]:
# Use .loc to ensure proper assignment without the warning
BTCUSDT_Filtered_data_4H_Prev_1Y_With_XFormer.loc[:, 'Signal'] = results['Signal']
BTCUSDT_Filtered_data_4H_Prev_1Y_With_XFormer

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-24 04:00:00,30022.6,30180.0,29852.6,29860.6,64.682383,
2023-07-24 08:00:00,29852.6,30358.2,29732.1,29831.0,66.823188,1.0
2023-07-24 12:00:00,29986.7,30329.0,29350.0,29829.9,120.010104,1.0
2023-07-24 16:00:00,29540.0,29829.9,29000.0,29000.0,123.569971,-1.0
2023-07-24 20:00:00,29040.2,29280.8,28849.7,29073.6,113.160963,-1.0
...,...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771,1.0
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817,1.0
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083,1.0
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992,1.0


In [82]:
BTCUSDT_Filtered_data_1M_Prev_1Y = BTCUSDT_Filtered_data_1M['2023-07-27 20:00:00': '2024-07-18 00:00:00'].copy()
BTCUSDT_Filtered_data_1M_Prev_1Y

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-27 20:00:00,29333.3,29357.4,29271.7,29280.8,38.688
2023-07-27 20:01:00,29303.9,29349.6,29200.0,29200.0,170.174
2023-07-27 20:02:00,29200.0,29244.7,29200.0,29242.9,7.268
2023-07-27 20:03:00,29242.9,29244.7,29242.9,29244.7,4.508
2023-07-27 20:04:00,29244.8,29421.6,29244.8,29317.4,53.746
...,...,...,...,...,...
2024-07-17 23:56:00,64637.9,64742.9,64242.9,64443.6,6.473
2024-07-17 23:57:00,64443.6,64742.9,64242.9,64700.0,2.546
2024-07-17 23:58:00,64700.0,64700.0,64603.2,64603.2,1.115
2024-07-17 23:59:00,64689.0,64700.0,64603.2,64603.2,0.079


In [83]:
backtest_df_xformer = generate_backtest_df(BTCUSDT_Filtered_data_4H_Prev_1Y_With_XFormer, BTCUSDT_Filtered_data_1M_Prev_1Y)
backtest_df_xformer

Unnamed: 0,Open time (4h),direction,entry price,close price,action,PNL,Balance
0,2023-07-24 04:00:00,short,30022.6,0.0,,,1005.562401
1,2023-07-24 08:00:00,long,30022.6,29852.6,direction,0.566240,1005.562401
2,2023-07-24 08:00:00,long,29852.6,0.0,,,994.932149
3,2023-07-24 16:00:00,short,29852.6,29540.0,direction,-1.047145,994.932149
4,2023-07-24 16:00:00,short,29540.0,0.0,,,1004.573156
...,...,...,...,...,...,...,...
607,2024-07-15 18:40:00,long,61889.4,59000.2,sl,-4.668328,38216.314335
608,2024-07-15 20:00:00,long,62625.8,0.0,,,41507.261383
609,2024-07-16 20:03:00,long,62625.8,68025.0,tp,8.621367,41507.261383
610,2024-07-17 00:00:00,long,64670.8,0.0,,,41512.609649


In [84]:
backtest_df_xformer.PNL.sum()

401.2777897613021

# Log: For Custom DL Models, Here Are The Top Performing Strategies (Highest Balance Plus Highest PNL)

<div style="text-align: center;">
<table style="width: 50%; border-collapse: collapse;">
    <tr style="background-color: #f2f2f2; border-bottom: 2px solid #cccccc;">
        <th>Timeframe</th>
        <th>Strategy</th>
        <th>PNL</th>
    </tr>
    <tr style="background-color: #ffebcc; border-bottom: 1px solid #cccccc;" onmouseover="this.style.backgroundColor='#ffe6cc';" onmouseout="this.style.backgroundColor='#ffebcc';">
        <td>4H</td>
        <td>GRU_TF</td>
        <td>436.58</td>
    </tr>
    <tr style="background-color: #ffebcc; border-bottom: 1px solid #cccccc;" onmouseover="this.style.backgroundColor='#ffe6cc';" onmouseout="this.style.backgroundColor='#ffebcc';">
        <td>4H</td>
        <td>XFORMER_TF</td>
        <td>401.28</td>
    </tr>
    <tr style="background-color: #ffebcc; border-bottom: 1px solid #cccccc;" onmouseover="this.style.backgroundColor='#ffe6cc';" onmouseout="this.style.backgroundColor='#ffebcc';">
        <td>4H</td>
        <td>GRU_TORCH</td>
        <td>133.58</td>
    </tr>
</table>
</div>

# Day 23: Calculating Some Stats On The Ledger

### Creating Ledger Backtested On OBV

In [14]:
_, test_data_4h = get_past_data(copy.deepcopy(BTCUSDT_Filtered_data_4H), '1Y', get_timeframe(BTCUSDT_Filtered_data_4H))
test_data_4h

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-18 00:00:00,29657.9,30498.0,29657.8,30058.9,66.161904
2023-07-18 04:00:00,29944.1,30599.0,29919.9,30168.6,64.356150
2023-07-18 08:00:00,30548.4,30548.4,30155.0,30476.0,62.437558
2023-07-18 12:00:00,30476.0,30540.0,30155.0,30276.4,94.781367
2023-07-18 16:00:00,30410.0,30410.0,29960.0,30198.6,65.917963
...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992


In [15]:
test_data_4h_obv = calculate_obv(copy.deepcopy(test_data_4h), fillna=True)
test_data_4h_obv = generate_obv_signals(test_data_4h_obv)
test_data_4h_obv

Unnamed: 0_level_0,Open,High,Low,Close,Volume,OBV,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-18 00:00:00,29657.9,30498.0,29657.8,30058.9,66.161904,66.161904,0
2023-07-18 04:00:00,29944.1,30599.0,29919.9,30168.6,64.356150,130.518054,1
2023-07-18 08:00:00,30548.4,30548.4,30155.0,30476.0,62.437558,192.955613,1
2023-07-18 12:00:00,30476.0,30540.0,30155.0,30276.4,94.781367,98.174246,-1
2023-07-18 16:00:00,30410.0,30410.0,29960.0,30198.6,65.917963,32.256283,-1
...,...,...,...,...,...,...,...
2024-07-17 08:00:00,66196.5,66998.8,63555.0,65000.1,4.510771,5066.913544,-1
2024-07-17 12:00:00,65000.1,66000.0,63555.0,65503.9,5.291817,5072.205360,1
2024-07-17 16:00:00,65446.3,66000.0,63892.4,64689.0,5.464083,5066.741277,-1
2024-07-17 20:00:00,64669.7,64999.0,64240.0,64603.2,9.204992,5057.536285,-1


In [16]:
test_data_1m = BTCUSDT_Filtered_data_1M[str(test_data_4h_obv.index[0]): str(test_data_4h_obv.index[-1])]
test_data_1m

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-18 00:00:00,29657.9,30208.4,29657.9,30027.7,37.307
2023-07-18 00:01:00,29862.7,30013.7,29657.8,29969.3,17.256
2023-07-18 00:02:00,29854.0,30208.4,29657.9,29657.9,245.683
2023-07-18 00:03:00,29657.9,29957.3,29657.9,29658.0,64.483
2023-07-18 00:04:00,29757.0,30232.5,29750.0,30232.5,81.386
...,...,...,...,...,...
2024-07-17 23:56:00,64637.9,64742.9,64242.9,64443.6,6.473
2024-07-17 23:57:00,64443.6,64742.9,64242.9,64700.0,2.546
2024-07-17 23:58:00,64700.0,64700.0,64603.2,64603.2,1.115
2024-07-17 23:59:00,64689.0,64700.0,64603.2,64603.2,0.079


In [17]:
backtest_df_obv_test = generate_backtest_df(test_data_4h_obv, test_data_1m)
backtest_df_obv_test

Unnamed: 0,Open time (4h),direction,entry price,close price,action,PNL,Balance
0,2023-07-18 04:00:00,long,29944.1,0.0,,,1.017663e+03
1,2023-07-18 12:00:00,short,29944.1,30476.0,direction,1.776310,1.017663e+03
2,2023-07-18 12:00:00,short,30476.0,0.0,,,1.023539e+03
3,2023-07-19 00:00:00,long,30476.0,30297.0,direction,0.587347,1.023539e+03
4,2023-07-19 00:00:00,long,30297.0,0.0,,,1.008666e+03
...,...,...,...,...,...,...,...
2591,2024-07-17 12:00:00,long,66196.5,65000.1,direction,1.807346,6.950016e+08
2592,2024-07-17 12:00:00,long,65000.1,0.0,,,6.997031e+08
2593,2024-07-17 16:00:00,short,65000.1,65446.3,direction,0.686460,6.997031e+08
2594,2024-07-17 16:00:00,short,65446.3,0.0,,,7.077659e+08


### Calculation Of Stats

In [27]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from dateutil import relativedelta
import time
from scipy import stats
# import sqlite3
import os
import quantstats as qs
from scipy.stats import linregress

# root_dir = os.path.dirname(os.path.abspath(__file__))

def get_last_pnl_scores(ledger):
    pnl_cols = ['pnl_sum_1','pnl_sum_7','pnl_sum_15','pnl_sum_30','pnl_sum_45','pnl_sum_60']
    pnl_cols_val = []
    for col in pnl_cols:
        pnl_cols_val.append(ledger.iloc[-1][col])
    return pnl_cols_val


def calculate_pnl_sum_all(df):
    
    date_column = df.columns[0]
    
    # Fill NaN values in PNL column with 0 for cumulative sum calculation
    df['PNL'] = df['PNL'].fillna(0.0)
    
    # Adding the pnl_sum column to the dataframe
    df['pnl_sum'] = df['PNL'].cumsum()
    
    df['pnl_sum_1'] = -100.0
    df['pnl_sum_7'] = -100.0
    df['pnl_sum_30'] = -100.0

    df['pnl_sum_15'] = -100.0
    df['pnl_sum_45'] = -100.0
    df['pnl_sum_60'] = -100.0
    
    df[date_column] = pd.to_datetime(df[date_column])
    start_date = df.iloc[0][date_column]
    end_date = df.iloc[-1][date_column]

    list_1 = []
    list_7 = []
    list_30 = []
    for i in range(len(df)):
        current_date = df.iloc[i][date_column]
        days_ago_1 = current_date - timedelta(days = 1)
        days_ago_7 = current_date - timedelta(days = 7)
        days_ago_15 = current_date - timedelta(days = 15)
        days_ago_30 = current_date - timedelta(days = 30)
        days_ago_45 = current_date - timedelta(days = 45)
        days_ago_60 = current_date - timedelta(days = 60)

        if days_ago_1 < start_date:
            df.at[i, 'pnl_sum_1'] = df.iloc[i]['pnl_sum']
        else:
            filtered_df = df.loc[(df[date_column] >= days_ago_1) & (df[date_column] <= current_date)]
            filtered_df = filtered_df.drop_duplicates(keep="last")
            df.at[i, 'pnl_sum_1'] = filtered_df['PNL'].sum()
            
        if days_ago_7 < start_date:
            df.at[i, 'pnl_sum_7'] = df.iloc[i]['pnl_sum']
        else:
            filtered_df = df.loc[(df[date_column] >= days_ago_7) & (df[date_column] <= current_date)]
            df.at[i, 'pnl_sum_7'] = filtered_df['PNL'].sum()
            
        if days_ago_30 < start_date:
            df.at[i, 'pnl_sum_30'] = df.iloc[i]['pnl_sum']
        else:
            filtered_df = df.loc[(df[date_column] >= days_ago_30) & (df[date_column] <= current_date)]
            df.at[i, 'pnl_sum_30'] = filtered_df['PNL'].sum()
    

        if days_ago_15 < start_date:
            df.at[i, 'pnl_sum_15'] = df.iloc[i]['pnl_sum']
        else:
            filtered_df = df.loc[(df[date_column] >= days_ago_15) & (df[date_column] <= current_date)]
            df.at[i, 'pnl_sum_15'] = filtered_df['PNL'].sum()


        if days_ago_45 < start_date:
            df.at[i, 'pnl_sum_45'] = df.iloc[i]['pnl_sum']
        else:
            filtered_df = df.loc[(df[date_column] >= days_ago_45) & (df[date_column] <= current_date)]
            df.at[i, 'pnl_sum_45'] = filtered_df['PNL'].sum()


        if days_ago_60 < start_date:
            df.at[i, 'pnl_sum_60'] = df.iloc[i]['pnl_sum']
        else:
            filtered_df = df.loc[(df[date_column] >= days_ago_60) & (df[date_column] <= current_date)]
            df.at[i, 'pnl_sum_60'] = filtered_df['PNL'].sum()
    df = df.round({'pnl_sum_1':2,'pnl_sum_7':2,'pnl_sum_15':2,'pnl_sum_30':2,'pnl_sum_45':2,'pnl_sum_60':2})            
    return df

# ## read ledger table from database
# def get_ledger(stratName):
#     sql_connection = sqlite3.connect(os.path.join(root_dir,'db/zt_simulater.db'))
#     cursor = sql_connection.cursor()
#     strat_ledger = pd.read_sql_query(f'select * from {stratName}',sql_connection)
#     cursor.close()
#     sql_connection.close()
#     return strat_ledger

def calculate_diff_date(start, end):
    # convert string to date object
    start = start.split('+')[0]
    end = end.split('+')[0]
    start_date = datetime.strptime(start, "%Y-%m-%d %H:%M:%S")
    end_date = datetime.strptime(end, "%Y-%m-%d %H:%M:%S")
    diff = end_date - start_date
    #diff = relativedelta.relativedelta(end_date, start_date)
    # print('Complete Difference between two dates: ')
    return diff.days


### calculate drawdown longest drawdown,current drawdown
def longest_drawdown(pnl_cum_list, date):
    #print(pnl_cum_list, date)
    drawdown_list = []
    
    start_end_date_drawdown = []
    flag_for_start_date_drawdown = False
    counter = 0
    queue = []
    durations_in_days = []
    
    max_drawdown = float('-inf')
    max_drawdown_duration = 0

    curr_drawdown = float('-inf')
    curr_drawdown_duration = 0
    
    maxPnl = pnl_cum_list[0]

    for counter,value in enumerate(pnl_cum_list):

        if value < maxPnl:
            if flag_for_start_date_drawdown == False:
                drawdown = maxPnl - value
                drawdown_list.append(drawdown)
                flag_for_start_date_drawdown = True
                start_end_date_drawdown.append(str(date[counter]))
                queue.append(str(date[counter]))
                curr_drawdown = drawdown
                start_date = queue[0]
                curr_drawdown_duration = 0
                if drawdown > max_drawdown:
                    max_drawdown = drawdown
                
            else:
                drawdown = maxPnl - value
                drawdown_list.append(drawdown)
                curr_drawdown = drawdown
                
                start_date = queue[0]
                end_date = str(date[counter])
                
                
                days_diff = calculate_diff_date(start_date, end_date)
                curr_drawdown_duration = days_diff
                
                if int(curr_drawdown_duration) >= int(max_drawdown_duration):
                    max_drawdown_duration = curr_drawdown_duration
                
                
                if drawdown > max_drawdown:
                    max_drawdown = drawdown
                                    
        elif value >= maxPnl:
            if(flag_for_start_date_drawdown == True):
                start_end_date_drawdown.append(str(date[counter]))
                start_date = queue.pop(0)
                end_date = str(date[counter])
                days_diff = calculate_diff_date(start_date, end_date)
                durations_in_days.append(days_diff)
                flag_for_start_date_drawdown = False
                curr_drawdown_duration = 0
                maxPnl = value

            else:
                maxPnl = value
                curr_drawdown_duration = 0

    return durations_in_days, round(max_drawdown,2), max_drawdown_duration, round(curr_drawdown,2), curr_drawdown_duration



def calculate_drawdown(pnl_cum_list):
    #print(type(pnl_cum_list),'First')
    drawdown_list = []
    maxPnl = pnl_cum_list[0]
    for value in pnl_cum_list:
        if value < maxPnl:
            drawdown = value - maxPnl 
            drawdown_list.append(round(drawdown,2))
        elif value >= maxPnl:
            maxPnl = value
            drawdown_list.append(0)
    #print(type(drawdown_list),'After')
    return drawdown_list


####win/losses calculation
def calculate_wins_losses(df):
    temp_wins = temp_losses = total_wins = total_losses = consecutive_wins = consecutive_losses = 0
    fee = 0
    for i in range(1, df.shape[0]):
        if df['PNL'][i] > 0:
            total_wins += 1
            temp_wins += 1
            if temp_losses > consecutive_losses:
                consecutive_losses = temp_losses
            temp_losses = 0
        elif df['PNL'][i] < 0:
            total_losses += 1
            temp_losses +=1

            if temp_wins > consecutive_wins:
                consecutive_wins = temp_wins
            temp_wins = 0
    win_percentage = round(total_wins / (total_wins + total_losses) * 100, 2)
    loss_percentage = round(total_losses / (total_wins + total_losses) * 100, 2)
    return total_wins,total_losses,consecutive_wins,consecutive_losses,win_percentage,loss_percentage


#### r2 score calculation
def calculate_r2_score(ledger):
    res = stats.linregress(range(len(ledger.pnl_sum)), ledger.pnl_sum.to_numpy())
    r2 = res.rvalue**2
    return round(r2,2)

### positive negative pnl calculation
def pos_neg_pnl_percent(pnl_percent):
    total_negative_pnl_percent = np.where(pnl_percent < 0, pnl_percent, 0).sum()
    negative_pnl_percent = np.where(pnl_percent < 0, pnl_percent, 0)

    total_positive_pnl_percent = np.where(pnl_percent > 0, pnl_percent, 0).sum()
    total_pnl_percent = total_positive_pnl_percent + total_negative_pnl_percent
    return total_pnl_percent, total_negative_pnl_percent,total_positive_pnl_percent,negative_pnl_percent

##sharp calculation
def calculate_sharp(returns):
    # list_pnls = [i for i in list_pnls if i != 0]
    # std_ = np.std(list_pnls)
    # sharpe = round(returns / std_, 2)
    # return sharpe
    returns = returns.astype(np.float64)
    risk_free_rate=0
    sharpe_ratio = qs.stats.sharpe(returns, risk_free_rate)
    # print("Sharpe Ratio:", sharpe_ratio)
    return round(sharpe_ratio,2)

#### sortino calculation
def downside_risk(returns, risk_free=0):
    adj_returns = returns - risk_free
    sqr_downside = np.square(np.clip(adj_returns, np.NINF, 0))
    return np.sqrt(np.nanmean(sqr_downside) * 252)



def sharpe_cal(returns):
    risk_free_rate=0
    sharpe_ratio = qs.stats.sharpe(returns, risk_free_rate)
    print("Sharpe Ratio:", sharpe_ratio)
    return round(sharpe_ratio,2)

def calculate_sortino(returns,negative_pnl_list):
    # negative_pnl_list = [i for i in negative_pnl_list if i != 0]
    # std_ = np.std(negative_pnl_list)
    # sortino = returns/std_
    # return sortino
    sortino=qs.stats.sortino(returns)
    return sortino


def rename_name(row):
    return row.replace('_','-')


def average_daily_pnl(pnl_sum,date_started):
    #date_started = date_started.split(' ')[0]
    
    # Convert date_started to string if it is a Timestamp object
    if isinstance(date_started, pd.Timestamp):
        date_started = date_started.strftime('%Y-%m-%d %H:%M:%S')
    
    date_started = datetime.strptime(date_started,'%Y-%m-%d %H:%M:%S').date()
    pnl_sum = int(pnl_sum)
    now_date = datetime.now()
    delta = now_date.date() - date_started
    total_days = int(delta.days)
    daily_pnl = pnl_sum/total_days
    return daily_pnl


def calculate_win_loss_ratio(win_percentage,loss_percentage):
    try:
        win_loss_ratio = win_percentage/loss_percentage
    except:
        win_loss_ratio = win_percentage
    return win_loss_ratio


def calculate_alpha_beta(df2):
    df = df2.copy()
    df['close price'] = df['close price'].astype(float)
    df['entry price'] = df['entry price'].astype(float)
    df['PNL'] = df['PNL'].astype(float)


    df['btc_return'] = (df['close price'] / df['entry price'] - 1) * 100
    #def calculate_alpha_beta(self, df):
    df['PNL'] = df['PNL'].astype(float)
    df.sort_values(by='PNL', ignore_index=True, inplace=True)
    x = df['PNL'].values
    y = df['btc_return'].values
    slope, intercept, _, _, _ = linregress(x, y)
    alpha = slope
    beta = intercept
    return alpha, beta


## stats into database
def calculate_all_statistics(strat_ledger):
#     strat_ledger = get_ledger(stratgey_name)
    
    # Adding the pnl_sum column to the dataframe
    strat_ledger['pnl_sum'] = strat_ledger['PNL'].cumsum()
    date_column = strat_ledger.columns[0]
    
    drawdown_list = calculate_drawdown(strat_ledger['pnl_sum'])

    current_pnl_sum = strat_ledger['pnl_sum'].iloc[-1]
    date_started = strat_ledger[date_column].iloc[0]
    avg_daily_pnl = average_daily_pnl(current_pnl_sum,date_started)
    #print(avg_daily_pnl)

    pnl_sum_scores = get_last_pnl_scores(strat_ledger)
    total_pnl_percent, total_negative_pnl_percent,total_positive_pnl_percent, negative_pnl_percent = pos_neg_pnl_percent(strat_ledger['PNL'])
    total_pnl_percent =  strat_ledger['PNL'].cumsum()#strat_ledger.iloc[-1]['pnl_sum']
    total_pnl_percent = total_pnl_percent.iloc[-1]

    r2_score = calculate_r2_score(strat_ledger)

    temp_df = strat_ledger[strat_ledger['close price'] != 0]
    alpha, beta = calculate_alpha_beta(temp_df)
    sharpe = calculate_sharp(temp_df['PNL'])
    sortino = calculate_sortino(temp_df['PNL'], negative_pnl_percent)

    print('done')
#     sharpe = calculate_sharp(total_pnl_percent, strat_ledger["PNL"])
#     print(sharpe,sharpe_test,'Shapreeee')
#     sortino = calculate_sortino(total_pnl_percent,negative_pnl_percent)
    total_wins,total_losses,consecutive_wins,consecutive_losses,win_percentage,loss_percentage = calculate_wins_losses(strat_ledger)
    win_loss_ratio = calculate_win_loss_ratio(win_percentage,loss_percentage)
 
    drawdown_durations,max_drawdown, max_drawdown_duration, curr_drawdown, curr_drawdown_duration = longest_drawdown(strat_ledger['pnl_sum'], strat_ledger[date_column])
    try:
        average_drawdown = round(sum(drawdown_list)/len(drawdown_list),2)
        average_drawdown_duration = round(sum(drawdown_durations)/len(drawdown_durations),2)
    except ZeroDivisionError:
        average_drawdown_duration = 0
        average_drawdown = 0
#     try:
#         #columnValues = (abs(round(float(curr_drawdown),2)),round(float(curr_drawdown_duration),2),abs(round(float(average_drawdown),2)),round(float(average_drawdown_duration),2),abs(round(float(max_drawdown),2)),round(float(max_drawdown_duration),2),round(float(r2_score),2),round(float(sharpe),2),abs(round(float(sortino),2)),round(float(total_pnl_percent),2),round(float(total_positive_pnl_percent),2),round(float(total_negative_pnl_percent),2),round(float(total_wins),2),round(float(total_losses),2),round(float(consecutive_wins),2),round(float(consecutive_losses),2),round(float(win_percentage),2),round(float(loss_percentage),2),round(float(pnl_sum_scores[0]),2),round(float(pnl_sum_scores[1]),2),round(float(pnl_sum_scores[2]),2),round(float(pnl_sum_scores[3]),2),round(float(pnl_sum_scores[4]),2),round(float(pnl_sum_scores[5]),2),round(float(avg_daily_pnl),2),round(float(win_loss_ratio),2),stratgey_name)
#         curr_drawdown = abs(curr_drawdown)
#         average_drawdown = abs(average_drawdown)
#         max_drawdown = abs(max_drawdown)
#         curr_drawdown = curr_drawdown*(-1)
#         average_drawdown = average_drawdown*(-1)
#         max_drawdown = max_drawdown*(-1)

#         columnValues = (round(float(curr_drawdown),2),round(float(curr_drawdown_duration),2),round(float(average_drawdown),2),round(float(average_drawdown_duration),2),round(float(max_drawdown),2),round(float(max_drawdown_duration),2),round(float(r2_score),2),round(float(sharpe),2),abs(round(float(sortino),2)),round(float(total_pnl_percent),2),round(float(total_positive_pnl_percent),2),round(float(total_negative_pnl_percent),2),round(float(total_wins),2),round(float(total_losses),2),round(float(consecutive_wins),2),round(float(consecutive_losses),2),round(float(win_percentage),2),round(float(loss_percentage),2),round(float(pnl_sum_scores[0]),2),round(float(pnl_sum_scores[1]),2),round(float(pnl_sum_scores[2]),2),round(float(pnl_sum_scores[3]),2),round(float(pnl_sum_scores[4]),2),round(float(pnl_sum_scores[5]),2),round(float(avg_daily_pnl),2),round(float(win_loss_ratio),2),round(float(alpha),2),round(float(beta),2))
        
    # Create a dictionary with descriptive keys
    stats_dict = {
        'Current Drawdown': round(float(abs(curr_drawdown)), 2) * -1,
        'Current Drawdown Duration (days)': round(float(curr_drawdown_duration), 2),
        'Average Drawdown': round(float(abs(average_drawdown)), 2) * -1,
        'Average Drawdown Duration (days)': round(float(average_drawdown_duration), 2),
        'Maximum Drawdown': round(float(abs(max_drawdown)), 2) * -1,
        'Maximum Drawdown Duration (days)': round(float(max_drawdown_duration), 2),
        'R-squared Score': round(float(r2_score), 2),
        'Sharpe Ratio': round(float(sharpe), 2),
        'Sortino Ratio': round(float(sortino), 2),
        'Total PnL (%)': round(float(total_pnl_percent), 2),
        'Total Positive PnL (%)': round(float(total_positive_pnl_percent), 2),
        'Total Negative PnL (%)': round(float(total_negative_pnl_percent), 2),
        'Total Wins': round(float(total_wins), 2),
        'Total Losses': round(float(total_losses), 2),
        'Consecutive Wins': round(float(consecutive_wins), 2),
        'Consecutive Losses': round(float(consecutive_losses), 2),
        'Win Percentage (%)': round(float(win_percentage), 2),
        'Loss Percentage (%)': round(float(loss_percentage), 2),
        'PnL Sum 1': round(float(pnl_sum_scores[0]), 2),
        'PnL Sum 7': round(float(pnl_sum_scores[1]), 2),
        'PnL Sum 15': round(float(pnl_sum_scores[2]), 2),
        'PnL Sum 30': round(float(pnl_sum_scores[3]), 2),
        'PnL Sum 45': round(float(pnl_sum_scores[4]), 2),
        'PnL Sum 60': round(float(pnl_sum_scores[5]), 2),
        'Average Daily PnL': round(float(avg_daily_pnl), 2),
        'Win/Loss Ratio': round(float(win_loss_ratio), 2),
        'Alpha': round(float(alpha), 2),
        'Beta': round(float(beta), 2),
    }
    
    # Convert dictionary to DataFrame for better visualization (optional)
    stats_df = pd.DataFrame([stats_dict])
    
    return stats_dict, stats_df

In [28]:
backtest_df_obv_test_with_pnl_sums = calculate_pnl_sum_all(copy.deepcopy(backtest_df_obv_test))
backtest_df_obv_test_with_pnl_sums

Unnamed: 0,Open time (4h),direction,entry price,close price,action,PNL,Balance,pnl_sum,pnl_sum_1,pnl_sum_7,pnl_sum_30,pnl_sum_15,pnl_sum_45,pnl_sum_60
0,2023-07-18 04:00:00,long,29944.1,0.0,,0.000000,1.017663e+03,0.000000,0.00,0.00,0.00,0.00,0.00,0.00
1,2023-07-18 12:00:00,short,29944.1,30476.0,direction,1.776310,1.017663e+03,1.776310,1.78,1.78,1.78,1.78,1.78,1.78
2,2023-07-18 12:00:00,short,30476.0,0.0,,0.000000,1.023539e+03,1.776310,1.78,1.78,1.78,1.78,1.78,1.78
3,2023-07-19 00:00:00,long,30476.0,30297.0,direction,0.587347,1.023539e+03,2.363657,2.36,2.36,2.36,2.36,2.36,2.36
4,2023-07-19 00:00:00,long,30297.0,0.0,,0.000000,1.008666e+03,2.363657,2.36,2.36,2.36,2.36,2.36,2.36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2591,2024-07-17 12:00:00,long,66196.5,65000.1,direction,1.807346,6.950016e+08,1388.735220,12.53,40.14,201.64,108.58,240.86,307.71
2592,2024-07-17 12:00:00,long,65000.1,0.0,,0.000000,6.997031e+08,1388.735220,12.53,40.14,201.64,108.58,240.86,307.71
2593,2024-07-17 16:00:00,short,65000.1,65446.3,direction,0.686460,6.997031e+08,1389.421681,12.82,40.82,201.43,109.27,241.55,308.39
2594,2024-07-17 16:00:00,short,65446.3,0.0,,0.000000,7.077659e+08,1389.421681,12.82,40.82,201.43,109.27,241.55,308.39


In [32]:
all_stats_dict, all_stats_df = calculate_all_statistics(copy.deepcopy(backtest_df_obv_test_with_pnl_sums))
all_stats_df

done


Unnamed: 0,Current Drawdown,Current Drawdown Duration (days),Average Drawdown,Average Drawdown Duration (days),Maximum Drawdown,Maximum Drawdown Duration (days),R-squared Score,Sharpe Ratio,Sortino Ratio,Total PnL (%),...,PnL Sum 1,PnL Sum 7,PnL Sum 15,PnL Sum 30,PnL Sum 45,PnL Sum 60,Average Daily PnL,Win/Loss Ratio,Alpha,Beta
0,-1.76,0.0,-0.41,0.26,-15.54,4.0,0.98,8.8,23.84,1390.58,...,6.02,39.67,106.76,201.5,240.72,306.87,3.55,5.5,0.12,-0.04


In [33]:
all_stats_dict

{'Current Drawdown': -1.76,
 'Current Drawdown Duration (days)': 0.0,
 'Average Drawdown': -0.41,
 'Average Drawdown Duration (days)': 0.26,
 'Maximum Drawdown': -15.54,
 'Maximum Drawdown Duration (days)': 4.0,
 'R-squared Score': 0.98,
 'Sharpe Ratio': 8.8,
 'Sortino Ratio': 23.84,
 'Total PnL (%)': 1390.58,
 'Total Positive PnL (%)': 1581.99,
 'Total Negative PnL (%)': -191.4,
 'Total Wins': 1095.0,
 'Total Losses': 199.0,
 'Consecutive Wins': 64.0,
 'Consecutive Losses': 6.0,
 'Win Percentage (%)': 84.62,
 'Loss Percentage (%)': 15.38,
 'PnL Sum 1': 6.02,
 'PnL Sum 7': 39.67,
 'PnL Sum 15': 106.76,
 'PnL Sum 30': 201.5,
 'PnL Sum 45': 240.72,
 'PnL Sum 60': 306.87,
 'Average Daily PnL': 3.55,
 'Win/Loss Ratio': 5.5,
 'Alpha': 0.12,
 'Beta': -0.04}

# ***More Later!***