# Neurog Internship: (Task 13)

# All Libraries

In [9]:
# For Visualization & Plotting
import matplotlib.pyplot as plt

# For Datetime Handling
from datetime import timedelta
import datetime

# Technical Analysis Library Modules
from ta.momentum import AwesomeOscillatorIndicator
from ta.volume import OnBalanceVolumeIndicator
from ta.momentum import StochRSIIndicator
from ta.momentum import RSIIndicator
from ta.trend import PSARIndicator
from ta.trend import ADXIndicator
from ta.trend import SMAIndicator
from ta.trend import EMAIndicator

# Other Necessary Libraries
import quantstats as qs
import pandas as pd
import numpy as np
import tailer
import copy
import os
import io

# All Declarations

In [10]:
# File names of the CSV inputs used by this notebook
csv_filename_1 = "BTCUSDT_1_Minute_From_2020_Till_Now.csv"
csv_filename_2 = "all_models_info.csv"
# TODO: csv_filename_3 (ledger file) is not defined yet

# Setting up the directories from the directory hierarchy.
# Everything is resolved relative to the parent of the notebook's working directory:
#   <parent>/data/ohlc          -> CSV file 1
#   <parent>/data/models/info   -> CSV file 2
notebook_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))
data_dir = os.path.join(parent_dir, "data")

# Sub-directories of the main data directory
ohlc_dir = os.path.join(data_dir, "ohlc")
models_dir = os.path.join(data_dir, "models")
models_info_dir = os.path.join(models_dir, "info")

# Full paths to the two CSV files
full_csv_path_1 = os.path.join(ohlc_dir, csv_filename_1)
full_csv_path_2 = os.path.join(models_info_dir, csv_filename_2)

# TODO: 'ledger' directory within 'models' (for CSV file 3), not enabled yet
# models_ledger_dir = os.path.join(models_dir, "ledger")

# All Functions

## Utility Functions

In [3]:
# Function to return the timeframe of the dataframe's index
def get_timeframe(df):
    """
    Returns the inferred timeframe (frequency) of the datetime index of a DataFrame.

    Parameters:
        df (pd.DataFrame): DataFrame with datetime index.

    Returns:
        str: Timeframe (frequency) of the datetime index, e.g., '4H', '1D'.
             A single-character frequency such as 'D' is returned as '1D'.

    Raises:
        ValueError: If the index is not datetime, or if the frequency cannot be
                    inferred (pd.infer_freq itself also raises ValueError when
                    the index has fewer than 3 entries).
    """
    # Check if the index is datetime
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        raise ValueError("DataFrame index must be datetime.")

    # Infer the frequency of the datetime index
    frequency = pd.infer_freq(df.index)

    # infer_freq returns None for irregularly spaced indexes, so this check has
    # to come before anything that treats the result as a string
    if frequency is None:
        raise ValueError("Unable to infer the frequency of the datetime index.")

    # Bare unit such as 'H' or 'D': prepend the implicit multiple of 1
    if len(frequency) == 1:
        frequency = '1' + frequency

    return frequency

# Function that splits the dataframe into the last 'N' period of data and everything before it
def get_past_data(df, period, timeframe):
    """
    Splits a DataFrame into the data before the specified trailing period and the data inside it,
    with the start and end of the trailing period floored to the given timeframe.

    Parameters:
        df (pd.DataFrame): DataFrame with datetime index.
        period (str): Period string made of an integer quantity and a unit, e.g., '1Y' for one year,
                      '1M' for one month. Units: Y (years), M (months), W (weeks), D (days),
                      H (hours), T (minutes).
        timeframe (str): Frequency string understood by pandas and usable with Timestamp.floor,
                         e.g., '4H' for four hours, '15T' for fifteen minutes.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: (filtered_df_first, filtered_df_last) where
            filtered_df_last holds the rows between the aligned start and aligned end (both inclusive)
            and filtered_df_first holds every row before the aligned start.

    Raises:
        ValueError: If the index is not datetime or the period string is malformed.
    """
    # Check if the index is datetime
    if not pd.api.types.is_datetime64_any_dtype(df.index):
        raise ValueError("DataFrame index must be datetime.")

    # Mapping period strings to DateOffset arguments
    period_mapping = {
        'Y': 'years',
        'M': 'months',
        'W': 'weeks',
        'D': 'days',
        'H': 'hours',
        'T': 'minutes'
    }
    invalid_period_message = "Invalid period format. Use formats like '1Y', '1M', '1W', '1D', etc."

    # Validate the whole period string up front so that inputs such as 'Y' or ''
    # produce the documented error instead of a raw int()/indexing failure
    if len(period) < 2 or period[-1] not in period_mapping:
        raise ValueError(invalid_period_message)

    try:
        period_quantity = int(period[:-1])
    except ValueError:
        raise ValueError(invalid_period_message) from None

    period_offset = pd.DateOffset(**{period_mapping[period[-1]]: period_quantity})

    # Get the last date in the DataFrame
    last_date = df.index[-1]

    # Calculate the start date
    start_date = last_date - period_offset

    # Align both boundaries to the nearest preceding datetime divisible by the timeframe
    freq = pd.tseries.frequencies.to_offset(timeframe)
    aligned_start_date = start_date.floor(freq)
    aligned_end_date = last_date.floor(freq)

    # Filter the DataFrame for the desired date range
    filtered_df_last = df[(df.index >= aligned_start_date) & (df.index <= aligned_end_date)]
    filtered_df_first = df[(df.index < aligned_start_date)]

    return filtered_df_first, filtered_df_last

# Function that returns the CSV header plus trailing rows of the file as an in-memory buffer
def get_csv_tail(filepath, max_rows=1):
    """
    Build an in-memory CSV consisting of the file's header line and lines from its tail.

    Parameters:
        filepath (str): Path of the CSV file to read.
        max_rows (int): Number of trailing lines requested from `tailer.tail`.

    Returns:
        io.StringIO: Buffer holding the header followed by the trailing lines, joined by newlines.

    Note:
        The first line returned by `tailer.tail` is discarded, so with the default
        `max_rows=1` the buffer contains the header only.
        NOTE(review): confirm that dropping that line is intended.
    """
    with open(filepath) as csv_file:
        # The first line of the file is the header
        header_line = csv_file.readline().strip()

        # Trailing lines of the file, minus the first of them
        tail_lines = tailer.tail(csv_file, max_rows)[1:]

    # Header first, then the trailing lines
    return io.StringIO('\n'.join([header_line] + tail_lines))

## (Function To Convert DF To Any Timeframe) - Taken From The Previous Day

In [4]:
# Function converts the dataframe into any given time frame.
def convert_1m_to_any_timeframe(df: pd.DataFrame, timeframe: str) -> pd.DataFrame:
    """
    Resample 1-minute OHLCV data to the requested timeframe.

    Args:
    - df (pd.DataFrame): 1-minute data with columns ['Open', 'High', 'Low', 'Close', 'Volume'].
      Its index is converted to datetime in place (the caller's DataFrame is modified).
    - timeframe (str): Target timeframe for resampling (e.g., '1H' for 1 hour, '1D' for 1 day).

    Returns:
    - pd.DataFrame: Resampled data whose index is named 'Open time (<timeframe>)'.
      If resampling raises, the error is printed and an empty DataFrame is returned.

    Example:
    ```
    resampled_df = convert_1m_to_any_timeframe(ohlc_df, '1H')
    ```
    """
    # Resampling requires a datetime index
    df.index = pd.to_datetime(df.index)

    # How each column is collapsed within one bar of the new timeframe
    aggregation_rules = {
        'Open': 'first',   # first 'Open' in the bar
        'High': 'max',     # highest 'High' in the bar
        'Low': 'min',      # lowest 'Low' in the bar
        'Close': 'last',   # last 'Close' in the bar
        'Volume': 'mean',  # mean 'Volume' in the bar (NOTE(review): not the summed volume)
    }

    try:
        resampled = df.resample(timeframe).agg(aggregation_rules)
    except Exception as e:
        print(f"An error occurred while resampling! Error message: {e}")
        return pd.DataFrame()  # Empty DataFrame signals the failure to the caller

    # Name the index after the new timeframe
    resampled.index.name = f'Open time ({timeframe})'

    return resampled

## MACD Indicator Function

In [5]:
def calculate_macd(df: pd.DataFrame, short_window: int = 12, long_window: int = 26, signal_window: int = 9) -> pd.DataFrame:
    """
    Add the MACD (Moving Average Convergence Divergence) columns to a DataFrame.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' price data. Modified in place.
    - short_window (int): Span of the short-term EMA, default is 12.
    - long_window (int): Span of the long-term EMA, default is 26.
    - signal_window (int): Span of the Signal line EMA, default is 9.

    Returns:
    - pd.DataFrame: The same DataFrame with 'EMA_12', 'EMA_26', 'MACD' and 'Signal_Line' columns.
      The EMA columns keep these names even when other window sizes are passed.
    """
    close = df['Close']

    # Short-term and long-term EMAs of the closing price
    fast_ema = close.ewm(span=short_window, adjust=False).mean()
    slow_ema = close.ewm(span=long_window, adjust=False).mean()

    # MACD line: distance between the two EMAs
    macd_line = fast_ema - slow_ema

    df['EMA_12'] = fast_ema
    df['EMA_26'] = slow_ema
    df['MACD'] = macd_line

    # Signal line: EMA of the MACD line
    df['Signal_Line'] = macd_line.ewm(span=signal_window, adjust=False).mean()

    return df

def generate_macd_signals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Derive trading signals from the position of the MACD line relative to its Signal line.

    Args:
    - df (pd.DataFrame): DataFrame containing 'MACD' and 'Signal_Line' values. Modified in place.

    Returns:
    - pd.DataFrame: The same DataFrame with a 'Signal' column:
      1 (buy) where MACD > Signal_Line, -1 (sell) where MACD < Signal_Line, 0 otherwise.
    """
    macd_above = df['MACD'] > df['Signal_Line']
    macd_below = df['MACD'] < df['Signal_Line']

    # The two masks are mutually exclusive, so their difference is 1 / -1 / 0
    df['Signal'] = macd_above.astype('int64') - macd_below.astype('int64')

    return df

## ADX In Combination With Parabolic SAR Indicator Function

In [6]:
def calculate_adx(df: pd.DataFrame, window: int = 14) -> pd.DataFrame:
    """
    Add the Average Directional Index (ADX) and both directional indicators to the DataFrame.

    Args:
    - df (pd.DataFrame): DataFrame containing 'High', 'Low', and 'Close' columns. Modified in place.
    - window (int): Window size for the ADX calculation (default is 14).

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'ADX', 'DI+' and 'DI-' columns.
    """
    adx_indicator = ADXIndicator(
        high=df['High'],
        low=df['Low'],
        close=df['Close'],
        window=window,
    )

    df['ADX'] = adx_indicator.adx()
    df['DI+'] = adx_indicator.adx_pos()  # positive directional indicator
    df['DI-'] = adx_indicator.adx_neg()  # negative directional indicator

    return df

def calculate_parabolic_sar(df: pd.DataFrame) -> pd.DataFrame:
    """
    Add the Parabolic SAR, computed with the ta library's default settings, to the DataFrame.

    Args:
    - df (pd.DataFrame): DataFrame containing 'High', 'Low', and 'Close' columns. Modified in place.

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'Parabolic_SAR' column.
    """
    psar_indicator = PSARIndicator(high=df['High'], low=df['Low'], close=df['Close'])
    df['Parabolic_SAR'] = psar_indicator.psar()

    return df

def generate_adx_parabolic_sar_signals(df: pd.DataFrame, adx_threshold: float = 25) -> pd.DataFrame:
    """
    Combine ADX trend strength with the Parabolic SAR position into trading signals.

    Args:
    - df (pd.DataFrame): DataFrame containing 'ADX', 'Close' and 'Parabolic_SAR' columns. Modified in place.
    - adx_threshold (float): ADX value above which the trend counts as strong (default is 25).

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'Signal' column
      (1 for buy, -1 for sell, 0 for hold).
    """
    strong_trend = df['ADX'] > adx_threshold
    price_above_sar = df['Close'] > df['Parabolic_SAR']
    price_below_sar = df['Close'] < df['Parabolic_SAR']

    # Buy: strong trend with price above the SAR; sell: strong trend with price below it
    buy = strong_trend & price_above_sar
    sell = strong_trend & price_below_sar
    df['Signal'] = buy.astype('int64') - sell.astype('int64')

    return df

## RSI Indicator Function

In [7]:
def calculate_rsi(df: pd.DataFrame, window: int = 14) -> pd.DataFrame:
    """
    Add the Relative Strength Index (RSI), computed with the ta library, to the DataFrame.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' price data. Modified in place.
    - window (int): The window size for calculating RSI, default is 14.

    Returns:
    - pd.DataFrame: The same DataFrame with an added 'RSI' column.
    """
    # The indicator is always built with fillna=True
    df['RSI'] = RSIIndicator(close=df['Close'], window=window, fillna=True).rsi()

    return df

def generate_rsi_signals(df: pd.DataFrame, rsi_lower: int = 30, rsi_upper: int = 70) -> pd.DataFrame:
    """
    Derive trading signals from RSI thresholds.

    Args:
    - df (pd.DataFrame): DataFrame containing 'RSI' values. Modified in place.
    - rsi_lower (int): RSI value below which a buy signal is emitted, default is 30.
    - rsi_upper (int): RSI value above which a sell signal is emitted, default is 70.

    Returns:
    - pd.DataFrame: The same DataFrame with a 'Signal' column (1 buy, -1 sell, 0 no signal).
    """
    oversold = (df['RSI'] < rsi_lower).to_numpy()
    overbought = (df['RSI'] > rsi_upper).to_numpy()

    # The buy condition is listed first so it takes precedence if both conditions hold
    df['Signal'] = np.select([oversold, overbought], [1, -1], default=0).astype('int64')

    return df

## Stochastic RSI Indicator Function

In [8]:
def calculate_stochrsi(
    df: pd.DataFrame,
    window: int = 14,
    smooth1: int = 3,
    smooth2: int = 3,
    fillna: bool = False,
) -> pd.DataFrame:
    """
    Add the Stochastic RSI (STOCHRSI) and its %K / %D lines, computed with the ta library.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' price data. Modified in place.
    - window (int): The window size for calculating RSI, default is 14.
    - smooth1 (int): The window size for the first smoothing, default is 3.
    - smooth2 (int): The window size for the second smoothing, default is 3.
    - fillna (bool): Whether the indicator should fill NaN values.

    Returns:
    - df (pd.DataFrame): The same DataFrame with 'StochRSI', 'StochRSI_K' and 'StochRSI_D' columns.
      'StochRSI' is stored as returned by the library, while the K and D lines are multiplied by 100.
    """
    indicator = StochRSIIndicator(
        close=df['Close'],
        window=window,
        smooth1=smooth1,
        smooth2=smooth2,
        fillna=fillna,
    )

    # Raw Stochastic RSI
    df['StochRSI'] = indicator.stochrsi()

    # Smoothed lines, rescaled by a factor of 100
    df['StochRSI_K'] = indicator.stochrsi_k() * 100
    df['StochRSI_D'] = indicator.stochrsi_d() * 100

    return df

def generate_stochrsi_signals(
    df: pd.DataFrame,
    stochrsi_upper: float = 0.7,
    stochrsi_lower: float = 0.3,
) -> pd.DataFrame:
    """
    Generate buy/sell signals from Stochastic RSI threshold crossings.

    The thresholds passed in are now actually used; previously they were ignored
    in favour of hard-coded 0.8 / 0.2 levels. Pass `stochrsi_upper=0.8,
    stochrsi_lower=0.2` to reproduce the old levels.

    Args:
    - df (pd.DataFrame): DataFrame containing a 'StochRSI' column. Modified in place.
    - stochrsi_upper (float): Upper threshold; a sell signal is emitted when StochRSI
      moves from above it to at-or-below it. Default is 0.7.
    - stochrsi_lower (float): Lower threshold; a buy signal is emitted when StochRSI
      moves from below it to at-or-above it. Default is 0.3.

    Returns:
    - df (pd.DataFrame): The same DataFrame with a 'Signal' column (1 buy, -1 sell, 0 no signal).
    """
    current = df['StochRSI']
    previous = current.shift(1)

    # Buy: crossing up through the lower threshold
    crossed_up = ((previous < stochrsi_lower) & (current >= stochrsi_lower)).to_numpy()
    # Sell: crossing down through the upper threshold
    crossed_down = ((previous > stochrsi_upper) & (current <= stochrsi_upper)).to_numpy()

    # Sell is listed first so it wins if both conditions hold, as in the
    # original assignment order (buy applied first, sell applied last)
    df['Signal'] = np.select([crossed_down, crossed_up], [-1, 1], default=0).astype('int64')

    return df

## OBV Indicator Function

In [9]:
def calculate_obv(df: pd.DataFrame, fillna: bool = False) -> pd.DataFrame:
    """
    Generate On-Balance Volume (OBV) values with the ta library.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' and 'Volume' columns. Modified in place.
    - fillna (bool): Whether the indicator should fill NaN values. This flag is now
      forwarded to the indicator; previously it was ignored and True was always used.

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'OBV' column.
    """
    obv = OnBalanceVolumeIndicator(
        close = df['Close'],
        volume = df['Volume'],
        fillna = fillna
    )

    df['OBV'] = obv.on_balance_volume()

    return df


def generate_obv_signals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Derive trading signals from the bar-to-bar direction of On-Balance Volume (OBV).

    Args:
    - df (pd.DataFrame): DataFrame containing an 'OBV' column. Modified in place.

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'Signal' column:
      1 (buy) where OBV is higher than on the previous row, -1 (sell) where it is lower, 0 otherwise.
    """
    obv = df['OBV']
    previous_obv = obv.shift(1)

    # The first row has no predecessor, so both comparisons are False there
    rising = obv > previous_obv
    falling = obv < previous_obv
    df['Signal'] = rising.astype('int64') - falling.astype('int64')

    return df

## Simple Moving Average (SMA) Indicator Function

In [10]:
def calculate_sma(
    df: pd.DataFrame, 
    window: int, 
    source: str = 'Close', 
    offset: int = 0,
    fillna: bool = False
) -> pd.DataFrame:
    """
    Add a Simple Moving Average (SMA) column, computed with the ta library.

    Args:
    - df (pd.DataFrame): DataFrame containing price data. Modified in place.
    - window (int): The window size for calculating SMA.
    - source (str): The column name on which to calculate the SMA, default is 'Close'.
    - offset (int): The number of periods to shift the SMA by, default is 0 (no shift).
    - fillna (bool): Whether the indicator should fill NaN values.

    Returns:
    - df (pd.DataFrame): The same DataFrame with an added 'SMA_<window>' column.
    """
    column = f'SMA_{window}'
    sma_values = SMAIndicator(close=df[source], window=window, fillna=fillna).sma_indicator()

    # Shift only when an offset was requested
    df[column] = sma_values if offset == 0 else sma_values.shift(offset)

    return df

def generate_sma_signals(
    df: pd.DataFrame, 
    short_window: int = 50, 
    long_window: int = 200
) -> pd.DataFrame:
    """
    Generate trading signals from the relative position of a short and a long SMA of 'Close'.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' price data.
    - short_window (int): The window size for the short-term SMA, default is 50.
    - long_window (int): The window size for the long-term SMA, default is 200.

    Returns:
    - pd.DataFrame: DataFrame with both SMA columns and a 'Signal' column:
      1 (buy) while the short SMA is above the long SMA, -1 (sell) while it is below, 0 otherwise.
    """
    # Short-term SMA first, then long-term SMA
    for span in (short_window, long_window):
        df = calculate_sma(df, window=span, source='Close')

    short_sma = df[f'SMA_{short_window}']
    long_sma = df[f'SMA_{long_window}']

    # Mutually exclusive masks: their difference yields 1 / -1 / 0
    df['Signal'] = (short_sma > long_sma).astype('int64') - (short_sma < long_sma).astype('int64')

    return df

## Exponential Moving Average (EMA) Indicator Function

In [11]:
def calculate_ema(
    df: pd.DataFrame, 
    window: int, 
    source: str = 'Close', 
    offset: int = 0, 
    fillna: bool = False,
    smoothing_line: str = 'ema', 
    smoothing_length: int = None
) -> pd.DataFrame:
    """
    Add an Exponential Moving Average (EMA) column, computed with the ta library.

    Args:
    - df (pd.DataFrame): DataFrame containing price data. Modified in place.
    - window (int): The window size for calculating EMA.
    - source (str): The column name on which to calculate the EMA, default is 'Close'.
    - offset (int): The number of periods to shift the EMA by, default is 0 (no shift).
    - fillna (bool): Whether the indicator should fill NaN values.
    - smoothing_line (str): The type of smoothing line; only 'ema' is accepted.
    - smoothing_length (int): Accepted but not used by this function.

    Returns:
    - df (pd.DataFrame): The same DataFrame with an added 'EMA_<window>' column.

    Raises:
    - ValueError: If smoothing_line is anything other than 'ema'.
    """
    if smoothing_line != 'ema':
        raise ValueError("Only 'ema' smoothing is supported in this function.")

    column = f'EMA_{window}'
    ema_values = EMAIndicator(close=df[source], window=window, fillna=fillna).ema_indicator()

    # Shift only when an offset was requested
    df[column] = ema_values if offset == 0 else ema_values.shift(offset)

    return df

def generate_triple_ema_signals(
    df: pd.DataFrame, 
    short_window: int = 5, 
    medium_window: int = 21, 
    long_window: int = 50,
    source: str = 'Close'
) -> pd.DataFrame:
    """
    Generate trading signals from the position of the short EMA relative to the medium and long EMAs.

    Args:
    - df (pd.DataFrame): DataFrame containing price data.
    - short_window (int): The window size for the short-term EMA.
    - medium_window (int): The window size for the medium-term EMA.
    - long_window (int): The window size for the long-term EMA.
    - source (str): The column name on which to calculate the EMAs, default is 'Close'.

    Returns:
    - pd.DataFrame: DataFrame with the three EMA columns and a 'Signal' column:
      1 while the short EMA is above both other EMAs, -1 while it is below both, 0 otherwise.
    """
    # Short-term, medium-term and long-term EMAs, in that order
    for span in (short_window, medium_window, long_window):
        df = calculate_ema(df, span, source)

    short_ema = df[f'EMA_{short_window}']
    medium_ema = df[f'EMA_{medium_window}']
    long_ema = df[f'EMA_{long_window}']

    # Buy: short EMA above both the medium and the long EMA
    bullish = (short_ema > medium_ema) & (short_ema > long_ema)
    # Sell: short EMA below both the medium and the long EMA
    bearish = (short_ema < medium_ema) & (short_ema < long_ema)

    df['Signal'] = bullish.astype('int64') - bearish.astype('int64')

    return df

## Smoothed Moving Average (SMMA) Indicator Function

In [12]:
def calculate_smma(
    df: pd.DataFrame, 
    window: int, 
    source: str = 'Close', 
    offset: int = 0
) -> pd.DataFrame:
    """
    Calculate the Smoothed Moving Average (SMMA) for a given DataFrame with additional parameters.

    The value at position `window - 1` is seeded with the simple average of the first
    `window` values; each later value is (previous SMMA * (window - 1) + price) / window.
    Rows before the seed are left at 0.0.

    Args:
    - df (pd.DataFrame): DataFrame containing price data. Modified in place.
    - window (int): The window size for calculating SMMA (must be >= 1).
    - source (str): The column name on which to calculate the SMMA, default is 'Close'.
    - offset (int): The number of periods to shift the SMMA by, default is 0 (no shift).
      This is now applied, matching calculate_sma / calculate_ema; previously it was ignored.

    Returns:
    - df (pd.DataFrame): The same DataFrame with an 'SMMA' column. When the DataFrame has
      fewer than `window` rows no seed exists and the column is all 0.0 (the original
      implementation raised IndexError in that case).

    Raises:
    - ValueError: If window is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer.")

    # Work on plain arrays: the recurrence is inherently sequential, and per-row
    # df.loc reads/writes are far slower than array indexing for the same arithmetic
    prices = df[source].to_numpy(dtype=float)
    smma = np.zeros(len(prices), dtype=float)

    if len(prices) >= window:
        # Seed: simple average of the first `window` prices, using the same rolling
        # sum as before so NaN handling and rounding are unchanged
        seed_sum = df[source].iloc[:window].rolling(window=window, min_periods=1).sum().iloc[-1]
        smma[window - 1] = seed_sum / window

        # Subsequent values from the iterative formula
        for i in range(window, len(prices)):
            smma[i] = (smma[i - 1] * (window - 1) + prices[i]) / window

    df['SMMA'] = smma

    if offset != 0:
        df['SMMA'] = df['SMMA'].shift(offset)

    return df

def generate_smma_signals(
    df: pd.DataFrame, 
    window: int = 14, 
    source: str = 'Close'
) -> pd.DataFrame:
    """
    Generate trading signals from the position of the price relative to its Smoothed Moving Average (SMMA).

    Args:
    - df (pd.DataFrame): DataFrame containing price data.
    - window (int): The window size for calculating SMMA.
    - source (str): The column name on which to calculate the SMMA, default is 'Close'.

    Returns:
    - pd.DataFrame: DataFrame with the 'SMMA' column and a 'Signal' column:
      1 (buy) while the price is above the SMMA, -1 (sell) while it is below, 0 when equal.
    """
    df = calculate_smma(df, window, source)

    price = df[source]
    smma = df['SMMA']

    # Mutually exclusive masks: their difference yields 1 / -1 / 0
    df['Signal'] = (price > smma).astype('int64') - (price < smma).astype('int64')

    return df

## Volume Weighted Moving Average (VWMA) Indicator Function

In [13]:
def calculate_vwma(df: pd.DataFrame, window: int = 14) -> pd.DataFrame:
    """
    Add the Volume Weighted Moving Average (VWMA) of 'Close' to the DataFrame.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' and 'Volume' columns. Modified in place.
    - window (int): The window size for calculating VWMA.

    Returns:
    - pd.DataFrame: The same DataFrame with an added 'VWMA_<window>' column.
    """
    volume = df['Volume']
    price_volume = df['Close'] * volume

    # Rolling sums; min_periods=1 means the first rows use the partial window available
    rolling_price_volume = price_volume.rolling(window=window, min_periods=1).sum()
    rolling_volume = volume.rolling(window=window, min_periods=1).sum()

    df[f'VWMA_{window}'] = rolling_price_volume / rolling_volume

    return df

def generate_vwma_signals(df: pd.DataFrame, short_window: int = 10, long_window: int = 50) -> pd.DataFrame:
    """
    Generate trading signals based on VWMA crossover strategy.

    Args:
    - df (pd.DataFrame): DataFrame containing 'Close' and 'Volume' columns.
    - short_window (int): Window size for short-term VWMA.
    - long_window (int): Window size for long-term VWMA.

    Returns:
    - pd.DataFrame: DataFrame with 'VWMA_Short', 'VWMA_Long' and a 'Signal' column:
      1 (buy) while the short VWMA is above the long VWMA, -1 (sell) while below, 0 otherwise.
    """
    # Move each VWMA from its window-specific column to the generic name with
    # pop + assignment. Unlike rename, this overwrites an existing
    # 'VWMA_Short' / 'VWMA_Long' column, so running the function again on the
    # same DataFrame does not produce duplicate column labels.
    df = calculate_vwma(df, window=short_window)
    df['VWMA_Short'] = df.pop(f'VWMA_{short_window}')

    df = calculate_vwma(df, window=long_window)
    df['VWMA_Long'] = df.pop(f'VWMA_{long_window}')

    # Vectorised comparison instead of a row-wise apply; rows where either
    # VWMA is NaN compare False on both sides and therefore get 0
    short_above = df['VWMA_Short'] > df['VWMA_Long']
    short_below = df['VWMA_Short'] < df['VWMA_Long']
    df['Signal'] = short_above.astype('int64') - short_below.astype('int64')

    return df

## Awesome Oscillator Indicator Function

In [14]:
def calculate_AO(df: pd.DataFrame, fillna: bool = False) -> pd.DataFrame:
    """
    Generate Awesome Oscillator (AO) values with the ta library.

    Args:
    - df (pd.DataFrame): DataFrame containing 'High' and 'Low' columns. Modified in place.
    - fillna (bool): Whether the indicator should fill NaN values. This flag is now
      forwarded to the indicator; previously it was ignored and True was always used.

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'AO' column.
    """
    awesome_oscillator = AwesomeOscillatorIndicator(
        high = df['High'],
        low = df['Low'],
        fillna = fillna
    )

    df['AO'] = awesome_oscillator.awesome_oscillator()

    return df


def generate_AO_signals(df: pd.DataFrame) -> pd.DataFrame:
    """
    Generate trading signals from zero-line crossings of the Awesome Oscillator (AO).

    Args:
    - df (pd.DataFrame): DataFrame containing an 'AO' column. Modified in place.

    Returns:
    - df (pd.DataFrame): The same DataFrame with added 'Signal' column:
      1 (buy) where AO is positive and the previous AO was <= 0,
      -1 (sell) where AO is negative and the previous AO was >= 0,
      0 (hold) everywhere else, including the first row.
    """
    current_ao = df['AO']
    previous_ao = current_ao.shift(1)

    # The first row has no predecessor (NaN), so neither crossing can trigger there
    crossed_above_zero = (current_ao > 0) & (previous_ao <= 0)
    crossed_below_zero = (current_ao < 0) & (previous_ao >= 0)

    df['Signal'] = crossed_above_zero.astype('int64') - crossed_below_zero.astype('int64')

    return df

## Backtesting Function: Previous Week's Final

In [15]:
def generate_backtest_df(data, data_1m, df_for_freq_inferring, initial_balance=1000, transaction_fee=0.01):
    """
    Backtest the 'Signal' column of `data`, using 1-minute data to detect take-profit / stop-loss exits.

    A trade is opened at the first non-zero signal and closed either at the first later
    non-zero signal with a different value ('direction' exit) or earlier, when the 1-minute
    highs/lows reach the hard-coded take-profit (5%) or stop-loss (3%) level.

    Args:
        data (pd.DataFrame): Signal-timeframe data; the 'Open' and 'Signal' columns are read and the
                             name of its index is reused for the result.
                             # assumes a datetime index — TODO confirm against callers
        data_1m (pd.DataFrame): 1-minute interval data; the 'High', 'Low' and 'Open' columns are read.
        df_for_freq_inferring (pd.DataFrame): DataFrame whose index is passed to pd.infer_freq to
                             obtain the signal timeframe.
        initial_balance (float): Initial balance for the backtest.
        transaction_fee (float): Fee per trade; it is divided by 100 before being applied to the
                             running balance, i.e. it is treated as a percentage.

    Returns:
        pd.DataFrame: Two rows per trade (entry row, then exit row), indexed by time, with columns
                      'direction', 'entry price', 'close price', 'action', 'PNL', 'Balance'.
                      An empty DataFrame is returned when no trade was recorded.
    """
    
    def find_first_change(signal):
        # Returns (start_label, end_label): the label of the first non-zero signal and the
        # label of the first later non-zero signal whose value differs from it.
        # end_label is None when no such later signal exists.

        # Convert the list to a numpy array
        signal_array = np.array(signal)

        # Find indices of non-zero elements
        non_zero_indices = np.flatnonzero(signal_array != 0)

        # Find where the value changes (positions within the non-zero subsequence);
        # position 0 is prepended so changes[0] always refers to the first non-zero signal
        changes = np.where(np.diff(signal_array[non_zero_indices]))[0] + 1
        changes = np.insert(changes, 0, 0)
        
        # NOTE(review): with no non-zero signal at all this returns (first label, last label);
        # the main loop then handles it like a trade whose direction is 'short' (signal != 1).
        # Confirm this is intended.
        if len(non_zero_indices) < 1:
            return signal.index[0], signal.index[-1]

        if len(changes) == 1:
            # Only one run of non-zero values: there is a start but no change after it
            first_change_start = non_zero_indices[changes[0]]
            first_change_end = None
        else:
            # Get the indices of the first change
            first_change_start = non_zero_indices[changes[0]]
            first_change_end = non_zero_indices[changes[0 + 1]]

        # Get the starting and ending time of the direction change
        trade_start_time = signal.index[first_change_start]
        if first_change_end != None:
            trade_end_time = signal.index[first_change_end]
        else:
            trade_end_time = None

        # Return the start and end time tuple
        return (trade_start_time, trade_end_time)
    
    # For index name
    index_name = data.index.name

    # For timeframe
    # NOTE(review): pd.infer_freq can return None, in which case len() below raises TypeError
    time_frame = pd.infer_freq(df_for_freq_inferring.index)

    # If timeframe is like 'H' or 'D' or 'Y' then append 1 for specificity
    if len(time_frame) == 1:
        time_frame = '1' + time_frame

    # Extract the necessary columns (kept as pandas Series so they can be sliced by label)
    high_prices_1m = data_1m['High']
    low_prices_1m = data_1m['Low']
    open_prices_1m = data_1m['Open']
    open_prices = data['Open']
    signals = data['Signal']
    
    # Initialize the exit indices for tp or sl hit (takes the lowest: which happened first)
    # NOTE(review): these two names, and open_prices_1m above, are not used again in this function
    exit_index_tp = None
    exit_index_sl = None
    
    # Initialize the backtest results array
    backtest_data = []

    # Initialize trade parameters
    tp = 0.05  # 5% take profit
    sl = 0.03  # 3% stop loss
    
    # Initialize the balance
    # NOTE(review): `balance` is not read again; the balance series is rebuilt from initial_balance below
    balance = initial_balance
    
    # Get last date
    last_date = signals.index[-1]
    
    # Initializing List To Store Directions For The Trade
    directions = []
    
    # Iterate: each pass handles one trade, then `signals` is trimmed to start after its exit
    while(True):
        trade_start_time, trade_end_time = find_first_change(signals)

        # No further direction change in the remaining signals: stop
        if trade_end_time == None:
            break
            
        # Any signal value other than 1 is treated as 'short'
        direction_start = 'long' if signals[trade_start_time] == 1 else 'short'
        direction_end = 'long' if signals[trade_end_time] == 1 else 'short'
        entry_price = open_prices[trade_start_time]
        
        # Calculate take profit and stop loss prices
        if direction_start == 'long':
            tp_price = entry_price * (1 + tp)
            sl_price = entry_price * (1 - sl)
        else:
            tp_price = entry_price * (1 - tp)
            sl_price = entry_price * (1 + sl)
        
        # Find the exit point for the trade
        exit_index = None
        action = 'direction'  # Default action is direction change
        
        # getting to the closest time of that interval
        # Assuming trade_end_time is a datetime object
        # Basically doing this, so the tp and sl hit only checks and
        # compares from the (T + 1)th time till the trade end time.
        trade_start_time_matching = pd.to_datetime(trade_start_time)
        add_minute = pd.Timedelta('1m')
        trade_start_time_matching = trade_start_time_matching + add_minute
        
        # np.where(...)[0] yields positions relative to the sliced 1-minute window
        if direction_start == 'long':
            tp_hit = np.where(high_prices_1m[trade_start_time_matching:trade_end_time] >= tp_price)[0]
            sl_hit = np.where(low_prices_1m[trade_start_time_matching:trade_end_time] <= sl_price)[0]
        else:
            tp_hit = np.where(low_prices_1m[trade_start_time_matching:trade_end_time] <= tp_price)[0]
            sl_hit = np.where(high_prices_1m[trade_start_time_matching:trade_end_time] >= sl_price)[0]

        if len(tp_hit) > 0:
            exit_index = tp_hit[0]
            action = 'tp'
        # Stop loss overrides take profit only when it is hit strictly earlier;
        # if both are hit on the same 1-minute row the trade counts as 'tp'
        if len(sl_hit) > 0 and (len(tp_hit) == 0 or sl_hit[0] < tp_hit[0]):
            exit_index = sl_hit[0]
            action = 'sl'
            
        if action == 'direction':
            # Exit at the open of the bar on which the direction changes
            close_price = open_prices[trade_end_time]
        else:
            # Exit on a tp/sl hit: trade_end_time becomes the 1-minute row of the hit and the
            # close price is that row's high or low (not the tp_price / sl_price level itself)
            if action == 'tp':
                if direction_start == 'long':
                    trade_end_time = high_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = high_prices_1m[trade_end_time]
                else:
                    trade_end_time = low_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = low_prices_1m[trade_end_time]
            else:
                if direction_start == 'long':
                    trade_end_time = low_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = low_prices_1m[trade_end_time]
                else:
                    trade_end_time = high_prices_1m[trade_start_time_matching:trade_end_time].index[exit_index]
                    close_price = high_prices_1m[trade_end_time]
                    
            # NOTE(review): on a tp/sl exit after at least one earlier trade, both recorded
            # directions are replaced by the last direction stored from the previous trade.
            # The tp/sl levels above were computed from the pre-override direction — confirm intent.
            if len(directions) != 0:
                direction_end = directions[-1]
                direction_start = directions[-1]
        
        # Record the trade entry and exit
        backtest_data.append([trade_start_time, direction_start, entry_price, 0, None])
        backtest_data.append([trade_end_time, direction_end, entry_price, close_price, action])
            
        # getting to the closest time of that interval
        # Assuming trade_end_time is a datetime object
        trade_end_time = pd.to_datetime(trade_end_time)

        # Define the time format
        time_format = pd.Timedelta(time_frame)

        # Calculate the remainder when trade_end_time is divided by time_format
        remainder = trade_end_time.to_numpy().astype('datetime64[ns]').astype(np.int64) % time_format.to_numpy().astype('timedelta64[ns]').astype(np.int64)

        # If remainder is not zero, round up to the next multiple of time_format
        if remainder != 0:
            trade_end_time = trade_end_time + (time_format - pd.Timedelta(remainder, unit='ns'))
            
        # This is the condition that would end the loop (else it would run infinitely)
        if trade_end_time >= last_date:
            break
            
        # Continue searching from the (rounded-up) exit time onwards
        signals = signals[trade_end_time:]
        
        directions.append(direction_start)
        directions.append(direction_end)

    # If no signal change detected in the entire signal array, return empty dataframe (for error handling)
    if not backtest_data:
        return pd.DataFrame()

    backtest_df = pd.DataFrame(backtest_data, columns=[index_name, 'direction', 'entry price', 'close price', 'action'])

    # Calculate PNL using vectorized operations.
    # Prices come from the exit rows (odd positions), directions from the entry rows (even positions).
    entry_prices = backtest_df['entry price'][1::2].values
    close_prices = backtest_df['close price'][1::2].values
    directions = backtest_df['direction'][0:-1:2].values

    # PNL in percent of the entry price, sign-adjusted for shorts
    pnl = np.where(directions == 'long',
                   ((close_prices - entry_prices) / entry_prices) * 100,
                   ((entry_prices - close_prices) / entry_prices) * 100)

    # Insert the PNL values back into the DataFrame (exit rows only)
    backtest_df.loc[1::2, 'PNL'] = pnl

    # Update balance considering PNL and transaction fees
    balances = [initial_balance]
    for pnl_value in pnl:
        # Fee is charged once per trade on the balance before the trade
        transaction_cost = balances[-1] * (transaction_fee / 100)
        new_balance = balances[-1] + (np.abs(balances[-1]) * (pnl_value / 100)) - transaction_cost
        balances.append(new_balance)
    
    # Insert the balance values back into the DataFrame:
    # the post-trade balance is written on both the entry and the exit row of each trade
    backtest_df['Balance'] = pd.Series(np.repeat(balances[1:], 2)[:len(backtest_df)])

    # Setting the date as the index of the dataframe
    backtest_df.set_index(index_name, inplace = True)
    backtest_df.index = pd.to_datetime(backtest_df.index, format='mixed')

    return backtest_df

## Function For Stats Calculation (Changed A Lot)

In [16]:
# Get the last row of all pnl scores
def get_last_pnl_scores(ledger):
    """
    Return the final-row values of the rolling PNL columns as a list.

    Args:
    - ledger (pd.DataFrame): DataFrame containing the 'pnl_sum_1', 'pnl_sum_7', 'pnl_sum_15',
      'pnl_sum_30', 'pnl_sum_45' and 'pnl_sum_60' columns.

    Returns:
    - list: Last-row values of those columns, in the order listed above.
    """
    # Column names for the 1, 7, 15, 30, 45 and 60 day windows
    pnl_cols = [f'pnl_sum_{days}' for days in (1, 7, 15, 30, 45, 60)]

    return ledger[pnl_cols].iloc[-1].values.tolist()

# Calculate the cumulative PNL and the 1d, 7d, 15d, 30d, 45d, 60d rolling PNL scores
def calculate_pnl_sum_all(df):
    """
    Add the cumulative PNL and the trailing-window PNL sums to a ledger.

    The first column of `df` is treated as the date column. Unparseable dates become
    NaT and non-numeric PNL values become 0.0.

    Note: `df` itself is modified in place (coerced date/PNL columns, new 'pnl_sum' and
    'pnl_sum_*' columns). Only the returned frame has the 'pnl_sum_*' columns rounded.

    Parameters:
    df (pd.DataFrame): Ledger with the date in its first column and a 'PNL' column.

    Returns:
    pd.DataFrame: The ledger with 'pnl_sum' and the six 'pnl_sum_<days>' columns,
        the latter rounded to 2 decimal places.
    """
    # (output column, trailing window length in days)
    rolling_windows = (
        ('pnl_sum_1', 1),
        ('pnl_sum_7', 7),
        ('pnl_sum_15', 15),
        ('pnl_sum_30', 30),
        ('pnl_sum_45', 45),
        ('pnl_sum_60', 60),
    )
    date_column = df.columns[0]

    # Coerce the inputs into datetime / numeric form
    df[date_column] = pd.to_datetime(df[date_column], errors='coerce')
    df['PNL'] = pd.to_numeric(df['PNL'], errors='coerce').fillna(0.0)

    # Running total of the PNL
    df['pnl_sum'] = df['PNL'].cumsum()

    # Create the score columns up front so they appear in window order
    for score_column, _ in rolling_windows:
        df[score_column] = np.nan

    # Time-based rolling windows need the dates on the index
    df.set_index(date_column, inplace=True)
    for score_column, days in rolling_windows:
        # closed='both' includes rows exactly `days` old as well as the current row
        windowed_total = df['PNL'].rolling(window=f'{days}D', closed='both').sum()
        df[score_column] = windowed_total.reindex(df.index).fillna(0.0)
    df.reset_index(inplace=True)

    # Round only the rolling score columns
    return df.round({score_column: 2 for score_column, _ in rolling_windows})

# Calculate the difference between two dates
def calculate_diff_date(start, end):
    """
    Return the number of whole days between two datetimes.

    Parameters:
    start: The earlier datetime.
    end: The later datetime.

    Returns:
    int: The `.days` component of `end - start`.
    """
    elapsed = end - start
    return elapsed.days

# Calculate the drawdown durations plus the maximum and current drawdown
def longest_drawdown(pnl_cum_list, date):
    """
    Scan a cumulative PNL curve and summarise its drawdowns.

    A drawdown starts on the first row whose value is below the running peak and ends
    on the first later row that is back at (or above) that peak.

    Parameters:
    pnl_cum_list (sequence of float): Cumulative PNL values in chronological order.
        Read positionally, so any pandas index is fine.
    date (pd.Series): Datetime of each row, accessed positionally with `.iloc`.

    Returns:
    tuple: (drawdown_durations, max_drawdown, max_drawdown_duration,
            curr_drawdown, curr_drawdown_duration) where
        drawdown_durations (list of int): Days from the first underwater row to the
            recovery row, one entry per completed drawdown.
        max_drawdown (float): Largest peak-to-value distance seen, rounded to 2 dp.
        max_drawdown_duration (int): Longest span in days from the first underwater
            row to a later underwater row of the same drawdown.
        curr_drawdown (float): Depth of the drawdown still open at the last row,
            or 0 when the curve has recovered.
        curr_drawdown_duration (int): Days the open drawdown has lasted, or 0.
        For empty input the result is ([], 0, 0, 0, 0).
    """
    pnl_values = list(pnl_cum_list)

    # Nothing to scan: report "no drawdown" instead of failing on the first element
    if not pnl_values:
        return [], 0, 0, 0, 0

    max_drawdown = 0
    max_drawdown_duration = 0
    curr_drawdown = 0
    curr_drawdown_duration = 0
    drawdown_durations = []

    maxPnl = pnl_values[0]
    start_date = None
    drawdown_active = False

    for counter, value in enumerate(pnl_values):
        if value < maxPnl:
            drawdown = maxPnl - value

            if not drawdown_active:
                start_date = date.iloc[counter]  # First underwater row of this drawdown
                drawdown_active = True

            curr_drawdown = drawdown
            curr_drawdown_duration = (date.iloc[counter] - start_date).days

            if curr_drawdown_duration > max_drawdown_duration:
                max_drawdown_duration = curr_drawdown_duration

            if drawdown > max_drawdown:
                max_drawdown = drawdown

        elif drawdown_active:
            # The curve is back at/above the previous peak: close the drawdown
            drawdown_durations.append((date.iloc[counter] - start_date).days)
            drawdown_active = False
            start_date = None
            # Reset BOTH current-drawdown figures; previously only the duration was
            # cleared, so a stale depth was reported after a full recovery.
            curr_drawdown = 0
            curr_drawdown_duration = 0
            maxPnl = value

        if value > maxPnl:
            maxPnl = value

    # Ensure the current drawdown duration reaches the last row while still underwater
    if drawdown_active:
        curr_drawdown_duration = (date.iloc[-1] - start_date).days

    # NOTE(review): max_drawdown_duration only measures up to the last underwater row,
    # while drawdown_durations measures up to the recovery row, so the average duration
    # derived from the list can exceed the reported maximum - confirm which is intended.
    return drawdown_durations, round(max_drawdown, 2), max_drawdown_duration, round(curr_drawdown, 2), curr_drawdown_duration

# Calculate the drawdown of every point relative to the running peak
def calculate_drawdown(pnl_cum_list):
    """
    Compute the distance of each cumulative PNL value from the highest value so far.

    Parameters:
    pnl_cum_list (sequence of float): Cumulative PNL values in chronological order.

    Returns:
    list: One entry per input value, `value - running_peak` rounded to 2 decimal
        places (0 at a new peak, negative while below it).
    """
    running_peak = pnl_cum_list[0]
    drawdowns = []

    for current in pnl_cum_list:
        # Raise the peak only when the curve makes a new high
        if current > running_peak:
            running_peak = current
        drawdowns.append(round(current - running_peak, 2))

    return drawdowns

# Wins/losses calculation
def calculate_wins_losses(df):
    """
    Count winning and losing trades and the longest streak of each.

    The first row of the 'PNL' column is skipped (as in the original implementation).
    Rows with a PNL of exactly 0 are neither wins nor losses and do not interrupt a
    streak.

    Parameters:
    df (pd.DataFrame): Ledger with a numeric 'PNL' column.

    Returns:
    tuple: (total_wins, total_losses, consecutive_wins, consecutive_losses,
            win_percentage, loss_percentage). The percentages are rounded to 2 decimal
            places and are both 0.0 when the ledger contains no wins and no losses.
    """
    total_wins = total_losses = 0
    consecutive_wins = consecutive_losses = 0
    temp_wins = temp_losses = 0

    for pnl in df['PNL'].iloc[1:]:
        if pnl > 0:
            total_wins += 1
            temp_wins += 1
            temp_losses = 0
            # Track the best streak as it grows so a streak that is still running
            # at the end of the ledger is not lost.
            consecutive_wins = max(consecutive_wins, temp_wins)
        elif pnl < 0:
            total_losses += 1
            temp_losses += 1
            temp_wins = 0
            consecutive_losses = max(consecutive_losses, temp_losses)

    decided_trades = total_wins + total_losses

    # Avoid dividing by zero when there is nothing to rate yet
    if decided_trades == 0:
        return total_wins, total_losses, consecutive_wins, consecutive_losses, 0.0, 0.0

    win_percentage = round(total_wins / decided_trades * 100, 2)
    loss_percentage = round(total_losses / decided_trades * 100, 2)

    return total_wins, total_losses, consecutive_wins, consecutive_losses, win_percentage, loss_percentage
    
# Calculate r2 score
def calculate_r2_score(ledger):
    """
    Measure how linear the cumulative PNL curve is over the row number.

    The score is the squared Pearson correlation between 'pnl_sum' and the row
    positions 0..n-1.

    Parameters:
    ledger (pd.DataFrame): Ledger exposing a 'pnl_sum' column.

    Returns:
    float: R^2 rounded to 2 decimal places.
    """
    cumulative_pnl = ledger.pnl_sum.to_numpy()
    positions = np.arange(len(cumulative_pnl))

    # Deviations of each variable from its own mean
    position_dev = positions - np.mean(positions)
    pnl_dev = cumulative_pnl - np.mean(cumulative_pnl)

    # Pearson r = sum(dx * dy) / sqrt(sum(dx^2) * sum(dy^2))
    cross_sum = np.sum(position_dev * pnl_dev)
    spread = np.sqrt(np.sum(position_dev ** 2) * np.sum(pnl_dev ** 2))
    pearson_r = cross_sum / spread

    return round(pearson_r ** 2, 2)

# Positive / negative PNL calculation
def pos_neg_pnl_percent(pnl_percent):
    """
    Split a PNL series into its total loss and total gain.

    Parameters:
    pnl_percent (pd.Series): PNL values (boolean-mask indexable).

    Returns:
    tuple: (total, total_negative, total_positive) where `total` is the sum of the
        other two.
    """
    losses_total = pnl_percent[pnl_percent < 0].sum()
    gains_total = pnl_percent[pnl_percent > 0].sum()
    net_total = losses_total + gains_total

    return net_total, losses_total, gains_total

# Sharpe ratio calculation
def calculate_sharpe(returns):
    """
    Calculate the Sharpe ratio with the QuantStats library.

    Parameters:
    returns: Return series passed straight to `qs.stats.sharpe`.

    Returns:
    The Sharpe ratio rounded to 2 decimal places.
    """
    # Second positional argument 0 is the risk-free rate
    return round(qs.stats.sharpe(returns, 0), 2)

# Calculate downside risk
def calculate_downside_risk(returns, risk_free=0, periods_per_year=252):
    """
    Calculate the annualized downside risk (downside deviation) of a return series.

    Only returns below the risk-free rate contribute; everything else counts as 0.
    NaN entries are ignored when averaging.

    Parameters:
    returns (array-like): Return values.
    risk_free (float): Risk-free rate subtracted from every return. Defaults to 0.
    periods_per_year (int or float): Annualization factor applied to the mean squared
        downside. Defaults to 252 (the value that used to be hard-coded); pass e.g.
        365 for a market that trades every day.

    Returns:
    float: sqrt(mean(min(returns - risk_free, 0)^2) * periods_per_year).
    """
    # Returns in excess of the risk-free rate
    adj_returns = returns - risk_free

    # Squared shortfall; non-negative excess returns are clipped to 0 first
    sqr_downside = np.square(np.minimum(adj_returns, 0))

    # Mean squared shortfall, ignoring NaNs
    mean_sqr_downside = np.nanmean(sqr_downside)

    # Scale by the annualization factor and take the square root
    return np.sqrt(mean_sqr_downside * periods_per_year)

# Calculate sortino
def calculate_sortino(returns):
    """
    Calculate the Sortino ratio with the QuantStats library.

    Parameters:
    returns: Return series passed straight to `qs.stats.sortino`.

    Returns:
    The (unrounded) value returned by `qs.stats.sortino`.
    """
    return qs.stats.sortino(returns)

# Calculate average daily pnl
def average_daily_pnl(pnl_sum, date_started):
    """
    Average the cumulative PNL over the number of days since the strategy started.

    Parameters:
    pnl_sum (float): Cumulative PNL up to now.
    date_started (str or datetime-like): Start of the strategy. Strings must use the
        format '%Y-%m-%d %H:%M:%S'.

    Returns:
    float: `pnl_sum` divided by the whole days elapsed between `date_started` and
        now. Anything shorter than one full day is treated as one day so the
        division is always defined.
    """
    # Imported locally under a private name: this notebook binds `datetime` to the
    # module in one cell and to the class in another, so the global name is ambiguous.
    from datetime import datetime as _datetime

    # Ensure date_started is in datetime format
    if isinstance(date_started, str):
        date_started = _datetime.strptime(date_started, '%Y-%m-%d %H:%M:%S')

    # Whole days between date_started and now, never less than 1
    elapsed_days = max((_datetime.now() - date_started).days, 1)

    # Calculate the average daily PnL
    return pnl_sum / elapsed_days

# Calculate win / loss ratio
def calculate_win_loss_ratio(win_percentage, loss_percentage):
    """
    Return the ratio of the win percentage to the loss percentage.

    Parameters:
    win_percentage (float): Percentage of winning trades.
    loss_percentage (float): Percentage of losing trades.

    Returns:
    float: `win_percentage / loss_percentage`, or `win_percentage` itself when there
        are no losses (avoids dividing by zero).
    """
    return win_percentage if loss_percentage == 0 else win_percentage / loss_percentage

# Calculate alpha beta
def calculate_alpha_beta(df):
    """
    Estimate the strategy's alpha and beta against the traded instrument.

    Fits `PNL = alpha + beta * price_return` by ordinary least squares, where
    `price_return` is the percentage move from 'entry price' to 'close price' of each
    row. Beta is therefore the slope (sensitivity to the price move) and alpha the
    intercept (PNL not explained by the price move).

    Rows whose price return or PNL is not finite (e.g. an entry price of 0) are
    excluded from the fit.

    Parameters:
    df (pd.DataFrame): Ledger with 'close price', 'entry price' and 'PNL' columns.

    Returns:
    tuple: (alpha, beta)
    """
    # Convert necessary columns to NumPy arrays for faster computation
    close_price = df['close price'].astype(float).values
    entry_price = df['entry price'].astype(float).values
    pnl = df['PNL'].astype(float).values

    # Percentage price move of each row; a zero entry price yields inf/nan, which is
    # filtered out below instead of being fed to the solver.
    with np.errstate(divide='ignore', invalid='ignore'):
        price_return = (close_price / entry_price - 1) * 100

    usable = np.isfinite(price_return) & np.isfinite(pnl)
    price_return = price_return[usable]
    pnl = pnl[usable]

    # Design matrix: benchmark return plus a constant column for the intercept
    X = np.vstack((price_return, np.ones_like(price_return))).T
    Y = pnl

    # coefficients[0] is the slope (beta), coefficients[1] the intercept (alpha)
    coefficients = np.linalg.lstsq(X, Y, rcond=None)[0]
    beta, alpha = coefficients[0], coefficients[1]

    return alpha, beta

# Calculate all statistics
def calculate_all_statistics(strat_ledger):
    """
    Compute the full set of performance statistics for a strategy ledger.

    Parameters:
    strat_ledger (pd.DataFrame): Ledger whose first column holds the dates and which
        contains the 'PNL', 'close price', 'entry price' and 'pnl_sum_1' ...
        'pnl_sum_60' columns. Its 'pnl_sum' column is (re)written in place.

    Returns:
    tuple: (stats_dict, stats_df) - the statistics keyed by descriptive names, and the
        same values as a one-row DataFrame indexed by the last date of the ledger.
    """
    def two_decimals(value):
        # Plain float rounded to 2 decimal places
        return round(float(value), 2)

    def as_negative(value):
        # Drawdown figures are reported as non-positive numbers
        return -round(float(abs(value)), 2)

    # The first column of the ledger carries the dates
    date_column = strat_ledger.columns[0]

    # Cumulative PNL, stored back on the ledger
    strat_ledger['pnl_sum'] = strat_ledger['PNL'].cumsum()

    # Per-row drawdown and the latest rolling PNL scores
    drawdown_list = calculate_drawdown(strat_ledger['pnl_sum'])
    pnl_sum_scores = get_last_pnl_scores(strat_ledger)

    # Total, negative-only and positive-only PNL
    total_pnl_percent, total_neg_pnl_percent, total_pos_pnl_percent = pos_neg_pnl_percent(
        strat_ledger['PNL']
    )

    # Win / loss counts, streaks and percentages
    (total_wins, total_losses, consecutive_wins, consecutive_losses,
     win_percentage, loss_percentage) = calculate_wins_losses(strat_ledger)
    win_loss_ratio = calculate_win_loss_ratio(win_percentage, loss_percentage)

    # Average PNL per day since the first ledger row
    avg_daily_pnl = average_daily_pnl(
        strat_ledger['pnl_sum'].iloc[-1],
        pd.to_datetime(strat_ledger[date_column].iloc[0]),
    )

    # Rows with a non-zero close price feed the return-based metrics
    closed_rows = strat_ledger[strat_ledger['close price'] != 0]
    closed_pnl = closed_rows['PNL']

    alpha, beta = calculate_alpha_beta(closed_rows)
    sharpe = calculate_sharpe(closed_pnl)
    sortino = calculate_sortino(closed_pnl)
    r2_score = calculate_r2_score(strat_ledger)
    downside_risk = calculate_downside_risk(closed_pnl)

    # Drawdown depth / duration summary
    (drawdown_durations, max_drawdown, max_drawdown_duration,
     curr_drawdown, curr_drawdown_duration) = longest_drawdown(
        strat_ledger['pnl_sum'], strat_ledger[date_column]
    )
    average_drawdown = round(np.mean(drawdown_list), 2) if drawdown_list else 0
    average_drawdown_duration = round(np.mean(drawdown_durations), 2) if drawdown_durations else 0

    # Assemble the statistics under descriptive keys
    stats_dict = {
        date_column: strat_ledger[date_column].iloc[-1],
        'Current Drawdown': as_negative(curr_drawdown),
        'Current Drawdown Duration (days)': two_decimals(curr_drawdown_duration),
        'Average Drawdown': as_negative(average_drawdown),
        'Average Drawdown Duration (days)': two_decimals(average_drawdown_duration),
        'Maximum Drawdown': as_negative(max_drawdown),
        'Maximum Drawdown Duration (days)': two_decimals(max_drawdown_duration),
        'R-squared Score': two_decimals(r2_score),
        'Sharpe Ratio': two_decimals(sharpe),
        'Sortino Ratio': two_decimals(sortino),
        'Total PnL (%)': two_decimals(total_pnl_percent),
        'Total Positive PnL (%)': two_decimals(total_pos_pnl_percent),
        'Total Negative PnL (%)': two_decimals(total_neg_pnl_percent),
        'Total Wins': two_decimals(total_wins),
        'Total Losses': two_decimals(total_losses),
        'Consecutive Wins': two_decimals(consecutive_wins),
        'Consecutive Losses': two_decimals(consecutive_losses),
        'Win Percentage (%)': two_decimals(win_percentage),
        'Loss Percentage (%)': two_decimals(loss_percentage),
        'PnL Sum 1': two_decimals(pnl_sum_scores[0]),
        'PnL Sum 7': two_decimals(pnl_sum_scores[1]),
        'PnL Sum 15': two_decimals(pnl_sum_scores[2]),
        'PnL Sum 30': two_decimals(pnl_sum_scores[3]),
        'PnL Sum 45': two_decimals(pnl_sum_scores[4]),
        'PnL Sum 60': two_decimals(pnl_sum_scores[5]),
        'Average Daily PnL': two_decimals(avg_daily_pnl),
        'Win/Loss Ratio': two_decimals(win_loss_ratio),
        'Alpha': two_decimals(alpha),
        'Beta': two_decimals(beta),
        'Downside Risk': two_decimals(downside_risk),
    }

    # One-row DataFrame view of the same statistics, indexed by the (datetime) date
    stats_df = pd.DataFrame([stats_dict]).set_index(date_column)
    stats_df.index = pd.to_datetime(stats_df.index)

    print(f'{colors.GREEN}All stats calculated!{colors.RESET}')

    return stats_dict, stats_df

# Main

### Reading The Previously Saved Data For BTCUSDT

In [17]:
    # Read only the last 85165 rows of the 1-minute BTCUSDT file
    # (presumably what get_csv_tail provides - verify against its definition)
    # and index the frame by the candle open time.
    BTCUSDT_Filtered_data_1M_last_month = pd.read_csv(
        get_csv_tail(full_csv_path_1, max_rows=85165),
        usecols=['Open time (1M)', 'Open', 'High', 'Low', 'Close', 'Volume'],
    ).set_index('Open time (1M)')

    # Convert the index to datetime format
    BTCUSDT_Filtered_data_1M_last_month.index = pd.to_datetime(
        BTCUSDT_Filtered_data_1M_last_month.index
    )

In [18]:
# Preview the loaded 1-minute candles
BTCUSDT_Filtered_data_1M_last_month

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-06-18 03:47:00,66519.3,66519.3,66490.0,66490.0,1.022
2024-06-18 03:48:00,66519.3,66519.3,66490.0,66490.5,0.762
2024-06-18 03:49:00,66490.6,66519.2,66490.0,66490.0,1.298
2024-06-18 03:50:00,66492.0,66500.0,66492.0,66500.0,2.510
2024-06-18 03:51:00,66500.0,66500.0,66492.0,66500.0,3.236
...,...,...,...,...,...
2024-07-18 03:43:00,64280.1,64280.1,64280.1,64280.1,0.000
2024-07-18 03:44:00,64280.1,64438.2,64280.1,64280.1,1.505
2024-07-18 03:45:00,64438.2,64438.2,64280.1,64438.2,0.053
2024-07-18 03:46:00,64280.1,64438.0,64280.1,64280.2,1.544


In [19]:
# Split the loaded candles by row position into three independent copies:
#   1st  -> the first 29779 rows
#   2nd  -> everything after that except the final row
#   last -> the final row only
BTCUSDT_Filtered_data_1M_last_month_1st = BTCUSDT_Filtered_data_1M_last_month.iloc[:29779].copy()
BTCUSDT_Filtered_data_1M_last_month_2nd = BTCUSDT_Filtered_data_1M_last_month.iloc[29779:-1].copy()
BTCUSDT_Filtered_data_1M_last = BTCUSDT_Filtered_data_1M_last_month.iloc[-1:].copy()

In [20]:
# Preview the first part of the split (rows before position 29779)
BTCUSDT_Filtered_data_1M_last_month_1st

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-06-18 03:47:00,66519.3,66519.3,66490.0,66490.0,1.022
2024-06-18 03:48:00,66519.3,66519.3,66490.0,66490.5,0.762
2024-06-18 03:49:00,66490.6,66519.2,66490.0,66490.0,1.298
2024-06-18 03:50:00,66492.0,66500.0,66492.0,66500.0,2.510
2024-06-18 03:51:00,66500.0,66500.0,66492.0,66500.0,3.236
...,...,...,...,...,...
2024-07-08 20:01:00,56612.0,56612.0,56200.0,56309.4,5.851
2024-07-08 20:02:00,56256.8,56814.0,56159.9,56159.9,12.976
2024-07-08 20:03:00,56403.6,56814.0,56168.6,56569.8,5.523
2024-07-08 20:04:00,56612.9,56741.0,56612.9,56612.9,0.247


In [21]:
# Preview the second part of the split (rows from position 29779 up to, not including, the last row)
BTCUSDT_Filtered_data_1M_last_month_2nd

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-08 20:06:00,56839.4,56839.4,55706.9,56713.7,20.803
2024-07-08 20:07:00,56713.7,56741.0,56155.1,56386.4,9.993
2024-07-08 20:08:00,56306.2,56569.8,56244.4,56339.9,0.903
2024-07-08 20:09:00,56373.9,56839.4,56343.5,56569.8,3.120
2024-07-08 20:10:00,56689.4,56796.1,56386.5,56576.0,2.644
...,...,...,...,...,...
2024-07-18 03:42:00,64354.5,64354.5,64280.1,64280.1,1.119
2024-07-18 03:43:00,64280.1,64280.1,64280.1,64280.1,0.000
2024-07-18 03:44:00,64280.1,64438.2,64280.1,64280.1,1.505
2024-07-18 03:45:00,64438.2,64438.2,64280.1,64438.2,0.053


In [22]:
# Preview the single final candle that was held out of both parts
BTCUSDT_Filtered_data_1M_last

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-18 03:47:00,64280.2,64438.2,64280.2,64438.2,1.43


In [23]:
# BTCUSDT_Filtered_data_1M = pd.read_csv(full_csv_path, usecols = ['Open time (1M)', 'Open', 'High', 'Low', 'Close', 'Volume'])
# BTCUSDT_Filtered_data_1M.set_index('Open time (1M)', inplace = True)

# # Convert the index to datetime format
# BTCUSDT_Filtered_data_1M.index = pd.to_datetime(BTCUSDT_Filtered_data_1M.index)

In [24]:
# BTCUSDT_Filtered_data_1M

### Now Building The Script

In [11]:
# Load the models info table, using the first CSV column as the index
all_models_info_df = pd.read_csv(full_csv_path_2, index_col = 0)
all_models_info_df

Unnamed: 0,Model Name,Model Backbone,Nickname,Coin,Strategy,Timeframe
0,OBV_1h,TI,Rayla,BTCUSDT,OBV,1h
1,SMMA_1h,TI,Aravos,BTCUSDT,SMMA,1h
2,MACD_1h,TI,Stardust,BTCUSDT,MACD,1h
3,STOCHRSI_1h,TI,Ezran,BTCUSDT,STOCHRSI,1h
4,ADX_PSAR_1h,TI,Horsehead,BTCUSDT,ADX_PSAR,1h
...,...,...,...,...,...,...
69,KALMAN_DARTS_UNTESTED_4h,DARTS,X-23,BTCUSDT,KALMAN,4h
70,TBATS_DARTS_UNTESTED_4h,DARTS,X-24,BTCUSDT,TBATS,4h
71,RNN_DARTS_UNTESTED_4h,DARTS,X-25,BTCUSDT,RNN,4h
72,TIDE_DARTS_UNTESTED_4h,DARTS,X-26,BTCUSDT,TIDE,4h


In [18]:
import pandas as pd
import time
from datetime import datetime

def get_seconds_from_timeframe(timeframe):
    """
    Convert a timeframe string (e.g., '30s', '15m', '1h', '4h', '1d', '1w') to seconds.

    Parameters:
    timeframe (str): An integer immediately followed by a one-letter unit:
        's' (seconds), 'm' (minutes), 'h' (hours), 'd' (days) or 'w' (weeks).
        The unit is case-sensitive.

    Returns:
    int: The length of the timeframe in seconds.

    Raises:
    ValueError: If the numeric part is not an integer or the unit is unknown.
    """
    seconds_per_unit = {
        's': 1,
        'm': 60,
        'h': 3600,
        'd': 86400,
        'w': 604800,
    }

    unit = timeframe[-1]
    value = int(timeframe[:-1])

    if unit not in seconds_per_unit:
        raise ValueError(f"Unknown timeframe unit: {unit}")

    return value * seconds_per_unit[unit]

def filter_dataframe_by_time(df, current_time):
    """
    Keep the rows whose timeframe is due at the given time.

    The time of day (hours and minutes only; seconds are ignored) is shifted back by
    10 minutes and a row is kept when that shifted value is an exact multiple of its
    timeframe length in seconds.

    Parameters:
    df (pd.DataFrame): Frame with a 'Timeframe' column of strings such as '1h'.
    current_time (datetime): The moment to test.

    Returns:
    pd.DataFrame: The rows of `df` whose timeframe divides the shifted time of day.
    """
    # Seconds since midnight, minus the 10-minute (600 s) offset
    shifted_seconds = (current_time.hour * 60 + current_time.minute) * 60 - 600

    def is_due(timeframe):
        return shifted_seconds % get_seconds_from_timeframe(timeframe) == 0

    due_mask = df['Timeframe'].apply(is_due)

    return df[due_mask]

In [19]:
# Poll once per wall-clock minute and report which models are due.
# NOTE: this loop never terminates on its own; interrupt the kernel to stop it.
while True:
    # Get the current time
    current_time = datetime.now()

    # Filter the DataFrame based on the current time
    filtered_df = filter_dataframe_by_time(copy.deepcopy(all_models_info_df), current_time)
    if filtered_df.empty:
        print('No models fit in this timeframe')
    else:
        print(filtered_df)

    print('Now waiting for the next minute to check again!')
    # Sleep until the start of the next minute instead of a fixed 60 seconds: the
    # filter only looks at hours and minutes, and a fixed sleep drifts by the loop's
    # own run time until a whole minute (and its due models) is eventually skipped.
    wake_reference = datetime.now()
    time.sleep(60 - wake_reference.second - wake_reference.microsecond / 1_000_000)

In [31]:
# Fixed test timestamp at 00:10: after filter_dataframe_by_time subtracts its
# 10-minute offset this is 0 seconds since midnight, which every timeframe divides.
temp = datetime(2024, 8, 13, 0, 10, 0, 0)
temp

datetime.datetime(2024, 8, 13, 0, 10)

In [32]:
# Filter the DataFrame using the fixed test timestamp `temp` (not the current time);
# a deep copy is passed so the original models table is never touched
filtered_df = filter_dataframe_by_time(copy.deepcopy(all_models_info_df), temp)
filtered_df

Unnamed: 0,Model Name,Model Backbone,Nickname,Coin,Strategy,Timeframe
0,OBV_1h,TI,Rayla,BTCUSDT,OBV,1h
1,SMMA_1h,TI,Aravos,BTCUSDT,SMMA,1h
2,MACD_1h,TI,Stardust,BTCUSDT,MACD,1h
3,STOCHRSI_1h,TI,Ezran,BTCUSDT,STOCHRSI,1h
4,ADX_PSAR_1h,TI,Horsehead,BTCUSDT,ADX_PSAR,1h
...,...,...,...,...,...,...
69,KALMAN_DARTS_UNTESTED_4h,DARTS,X-23,BTCUSDT,KALMAN,4h
70,TBATS_DARTS_UNTESTED_4h,DARTS,X-24,BTCUSDT,TBATS,4h
71,RNN_DARTS_UNTESTED_4h,DARTS,X-25,BTCUSDT,RNN,4h
72,TIDE_DARTS_UNTESTED_4h,DARTS,X-26,BTCUSDT,TIDE,4h


In [31]:
# Keep only the models whose 'Model Backbone' is 'TI'
# (presumably the technical-indicator strategies - confirm against the models table)
filtered_df_ti = filtered_df[filtered_df['Model Backbone'] == 'TI']
filtered_df_ti

Unnamed: 0,Model Name,Model Backbone,Nickname,Coin,Strategy,Timeframe
0,OBV_1h,TI,Rayla,BTCUSDT,OBV,1h
1,SMMA_1h,TI,Aravos,BTCUSDT,SMMA,1h
2,MACD_1h,TI,Stardust,BTCUSDT,MACD,1h
3,STOCHRSI_1h,TI,Ezran,BTCUSDT,STOCHRSI,1h
4,ADX_PSAR_1h,TI,Horsehead,BTCUSDT,ADX_PSAR,1h
5,EMA_1h,TI,Cygnus,BTCUSDT,EMA,1h
14,OBV_4h,TI,Anak Arao,BTCUSDT,OBV,4h
15,SMMA_4h,TI,Elarion,BTCUSDT,SMMA,4h
16,MACD_4h,TI,Ziard,BTCUSDT,MACD,4h
39,OBV_6h,TI,Corvus,BTCUSDT,OBV,6h


### Creating the functions

In [84]:
def generate_signals_ti(df, strategy):
    """
    Generate trading signals for a technical-indicator strategy.

    The input frame is deep-copied before any indicator is computed, so `df` itself is
    left untouched.

    Parameters:
    df (pd.DataFrame): Price data handed to the indicator helpers.
    strategy (str): One of "MACD", "ADX_PSAR", "RSI", "STOCHRSI", "OBV", "SMA", "EMA",
        "SMMA", "VWMA" or "AO".

    Returns:
    tuple: (signal_df, last_signal, last_signal_time) where
        signal_df (pd.DataFrame): Indicator frame whose 'Signal' column has been
            shifted one row down (first row dropped).
        last_signal: The un-shifted signal of the final row.
        last_signal_time: The index label of the final row.

    Raises:
    AttributeError: If `strategy` is not one of the supported names.
    """
    # One entry per strategy: (name, banner printed on detection, signal pipeline).
    # The pipelines are lambdas so a helper is only looked up for the chosen strategy.
    pipelines = (
        (
            "MACD",
            f"\n{colors.BLUE}MACD Strategy detected.{colors.RESET}",
            # MACD indicator, then MACD-based signals
            lambda frame: generate_macd_signals(calculate_macd(frame)),
        ),
        (
            "ADX_PSAR",
            f"\n{colors.BLUE}ADX PSAR Strategy detected.{colors.RESET}",
            # ADX (window 14), then Parabolic SAR, then the combined signals
            lambda frame: generate_adx_parabolic_sar_signals(
                calculate_parabolic_sar(calculate_adx(frame, window = 14))
            ),
        ),
        (
            "RSI",
            f"\n{colors.BLUE}RSI Strategy detected.{colors.RESET}",
            # RSI indicator, then RSI-based signals
            lambda frame: generate_rsi_signals(calculate_rsi(frame)),
        ),
        (
            "STOCHRSI",
            f"\n{colors.BLUE}STOCHASTIC RSI Strategy detected.{colors.RESET}",
            # Stochastic RSI indicator, then its signals
            lambda frame: generate_stochrsi_signals(calculate_stochrsi(frame, fillna = True)),
        ),
        (
            "OBV",
            f"\n{colors.BLUE}On-Balance Volume Strategy detected.{colors.RESET}",
            # On-Balance Volume indicator, then its signals
            lambda frame: generate_obv_signals(calculate_obv(frame, fillna = True)),
        ),
        (
            "SMA",
            f"\n{colors.BLUE}SMA Strategy detected.{colors.RESET}",
            lambda frame: generate_sma_signals(frame, short_window=50, long_window=200),
        ),
        (
            "EMA",
            f"\n{colors.BLUE}EMA Strategy detected.{colors.RESET}",
            lambda frame: generate_triple_ema_signals(
                frame, short_window = 5, medium_window = 21, long_window = 50
            ),
        ),
        (
            "SMMA",
            f"\n{colors.BLUE}SMMA Strategy detected.{colors.RESET}",
            lambda frame: generate_smma_signals(frame, window=14),
        ),
        (
            "VWMA",
            f"\n{colors.BLUE}VWMA Strategy detected.{colors.RESET}",
            lambda frame: generate_vwma_signals(frame, short_window=10, long_window=50),
        ),
        (
            "AO",
            f"\n{colors.BLUE}AWESOME OSCILLATOR Strategy detected.{colors.RESET}",
            # Awesome Oscillator indicator, then its signals
            lambda frame: generate_AO_signals(calculate_AO(frame, fillna = True)),
        ),
    )

    for name, banner, build_signals in pipelines:
        if strategy == name:
            print(banner)
            signal_df = build_signals(copy.deepcopy(df))
            break
    else:
        raise AttributeError(f'\n{colors.WARNING}An Unknown Strategy Name Was Found!{colors.RESET}')

    # Keep the newest signal and its timestamp (used for the future prediction)
    raw_signal = signal_df['Signal']
    last_signal = raw_signal.iloc[-1]
    last_signal_time = signal_df.index[-1]

    # Move every signal one row down so each row carries the signal produced by the
    # previous row, then drop the first row, which has no predecessor
    signal_df['Signal'] = raw_signal.shift(1).fillna(0)
    signal_df = signal_df.iloc[1:].copy()

    return signal_df, last_signal, last_signal_time

In [85]:
def generate_signals_ml(df, strategy, model_path):
    """
    Generate trading signals from a saved model's close-price predictions.

    The model is loaded from `model_path`, asked to predict on `df`, and each row gets
    a signal: -1 when 'Close' is above the prediction, 1 when it is below, 0 when equal.

    The caller's frame is not modified: the function works on a copy, so the same
    frame can be handed to several models without the added 'Predicted' / 'Signal'
    columns leaking into the next model's input.

    Parameters:
    df (pd.DataFrame): Model input features; must contain a 'Close' column.
    strategy (str): Strategy name, used only in the console message.
    model_path (str): Path of the serialized model file.

    Returns:
    tuple: (signal_df, last_signal, last_signal_time) where
        signal_df (pd.DataFrame): Copy of the data with 'Predicted' and a 'Signal'
            column shifted one row down (first row dropped).
        last_signal: The un-shifted signal of the final row.
        last_signal_time: The index label of the final row.
        (None, None, None) is returned when the model cannot be loaded.
    """

    # Define the function to generate signals
    def generate_signal(row):
        if row['Close'] > row['Predicted']:
            return -1
        elif row['Close'] < row['Predicted']:
            return 1
        else:
            return 0

    def load_model(model_path):
        """
        Load a model from disk.

        Parameters:
        model_path (str): The path from which to load the model.

        Returns:
        The loaded model object, or None if loading failed.
        """
        try:
            # SECURITY: joblib.load unpickles the file and can execute arbitrary code;
            # only load model files from a trusted location.
            model = joblib.load(model_path)
            print(f"{colors.GREEN}Model loaded from {model_path}{colors.RESET}")
            return model
        except Exception as e:
            print(f"{colors.WARNING}Failed to load model. Error: {e}{colors.RESET}")
            return None

    print(f"\n{colors.BLUE}{strategy} Strategy detected.{colors.RESET}")

    # # Temporary
    # if 'OPTUNA' in strategy:
    #     print(f'{colors.WARNING}OPTUNA Models are not implemented yet. Skipping...{colors.RESET}')
    #     return None, None, None

    # Load the desired model
    model = load_model(model_path)
    if model is None:
        print(f'{colors.WARNING}Unexpcted behaviour. Model loaded as None Type.{colors.RESET}')
        return None, None, None

    # Work on a private copy so the caller's frame keeps its original columns
    df = df.copy()

    # Get the close price predictions from the model
    close_pred = model.predict(df)

    # Add the predictions to the Dataframe
    df['Predicted'] = close_pred

    # Apply the function to create the 'Signal' column
    # and also extract the last predicted signal and
    # the last predicted signal's time for further
    # use in the simulation code.
    df.loc[:, 'Signal'] = df.apply(generate_signal, axis=1)
    last_signal = df['Signal'].iloc[-1]
    last_signal_time = df.index[-1]

    # Shift the 'Signal' column one row down; the first row gets 0
    df.loc[:, 'Signal'] = df['Signal'].shift(1).fillna(0)

    # Drop the first row because it has no preceding signal
    signal_df = df[1:].copy()

    return signal_df, last_signal, last_signal_time

In [90]:
import os
import pandas as pd
import joblib

class colors:
    # ANSI escape sequences used to colour console messages.
    # Emit RESET after a coloured message to restore the default text style.
    BLUE = '\033[94m'     # bright blue  - used for "strategy detected" banners
    YELLOW = '\033[33m'   # yellow
    GREEN = '\033[92m'    # bright green - used for success messages
    WARNING = '\033[31m'  # red          - used for warnings / failures
    RESET = '\033[0m'     # reset all attributes

def simulation(df, df_info):
    """
    Run every model listed in `df_info` over the 1-minute data and persist the
    resulting ledger, status, statistics and metadata CSV files.

    For each row of `df_info` the function picks the data resampled to the
    model's timeframe, generates signals with the TI or ML signal generator,
    backtests the signals (continuing from the last balance of an existing
    ledger file when there is one), computes the statistics and writes the
    four CSV files. Failures while saving are printed and do not stop the run.

    Parameters:
    df (pd.DataFrame): 1-minute candles. It is passed to
        convert_1m_to_any_timeframe and sliced by timestamp labels for the
        backtest, so it is expected to carry a datetime index.
    df_info (pd.DataFrame): One row per model. The columns 'Strategy',
        'Timeframe', 'Nickname', 'Model Backbone' and 'Coin' are read.

    Returns:
    None. Files are written under '<parent of the working directory>/data/models'.
    """
    # Resample the 1-minute data once per distinct timeframe so that models
    # sharing a timeframe reuse the same resampled frame
    timeframe_dfs = {}
    for timeframe in df_info['Timeframe'].unique():
        timeframe_dfs[timeframe] = convert_1m_to_any_timeframe(copy.deepcopy(df), timeframe)

    # Setting up the directory hierarchy: <parent of cwd>/data/models/...
    notebook_dir = os.getcwd()
    parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))
    data_dir = os.path.join(parent_dir, "data")
    model_dir = os.path.join(data_dir, 'models')

    ledger_dir = os.path.join(model_dir, 'ledger')
    current_status_dir = os.path.join(model_dir, 'current status')
    statistics_dir = os.path.join(model_dir, 'statistics')
    pickle_files_dir = os.path.join(model_dir, 'pickle files')
    metadata_dir = os.path.join(model_dir, 'metadata')
    metadata_file = os.path.join(metadata_dir, 'metadata.csv')

    # Create directories if they don't exist
    os.makedirs(ledger_dir, exist_ok=True)
    os.makedirs(current_status_dir, exist_ok=True)
    os.makedirs(statistics_dir, exist_ok=True)
    os.makedirs(metadata_dir, exist_ok=True)
    os.makedirs(pickle_files_dir, exist_ok=True)

    # Column layout of the metadata CSV (one row per model)
    metadata_columns = [
        'Model Name',
        'Backbone',
        'Coin',
        'Timeframe',
        'Current Prediction Time',
        'Current Prediction',
        'Next Prediction Time',
        'Total PNL'
    ]

    # Iterate through each row of the DataFrame
    for _, row in df_info.iterrows():
        strategy = row['Strategy']
        timeframe = row['Timeframe']
        nickname = row['Nickname']
        backbone = row['Model Backbone']
        coin = row['Coin']

        # Define the path to all the files of this model
        ledger_file = os.path.join(ledger_dir, f'{nickname}.csv')
        status_file = os.path.join(current_status_dir, f'{nickname}_status.csv')
        statistics_file = os.path.join(statistics_dir, f'{nickname}_stats.csv')
        model_pickle_file = os.path.join(pickle_files_dir, f'{nickname}.pkl')

        # Select the correct timeframe DataFrame from the dictionary
        required_df = timeframe_dfs[timeframe]

        # Generate signals based on the model backbone
        if backbone == 'TI':
            signals_df, last_signal, last_signal_time = generate_signals_ti(copy.deepcopy(required_df), strategy)
        elif backbone == 'ML':
            signals_df, last_signal, last_signal_time = generate_signals_ml(copy.deepcopy(required_df), strategy, model_pickle_file)
        elif backbone == 'DL':
            print(f'{colors.WARNING}Deep Learning Models are not implemented yet. Skipping...{colors.RESET}')
            continue
        elif backbone == 'DARTS':
            print(f'{colors.WARNING}DART Models are not implemented yet. Skipping...{colors.RESET}')
            continue
        else:
            # Without this branch an unrecognised backbone would fall through
            # and reuse the previous model's signals (or raise NameError on
            # the first row).
            print(f"{colors.WARNING}Unknown model backbone '{backbone}' for {nickname}. Skipping...{colors.RESET}")
            continue

        # Check if signals_df is None. (Meaning model loaded as None Type: Unexpected Behaviour)
        if signals_df is None:
            continue

        # Check if signals_df is not empty before proceeding
        if signals_df.empty:
            print(f"{colors.WARNING}No signals generated for {nickname}. Skipping...{colors.RESET}")
            continue

        # Saving part of signal dataframe for frequency inferring in backtesting function
        df_for_freq_inferring = signals_df[-10:].copy()

        # Generate the new ledger using the backtesting function
        if os.path.exists(ledger_file):
            # Reading the existing ledger (first CSV column is the datetime index)
            existing_ledger = pd.read_csv(ledger_file)
            existing_ledger.set_index(existing_ledger.columns[0], inplace = True)
            existing_ledger.index = pd.to_datetime(existing_ledger.index)

            # Extracting the balance till the last trade
            last_balance_at_that_time = existing_ledger['Balance'].iloc[-1]

            # Getting only the signals from the last ledger entry onwards
            signals_df = signals_df[signals_df.index >= existing_ledger.index[-1]]

            if signals_df.empty:
                # Nothing newer than the ledger: indexing signals_df.index[0]
                # below would raise, so treat it as "no new trades".
                new_ledger = pd.DataFrame()
            else:
                # Generate the new backtest ledger, continuing from the last balance
                new_ledger = generate_backtest_df(signals_df, df[str(signals_df.index[0]): str(signals_df.index[-1])].copy(), df_for_freq_inferring, last_balance_at_that_time)

            # If ledger returned is empty, dont concatenate, else concatenate
            if new_ledger.empty:
                for_stats = existing_ledger.copy()
            else:
                for_stats = pd.concat([existing_ledger, new_ledger])
                for_stats = for_stats.sort_index()
        else:
            new_ledger = generate_backtest_df(signals_df, df[str(signals_df.index[0]): str(signals_df.index[-1])].copy(), df_for_freq_inferring)
            for_stats = new_ledger.copy()

        # If no signal change is detected, (no trade has begun)
        if for_stats.empty:
            print(f"{colors.WARNING}Empty ledger created for {nickname} - no trades held. Skipping...{colors.RESET}")
            continue

        # Generate the stats df from the full (existing + new) ledger
        new_ledger_with_all_pnl = calculate_pnl_sum_all(for_stats.reset_index())
        _, stats_df = calculate_all_statistics(new_ledger_with_all_pnl)

        # Retrieve the cumulative pnl from stats_df to save to metadata csv.
        # This is read before stats_df is narrowed to its new rows further down.
        cumulative_pnl = stats_df['Total PnL (%)'].iloc[-1]

        # SAVING TO CSVS / DATABASES
        # Appending or saving to the ledger file
        try:
            # Determine if the file already exists
            if os.path.exists(ledger_file):

                # Check if ledger is empty, then data is already up to date
                if new_ledger.empty:
                    print(f'{colors.YELLOW}Ledger data already up to date!{colors.RESET}')
                else:
                    # Append the newly fetched data to the existing CSV without header
                    new_ledger.to_csv(ledger_file, mode='a', header=False, index=True)
                    print(f"{colors.GREEN}Ledger appended to {colors.BLUE}{ledger_file}{colors.RESET}")
            else:
                # Write the new data with header since file does not exist
                new_ledger.to_csv(ledger_file, mode='w', header=True, index=True)
                print(f"{colors.GREEN}Ledger created and ledger saved to {colors.BLUE}{ledger_file}{colors.RESET}")

        except Exception as e:
            print(f"{colors.WARNING}Failed to save / append ledger to {colors.BLUE}{ledger_file}. Error: {e}{colors.RESET}")

        # Prediction values shared by the status and metadata files. They are
        # reset for every model so that a failure below can never leave the
        # previous model's values in scope for the metadata block.
        current_prediction = last_signal
        current_prediction_time = None
        next_prediction_time = None

        # Saving to the status file
        try:
            # The current prediction applies one timeframe after the last
            # signal's timestamp; the next prediction one timeframe after that
            step = pd.Timedelta(timeframe)
            current_time = last_signal_time + step
            next_time = current_time + step

            # Convert both times to strings
            current_prediction_time = str(current_time)
            next_prediction_time = str(next_time)

            pd.DataFrame({
                'Model Name': [nickname],
                'Timeframe' : [timeframe],
                'Current Prediction Time': [current_prediction_time],
                'Current Prediction': [current_prediction],
                'Next Prediction Time': [next_prediction_time],
            }).to_csv(status_file, mode='w', header=True, index=True)

            print(f"{colors.GREEN}Status saved to {colors.BLUE}{status_file}{colors.RESET}")

        except Exception as e:
            print(f"{colors.WARNING}Failed to save status to {colors.BLUE}{status_file}. Error: {e}{colors.RESET}")

        # Saving to the statistics file
        try:
            # Determine if the file already exists
            if os.path.exists(statistics_file):
                # If it exists, read it into a DataFrame
                existing_stats = pd.read_csv(statistics_file)
                existing_stats.set_index(existing_stats.columns[0], inplace = True)
                existing_stats.index = pd.to_datetime(existing_stats.index, format='mixed')

                # Find the index where the new stats start
                last_stats_index = existing_stats.index[-1]

                # Get only those rows that are newer than the last saved row
                stats_df = stats_df[stats_df.index > pd.to_datetime(last_stats_index)]

                if stats_df.empty:
                    # Ends with RESET only; a trailing colour code here would
                    # tint whatever is printed next.
                    print(f'{colors.YELLOW}Stats data already up to date!{colors.RESET}')
                else:
                    # Append the newly fetched data to the existing CSV without header
                    stats_df.to_csv(statistics_file, mode='a', header=False, index=True)
                    print(f"{colors.GREEN}Stats appended to {colors.BLUE}{statistics_file}{colors.RESET}")
            else:
                # Write the new data with header since file does not exist
                stats_df.to_csv(statistics_file, mode='w', header=True, index=True)
                print(f"{colors.GREEN}Stats created and stats saved to {colors.BLUE}{statistics_file}{colors.RESET}")

        except Exception as e:
            print(f"{colors.WARNING}Failed to save / append stats to {colors.BLUE}{statistics_file}. Error: {e}{colors.RESET}")

        # Saving to the metadata file
        try:
            # Never write metadata with missing prediction times
            if current_prediction_time is None:
                raise ValueError(f'prediction times could not be computed for {nickname}')

            # Make a list of the new values (same order as metadata_columns)
            new_values = [nickname, backbone, coin, timeframe, current_prediction_time, current_prediction, next_prediction_time, cumulative_pnl]

            # Determine if the file already exists
            if os.path.exists(metadata_file):
                # If it exists, read it into a DataFrame
                existing_metadata = pd.read_csv(metadata_file)

                # Check if the model is present and update the row
                if nickname in existing_metadata['Model Name'].values:
                    # Get the current row
                    current_row = existing_metadata.loc[existing_metadata['Model Name'] == nickname]

                    # Check if any of the values are different
                    if current_row.iloc[0].to_list() != new_values:
                        # Update the row with the new values
                        existing_metadata.loc[existing_metadata['Model Name'] == nickname, :] = new_values
                        existing_metadata.to_csv(metadata_file, mode='w', header=True, index=False)
                        print(f"{colors.GREEN}Metadata for {nickname} updated and saved to {colors.BLUE}{metadata_file}{colors.RESET}")
                    else:
                        print(f"{colors.YELLOW}Metadata for {nickname} is already up to date!{colors.RESET}")
                else:
                    # Add a new row to the existing DataFrame
                    new_row = pd.DataFrame([new_values], columns = metadata_columns)
                    existing_metadata = pd.concat([existing_metadata, new_row], axis=0, ignore_index=True)

                    # Overwrite to the new CSV
                    existing_metadata.to_csv(metadata_file, mode='w', header=True, index=False)
                    print(f"{colors.GREEN}Metadata for {nickname} added / appended to {colors.BLUE}{metadata_file}{colors.RESET}")
            else:
                # Write the new data with header since the file does not exist
                pd.DataFrame([new_values], columns = metadata_columns).to_csv(metadata_file, mode='w', header=True, index=False)

                print(f"{colors.GREEN}Metadata created and metadata for {nickname} saved to {colors.BLUE}{metadata_file}{colors.RESET}")

        except Exception as e:
            print(f"{colors.WARNING}Failed to save / add / append metadata to {colors.BLUE}{metadata_file}. Error: {e}{colors.RESET}")

        # Adding a new line after every model prediction
        print()

    print(f'\n{colors.GREEN}Simulation Completed Succesfully!{colors.RESET}')

In [91]:
# Run the simulation for every model listed in filtered_df. The 1-minute frame
# is deep-copied first, so the function never receives the notebook-level frame itself.
simulation(copy.deepcopy(BTCUSDT_Filtered_data_1M_last_month), filtered_df)


[94mOn-Balance Volume Strategy detected.[0m
[92mAll stats calculated![0m
[33mLedger data already up to date![0m
[92mStatus saved to [94mC:\Users\Ali\Desktop\Neurog Internship\data\models\current status\Rayla_status.csv[0m
[33mStats data already up to date![0m[92m
[33mMetadata for Rayla is already up to date![0m


[94mSMMA Strategy detected.[0m
[92mAll stats calculated![0m
[33mLedger data already up to date![0m
[92mStatus saved to [94mC:\Users\Ali\Desktop\Neurog Internship\data\models\current status\Aravos_status.csv[0m
[33mStats data already up to date![0m[92m
[33mMetadata for Aravos is already up to date![0m


[94mMACD Strategy detected.[0m
[92mAll stats calculated![0m
[33mLedger data already up to date![0m
[92mStatus saved to [94mC:\Users\Ali\Desktop\Neurog Internship\data\models\current status\Stardust_status.csv[0m
[33mStats data already up to date![0m[92m
[33mMetadata for Stardust is already up to date![0m


[94mSTOCHASTIC RSI Strategy d

  self._psar[i] = high2


[92mAll stats calculated![0m
[33mLedger data already up to date![0m
[92mStatus saved to [94mC:\Users\Ali\Desktop\Neurog Internship\data\models\current status\Horsehead_status.csv[0m
[33mStats data already up to date![0m[92m
[33mMetadata for Horsehead is already up to date![0m


[94mEMA Strategy detected.[0m
[92mAll stats calculated![0m
[33mLedger data already up to date![0m
[92mStatus saved to [94mC:\Users\Ali\Desktop\Neurog Internship\data\models\current status\Cygnus_status.csv[0m
[33mStats data already up to date![0m[92m
[33mMetadata for Cygnus is already up to date![0m


[94mLR Strategy detected.[0m
[92mModel loaded from C:\Users\Ali\Desktop\Neurog Internship\data\models\pickle files\Azymondius.pkl[0m
[92mAll stats calculated![0m
[33mLedger data already up to date![0m
[92mStatus saved to [94mC:\Users\Ali\Desktop\Neurog Internship\data\models\current status\Azymondius_status.csv[0m
[33mStats data already up to date![0m[92m
[33mMetadata for Az

In [68]:
# Display the frame for inspection (it is defined outside this part of the notebook).
# NOTE(review): this cell's execution count (68) is lower than that of the cell
# before it (91), so the cells were not run top to bottom — confirm with Restart & Run All.
BTCUSDT_Filtered_data_1M_last_month_2nd

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-07-08 20:06:00,56839.4,56839.4,55706.9,56713.7,20.803
2024-07-08 20:07:00,56713.7,56741.0,56155.1,56386.4,9.993
2024-07-08 20:08:00,56306.2,56569.8,56244.4,56339.9,0.903
2024-07-08 20:09:00,56373.9,56839.4,56343.5,56569.8,3.120
2024-07-08 20:10:00,56689.4,56796.1,56386.5,56576.0,2.644
...,...,...,...,...,...
2024-07-18 03:42:00,64354.5,64354.5,64280.1,64280.1,1.119
2024-07-18 03:43:00,64280.1,64280.1,64280.1,64280.1,0.000
2024-07-18 03:44:00,64280.1,64438.2,64280.1,64280.1,1.505
2024-07-18 03:45:00,64438.2,64438.2,64280.1,64438.2,0.053


## For Testing The Stats Calculation Code

### Making the temporary DataFrame with OBV signals and backtesting it

In [49]:
# Read only the timestamp and OHLCV columns of the 1-minute CSV and use the
# candle open time as the index
df_temp_1m = pd.read_csv(
    full_csv_path_1,
    usecols=['Open time (1M)', 'Open', 'High', 'Low', 'Close', 'Volume'],
).set_index('Open time (1M)')

# Parse the index labels into datetimes
df_temp_1m.index = pd.to_datetime(df_temp_1m.index)

In [50]:
# Resample a deep copy of the 1-minute candles to the '4h' timeframe
df_temp_4h = convert_1m_to_any_timeframe(copy.deepcopy(df_temp_1m), '4h')

In [51]:
# Trim the 1-minute candles with a label-based slice that ends at the label of
# the last 4h candle, then display the result.
# NOTE: this rebinds df_temp_1m, so the untrimmed frame is only recoverable by
# re-running the cell that loads the CSV.
df_temp_1m = df_temp_1m[: str(df_temp_4h.index[-1])]
df_temp_1m

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,7169.71,7165.44,7167.83,3509.860
2020-01-01 00:01:00,7167.83,7168.28,7158.66,7159.95,3821.170
2020-01-01 00:02:00,7161.03,7165.40,7161.03,7162.46,3041.710
2020-01-01 00:03:00,7161.74,7164.27,7160.30,7161.03,3682.650
2020-01-01 00:04:00,7161.03,7164.25,7160.15,7160.15,2936.690
...,...,...,...,...,...
2024-07-17 23:56:00,64637.90,64742.90,64242.90,64443.60,6.473
2024-07-17 23:57:00,64443.60,64742.90,64242.90,64700.00,2.546
2024-07-17 23:58:00,64700.00,64700.00,64603.20,64603.20,1.115
2024-07-17 23:59:00,64689.00,64700.00,64603.20,64603.20,0.079


In [52]:
# Display the resampled 4h candles
df_temp_4h

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,7207.23,7156.65,7202.48,3428.062092
2020-01-01 04:00:00,7202.48,9592.00,6871.45,7241.63,285722.475587
2020-01-01 08:00:00,7241.63,7243.46,7215.94,7223.72,3629.004242
2020-01-01 12:00:00,7223.71,7233.33,7178.00,7201.02,3491.032767
2020-01-01 16:00:00,7201.56,7236.58,7187.86,7220.85,3125.298729
...,...,...,...,...,...
2024-07-17 08:00:00,66196.50,66998.80,63555.00,65000.10,4.510771
2024-07-17 12:00:00,65000.10,66000.00,63555.00,65503.90,5.291817
2024-07-17 16:00:00,65446.30,66000.00,63892.40,64689.00,5.464083
2024-07-17 20:00:00,64669.70,64999.00,64240.00,64603.20,9.204992


In [53]:
# Calculate The On-Balance Volume (OBV)
temp_signal_df = calculate_obv(copy.deepcopy(df_temp_4h), fillna = True)

# Generate Signals Using OBV
temp_signal_df = generate_obv_signals(temp_signal_df)
temp_signal_df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,OBV,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-01 00:00:00,7169.71,7207.23,7156.65,7202.48,3428.062092,3428.062092,0
2020-01-01 04:00:00,7202.48,9592.00,6871.45,7241.63,285722.475587,289150.537679,1
2020-01-01 08:00:00,7241.63,7243.46,7215.94,7223.72,3629.004242,285521.533438,-1
2020-01-01 12:00:00,7223.71,7233.33,7178.00,7201.02,3491.032767,282030.500671,-1
2020-01-01 16:00:00,7201.56,7236.58,7187.86,7220.85,3125.298729,285155.799400,1
...,...,...,...,...,...,...,...
2024-07-17 08:00:00,66196.50,66998.80,63555.00,65000.10,4.510771,793937.920835,-1
2024-07-17 12:00:00,65000.10,66000.00,63555.00,65503.90,5.291817,793943.212652,1
2024-07-17 16:00:00,65446.30,66000.00,63892.40,64689.00,5.464083,793937.748569,-1
2024-07-17 20:00:00,64669.70,64999.00,64240.00,64603.20,9.204992,793928.543577,-1


In [55]:
# Backtest a deep copy of the 4h OBV signals against the 1-minute candles.
# The third argument is the first 20 signal rows; the simulation code passes a
# short slice of the signal frame in the same position for frequency inference.
temp_backtest_df = generate_backtest_df(copy.deepcopy(temp_signal_df), df_temp_1m, temp_signal_df[:20].copy())
temp_backtest_df

Unnamed: 0_level_0,direction,entry price,close price,action,PNL,Balance
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01 04:00:00,long,7202.48,0.00,,,1.331664e+03
2020-01-01 05:00:00,short,7202.48,9592.00,tp,33.176350,1.331664e+03
2020-01-01 08:00:00,short,7241.63,0.00,,,1.338899e+03
2020-01-01 16:00:00,long,7241.63,7201.56,direction,0.553328,1.338899e+03
2020-01-01 16:00:00,long,7201.56,0.00,,,1.346322e+03
...,...,...,...,...,...,...
2024-07-17 08:00:00,short,64670.80,66196.50,direction,2.359179,9.927818e+47
2024-07-17 08:00:00,short,66196.50,0.00,,,1.010626e+48
2024-07-17 12:00:00,long,66196.50,65000.10,direction,1.807346,1.010626e+48
2024-07-17 12:00:00,long,65000.10,0.00,,,1.017462e+48


In [56]:
# Generate the stats df
new_ledger_with_all_pnl = calculate_pnl_sum_all(temp_backtest_df.reset_index())
_, stats_df = calculate_all_statistics(new_ledger_with_all_pnl)

[92mAll stats calculated![0m


In [62]:
# Last value of the 'Total PnL (%)' column, i.e. the figure the simulation
# stores as the model's cumulative PnL
stats_df['Total PnL (%)'].iloc[-1]

28312.47

In [163]:
# Take a copy of the first 25 signal rows and force the signal of its final
# row to 0, then display the shortened frame
temp_signal_df_last = temp_signal_df[:25].copy()
temp_signal_df_last.loc[temp_signal_df_last.index[-1], 'Signal'] = 0
temp_signal_df_last

Unnamed: 0_level_0,Open,High,Low,Close,Volume,OBV,Signal
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-01-01 00:00:00,7169.71,7207.23,7156.65,7202.48,3428.062092,3428.062092,0
2020-01-01 04:00:00,7202.48,9592.0,6871.45,7241.63,285722.475587,289150.537679,1
2020-01-01 08:00:00,7241.63,7243.46,7215.94,7223.72,3629.004242,285521.533438,-1
2020-01-01 12:00:00,7223.71,7233.33,7178.0,7201.02,3491.032767,282030.500671,-1
2020-01-01 16:00:00,7201.56,7236.58,7187.86,7220.85,3125.298729,285155.7994,1
2020-01-01 20:00:00,7220.41,7253.21,7216.21,7242.21,3440.883271,288596.682671,1
2020-01-02 00:00:00,7242.21,7243.41,7187.84,7197.77,3633.728296,284962.954375,-1
2020-01-02 04:00:00,7197.77,7212.59,7155.02,7169.51,3370.041933,281592.912442,-1
2020-01-02 08:00:00,7169.51,7169.51,7116.48,7134.87,3485.338125,278107.574317,-1
2020-01-02 12:00:00,7135.52,7181.53,7105.17,7160.6,3261.590162,281369.164479,1


In [164]:
# Copy the 1-minute candles between the first and last index labels of the
# shortened signal frame (label-based slice), then display them
df_temp_1m_last = df_temp_1m[str(temp_signal_df_last.index[0]): str(temp_signal_df_last.index[-1])].copy()
df_temp_1m_last

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Open time (1M),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-01-01 00:00:00,7169.71,7169.71,7165.44,7167.83,3509.860
2020-01-01 00:01:00,7167.83,7168.28,7158.66,7159.95,3821.170
2020-01-01 00:02:00,7161.03,7165.40,7161.03,7162.46,3041.710
2020-01-01 00:03:00,7161.74,7164.27,7160.30,7161.03,3682.650
2020-01-01 00:04:00,7161.03,7164.25,7160.15,7160.15,2936.690
...,...,...,...,...,...
2020-01-04 23:56:00,7313.30,7314.82,7304.47,7304.59,2932.650
2020-01-04 23:57:00,7304.59,7306.77,7299.27,7300.23,2656.891
2020-01-04 23:58:00,7300.23,7303.15,7296.64,7297.37,2991.368
2020-01-04 23:59:00,7297.37,7305.82,7294.77,7305.62,2505.819


In [165]:
# Backtest the shortened signal frame against the matching 1-minute candles.
# NOTE(review): only two arguments are passed here, while the other calls in
# this notebook also pass a frame for frequency inference as the third
# argument — presumably this cell was run against an older signature
# (execution count 165); confirm it still runs on a fresh kernel.
temp_backtest_df_last = generate_backtest_df(copy.deepcopy(temp_signal_df_last), df_temp_1m_last)
temp_backtest_df_last

Unnamed: 0_level_0,direction,entry price,close price,action,PNL,Balance
Open time (4h),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01 04:00:00,long,7202.48,0.0,,,1331.663504
2020-01-01 05:00:00,short,7202.48,9592.0,tp,33.17635,1331.663504
2020-01-01 08:00:00,short,7241.63,0.0,,,1338.898811
2020-01-01 16:00:00,long,7241.63,7201.56,direction,0.553328,1338.898811
2020-01-01 16:00:00,long,7201.56,0.0,,,1346.322483
2020-01-02 00:00:00,short,7201.56,7242.21,direction,0.564461,1346.322483
2020-01-02 00:00:00,short,7242.21,0.0,,,1366.021457
2020-01-02 12:00:00,long,7242.21,7135.52,direction,1.473169,1366.021457
2020-01-02 12:00:00,long,7135.52,0.0,,,1370.688076
2020-01-02 16:00:00,short,7135.52,7160.61,direction,0.351621,1370.688076


### Original Code's Time

In [81]:
%%timeit -r 1 -n 1
# Time a single execution (1 run x 1 loop) of the PnL aggregation followed by
# the statistics calculation, working on a deep copy of the backtest ledger.
# NOTE(review): the ledger is passed without .reset_index() here, unlike the
# stats cell earlier in this section — confirm which form the function expects.
df_with_all_pnl = calculate_pnl_sum_all(copy.deepcopy(temp_backtest_df))
stats_dict, stats_df = calculate_all_statistics(df_with_all_pnl)
stats_df

done
1min 57s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### My Optimized Code's Time

In [191]:
%%timeit -r 5 -n 3
# Same pipeline as the previous timing cell, timed over 5 runs of 3 loops each
# (15 executions in total, matching the 15 "done" lines in the output)
df_with_all_pnl = calculate_pnl_sum_all(copy.deepcopy(temp_backtest_df))
stats_dict, stats_df = calculate_all_statistics(df_with_all_pnl)
stats_df

done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
1.36 s ± 21.3 ms per loop (mean ± std. dev. of 5 runs, 3 loops each)


### Time After Further Changes

In [451]:
%%timeit -r 5 -n 3
# Same pipeline timed again (5 runs of 3 loops each) after further changes to
# the statistics code
df_with_all_pnl = calculate_pnl_sum_all(copy.deepcopy(temp_backtest_df))
stats_dict, stats_df = calculate_all_statistics(df_with_all_pnl)

done
done
done
done
done
done
done
done
done
done
done
done
done
done
done
233 ms ± 8.92 ms per loop (mean ± std. dev. of 5 runs, 3 loops each)


# ***More Later!***