# Requirements
- Python 3.11.9
    - pyenv (https://github.com/pyenv/pyenv)
- uv (Rust-based Python package manager)
    - _On macOS and Linux_
        - `curl -LsSf https://astral.sh/uv/install.sh | sh`
    - _windows_
        - coming soon
- Install the latest `backtesting.py` version 
    - `pip install git+https://github.com/kernc/backtesting.py.git`
        - This includes fixes for bokeh breaking when you are trying to plot
- TA Lib
    - `brew install ta-lib` - the TA-Lib C library is required for the TA-Lib Python package to work
    - `pip install numpy==1.26.4` - need to ensure that we are using a numpy version lower than 2.0
- Jupyter Notebook deps
    - `pip install ipykernel` for jupyter notebooks
- Install additional deps 
    - `pip install pandas ccxt pyarrow termcolor progress`
    - Pandas dataframes
    - CCXT connection to various crypto exchanges

### Todos
- [x] [DONE] Update candlestick retrieval to ensure that we are getting the full range that we are requesting
- [x] [DONE] Get yearly data 15m for BTC on all time frames to see which one is best
- [x] [DONE] Update candlestick data storage to parquet files
- [x] [DONE] Dynamic position size
- [x] [DONE] Update strategy to also include stop loss, attempt to minimize risk as much as possible
- [ ] Add RR (risk/reward) using ATR; make sure that it doesn't cause an infinite loop
- [ ] Figure out the best indicator for exiting a position with maximum upside

In [None]:
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
import numpy as np
import pandas as pd
import talib
import os
import glob
import time
from tqdm.notebook import tqdm
# import matplotlib.pyplot as plt
# import seaborn as sns
import multiprocessing
# Force the "fork" start method for worker processes; force=True replaces any
# start method that was already set in this kernel.
# NOTE(review): presumably needed so bt.optimize() can run in parallel from a
# notebook on macOS, where the default start method is "spawn" — confirm.
# "fork" is not available on Windows.
multiprocessing.set_start_method('fork', force=True)

%load_ext autoreload
%autoreload 2

## Strategy EMACross

EMA 15 X EMA 50 (the strategy defaults: `ema_fast = 15`, `ema_slow = 50`)

In [None]:
class ImprovedEMACrossStrategy(Strategy):
    """EMA-crossover strategy with an RSI entry filter and ATR-scaled exits.

    Entries:
        Go long when the fast EMA crosses above the slow EMA and short on the
        opposite cross. A cross only counts when the relative gap between the
        two EMAs exceeds ``trend_strength``. Longs are skipped while RSI is at
        or above ``rsi_overbought``; shorts are skipped while RSI is at or
        below ``rsi_oversold``. An opposite position is closed on a qualifying
        cross even when the RSI filter blocks the new entry.

    Exits:
        On every bar the per-unit move since entry (evaluated at the bar's
        close) is compared with ``ATR * atr_multiplier`` (take profit) and
        half of that (stop loss). Both thresholds are recomputed from the
        latest ATR value on each bar.

    The entry reference is the actual fill price of the open trade(s), read
    from ``self.trades``. The close of the signal bar is not used for this:
    per the backtesting.py documentation, market orders placed in ``next()``
    fill on the following bar's open unless ``trade_on_close=True``, so the
    signal-bar close is not the price the position was opened at.
    """

    # Optimisable parameters (class attributes so Backtest.optimize can set them).
    ema_fast = 15  # fast EMA period, in bars
    ema_slow = 50  # slow EMA period, in bars
    trade_size = 0.02  # fraction of current equity targeted per entry
    take_profit_pct = 0.04  # NOTE: not read by init()/next(); exits are ATR-based
    stop_loss_pct = 0.01  # NOTE: not read by init()/next(); exits are ATR-based
    atr_period = 10  # ATR look-back, in bars
    rsi_period = 14  # RSI look-back, in bars
    rsi_overbought = 70  # longs are only opened while RSI is below this
    rsi_oversold = 25  # shorts are only opened while RSI is above this
    atr_multiplier = 2.5  # take profit = ATR * this; stop loss = ATR * this / 2
    trend_strength = 0.0005  # minimum |fast EMA - slow EMA| / slow EMA on a cross

    def init(self):
        """Register the EMA, ATR and RSI indicators and reset the entry price."""
        close = self.data.Close
        high = self.data.High
        low = self.data.Low
        self.ema1 = self.I(talib.EMA, close, timeperiod=self.ema_fast)
        self.ema2 = self.I(talib.EMA, close, timeperiod=self.ema_slow)
        self.atr = self.I(talib.ATR, high, low, close, timeperiod=self.atr_period)
        self.rsi = self.I(talib.RSI, close, timeperiod=self.rsi_period)
        # Reference price of the open position; 0 while flat.
        self.entry_price = 0

    def _open_fill_price(self):
        """Return the size-weighted average fill price of the open trades.

        Falls back to the last recorded ``entry_price`` when no trade is open.
        """
        open_trades = self.trades
        if not open_trades:
            return self.entry_price
        units = sum(abs(trade.size) for trade in open_trades)
        return sum(abs(trade.size) * trade.entry_price for trade in open_trades) / units

    def next(self):
        """Process one bar: check the ATR exits first, then look for an entry."""
        price = self.data.Close[-1]
        atr = self.atr[-1]
        take_profit = atr * self.atr_multiplier
        stop_loss = atr * (self.atr_multiplier / 2)
        # Whole units only. The floor of 1 keeps the strategy trading when one
        # unit costs more than trade_size * equity, at the price of committing
        # more than the targeted fraction in that case.
        size = max(1, int((self.trade_size * self.equity) // price))

        position_size = self.position.size
        if position_size != 0:
            # Measure the move from the real fill, not from the signal bar.
            self.entry_price = self._open_fill_price()
            direction = 1 if position_size > 0 else -1
            gain = direction * (price - self.entry_price)
            if gain >= take_profit or gain <= -stop_loss:
                self.position.close()
                self.entry_price = 0

        trend_strength = abs((self.ema1[-1] - self.ema2[-1]) / self.ema2[-1])
        if crossover(self.ema1, self.ema2) and trend_strength > self.trend_strength:
            if position_size < 0:
                self.position.close()
            if self.rsi[-1] < self.rsi_overbought:
                self.buy(size=size)
                # Provisional value; replaced by the fill price on the next bar.
                self.entry_price = price
        elif crossover(self.ema2, self.ema1) and trend_strength > self.trend_strength:
            if position_size > 0:
                self.position.close()
            if self.rsi[-1] > self.rsi_oversold:
                self.sell(size=size)
                # Provisional value; replaced by the fill price on the next bar.
                self.entry_price = price

### Load the saved candle stick data frame
_Only run this cell after you have dynamically created the output file name._

In [None]:
def find_candlestick_file(
    timeframe, exchange, symbol="BTCUSDT", storage_format="parquet"
):
    """Locate a saved candlestick file for the given market and timeframe.

    Files are looked up under
    ``./saved_candlestick_data/<exchange>/<storage_format>/``. Binance files
    are expected to be named ``binance_<symbol>_<timeframe>_*``; for any other
    exchange the name is ``<symbol>_<timeframe>_*``.

    Args:
        timeframe: Candle interval as it appears in the file name (e.g. "15m").
        exchange: Exchange sub-directory name (e.g. "binance" or "phemex").
        symbol: Trading pair as it appears in the file name.
        storage_format: Either "parquet" or "csv".

    Returns:
        The path of the matching file, the one with the greatest
        ``os.path.getctime`` when several match, or ``None`` when none match.

    Raises:
        ValueError: If ``storage_format`` is neither "parquet" nor "csv".
    """
    search_dir = os.path.join("./saved_candlestick_data/", exchange, storage_format)

    # Only Binance files carry the exchange name as a prefix.
    stem = f"{symbol}_{timeframe}_*"
    if exchange == "binance":
        stem = f"{exchange}_{stem}"

    suffix_by_format = {"parquet": ".parquet", "csv": ".csv"}
    if storage_format not in suffix_by_format:
        raise ValueError("Unsupported storage format. Use 'parquet' or 'csv'.")

    candidates = glob.glob(
        os.path.join(search_dir, stem + suffix_by_format[storage_format])
    )

    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates[0]
    # Several downloads on disk: pick the one with the latest ctime.
    return max(candidates, key=os.path.getctime)


# Example: locate and load the saved 15-minute BTCUSDT candles.
# `exchange` may also be "phemex" and `storage_format` may also be "csv".
timeframe, exchange, symbol, storage_format = "15m", "binance", "BTCUSDT", "parquet"

filename = find_candlestick_file(timeframe, exchange, symbol, storage_format)

if not filename:
    # Nothing on disk: `df` is left undefined, so later cells that use it will fail.
    print(
        f"No file found for {symbol} with timeframe {timeframe} on {exchange} in {storage_format} format"
    )
else:
    print(f"Found file: {filename}")
    # Pick the pandas reader that matches the storage format.
    df = (
        pd.read_parquet(filename)
        if storage_format == "parquet"
        else pd.read_csv(filename)
    )
    # Quick sanity check of what was loaded.
    print(df.head())

### Run the strategy and plot it

In [None]:
# Wire the loaded candles and the strategy into a backtest, run it once with
# the strategy's default parameters, then report and chart the outcome.
bt = Backtest(
    df, ImprovedEMACrossStrategy, cash=100_000, commission=0.002, exclusive_orders=True
)
stats = bt.run()
print(stats)
bt.plot()

## Optimize Strategy

In [None]:
# Define the parameter ranges for optimization.
# Each key names a class attribute of ImprovedEMACrossStrategy; each value is
# the set of candidates to try for it. The full grid has
# 4 * 4 * 3**9 = 314,928 combinations.
# NOTE(review): `take_profit_pct` and `stop_loss_pct` are not read by the
# strategy's init()/next(), so varying them should not change results — confirm
# whether they are meant to be wired in or dropped from the grid.
optimization_params = {
    "ema_fast": range(5, 21, 5),  # 5, 10, 15, 20
    "ema_slow": range(20, 51, 10),  # 20, 30, 40, 50 (20 overlaps ema_fast's maximum)
    "trade_size": [0.01, 0.02, 0.03],
    "take_profit_pct": [0.03, 0.04, 0.05],
    "stop_loss_pct": [0.01, 0.02, 0.03],
    "atr_period": [10, 14, 18],
    "rsi_period": [10, 14, 18],
    "rsi_overbought": [65, 70, 75],
    "rsi_oversold": [25, 30, 35],
    "atr_multiplier": [1.5, 2, 2.5],
    "trend_strength": [0.0005, 0.001, 0.0015],
}


def constraint(params):
    """Return whether a parameter combination is admissible for optimization.

    A combination passes only if all of the following hold, checked in this
    order with evaluation stopping at the first failure:

    * ``ema_fast`` is strictly below ``ema_slow``;
    * ``take_profit_pct`` is strictly above ``stop_loss_pct``;
    * ``rsi_overbought`` is above 50 and ``rsi_oversold`` is below 50;
    * the two RSI thresholds are at least 20 points apart;
    * half of ``atr_multiplier`` is at least ``stop_loss_pct``;
    * ``trade_size`` does not exceed 0.05.

    Args:
        params: Object exposing the parameters above as attributes.

    Returns:
        True when every rule holds, False otherwise.
    """
    if not (params.ema_fast < params.ema_slow):
        return False
    if not (params.take_profit_pct > params.stop_loss_pct):
        return False
    if not (params.rsi_overbought > 50):
        return False
    if not (params.rsi_oversold < 50):
        return False
    if not ((params.rsi_overbought - params.rsi_oversold) >= 20):
        return False
    if not (params.atr_multiplier * 0.5 >= params.stop_loss_pct):
        return False
    return params.trade_size <= 0.05


# Search the parameter grid for the combination with the highest final equity.
# With return_heatmap=True the call yields two values, unpacked here as the
# result of the best run and the heatmap.
results, heatmap = bt.optimize(
    **optimization_params,
    maximize="Equity Final [$]",
    # constraint=constraint,  (disabled: every combination in the grid is eligible)
    max_tries=2000,
    # method="skopt"
    return_heatmap=True
)

# NOTE(review): the string literal below is never executed. It looks like a
# leftover optimize() call for parameters (take_profit, deviation, percent)
# that ImprovedEMACrossStrategy does not define — confirm it can be removed.
"""
stats, heatmap = bt.optimize(take_profit=np.arange(1, 8, 1).tolist(),
                                 deviation=np.arange(1, 8, 1).tolist(),
                                 percent=np.arange(5, 20, 5).tolist(),
                                 maximize="Equity Final [$]",
                                 method="skopt",
                                 max_tries=200,
                                 return_heatmap=True)
"""

# Show the heatmap returned by optimize().
print(heatmap)

# Print the results of the best run
print(results)

# Access the best parameters
# NOTE(review): `_strategy` is a private attribute of the result; it presumably
# holds the strategy object for the best run rather than a dict of parameters —
# confirm before relying on it.
best_params = results._strategy

print(best_params)

# Run the backtest with the best parameters
# NOTE(review): `**best_params` requires a mapping; if `best_params` is a
# strategy object this call would need the parameters extracted first.
# best_run = bt.run(**best_params)

# Plot the results of the best run
# bt.plot()

## Debugging

In [None]:
## check the size of the saved data
!ll