# Requirments
- Python 3.11.9
    - pyenv (https://github.com/pyenv/pyenv)
- uv (rust based - python package manger)
    - _On macOS and Linux_
        - `curl -LsSf https://astral.sh/uv/install.sh | sh`
    - _windows_
        - coming soon
- Install the latest `backtesting.py` version 
    - `pip install git+https://github.com/kernc/backtesting.py.git`
        - This includes fixes for bokeh breaking when you are trying to plot
- TA Lib
    - `brew install ta-lib` - need ta-lib c library to work with ta-lib python
    - `pip install numpy==1.26.4` - need to ensure that we are using a numpy version lower then 2.0
- Jupyter Noteboks deps
    - `pip install ipykernel` for jupyter notebooks
- Install additional deps 
    - `pip install pandas ccxt`
    - Pandas dataframes
    - CCXT connection to various crypto exchanges

### Todos
- [x] [DONE] Update candle stick retrival to ensure that we are getting the full range that we are requesting
- [x] [DONE] Get yearly data 15m for BTC on all time frames to see which one is best
- [x] [DONE] Update candlestick data storage to parquet files
- [x] [DONE] Dynamic position size
- [x] [DONE] Update strategy to also include stop loss, attempt to minimize risk as much as possible
- [ ] ADD RR using ATR make sure that it doesn't cause a infinite loop
- [ ] Figure out the best indicator for exiting a position with maximum upside

In [2]:
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
import pandas as pd
import talib
import os
import glob

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Strategy EMACross

EMA 20 X EMA 50

In [6]:
class ImprovedEMACrossStrategy(Strategy):
    ema_fast = 10
    ema_slow = 30
    trade_size = 0.02
    take_profit_pct = 0.04
    stop_loss_pct = 0.02
    atr_period = 14
    rsi_period = 14
    rsi_overbought = 70
    rsi_oversold = 30
    atr_multiplier = 2
    trend_strength = 0.001

    def init(self):
        close = self.data.Close
        high = self.data.High
        low = self.data.Low

        self.ema1 = self.I(talib.EMA, close, timeperiod=self.ema_fast)
        self.ema2 = self.I(talib.EMA, close, timeperiod=self.ema_slow)
        self.atr = self.I(talib.ATR, high, low, close, timeperiod=self.atr_period)
        self.rsi = self.I(talib.RSI, close, timeperiod=self.rsi_period)
        self.entry_price = 0

    def next(self):
        price = self.data.Close[-1]

        # Dynamic take profit and stop loss based on ATR
        take_profit = self.atr[-1] * self.atr_multiplier
        stop_loss = self.atr[-1] * (self.atr_multiplier / 2)

        size = max(1, int((self.trade_size * self.equity) // price))

        # Check for take profit or stop loss
        if self.position.size != 0:
            pnl = price - self.entry_price
            if self.position.size > 0:
                if pnl >= take_profit or pnl <= -stop_loss:
                    self.position.close()
                    self.entry_price = 0
            elif self.position.size < 0:
                if pnl <= -take_profit or pnl >= stop_loss:
                    self.position.close()
                    self.entry_price = 0

        # Check for strong trend
        trend_strength = abs((self.ema1[-1] - self.ema2[-1]) / self.ema2[-1])

        if crossover(self.ema1, self.ema2) and trend_strength > self.trend_strength:
            if self.position.size < 0:
                self.position.close()
            if self.rsi[-1] < self.rsi_overbought:
                self.buy(size=size)
                self.entry_price = price
        elif crossover(self.ema2, self.ema1) and trend_strength > self.trend_strength:
            if self.position.size > 0:
                self.position.close()
            if self.rsi[-1] > self.rsi_oversold:
                self.sell(size=size)
                self.entry_price = price

### Load the saved candle stick data frame
_only run this cell when you want after you the have dynamically created the outputfile name_

In [7]:
def find_candlestick_file(
    timeframe, exchange, symbol="BTCUSDT", storage_format="parquet"
):
    # Base directory to search in
    base_directory = "./saved_candlestick_data/"
    # Full directory path including the exchange and storage format
    directory = os.path.join(base_directory, exchange, storage_format)

    # Create the pattern to match the filename based on the exchange
    if exchange == "binance":
        pattern = f"{exchange}_{symbol}_{timeframe}_*"
    else:  # for phemex
        pattern = f"{symbol}_{timeframe}_*"

    # Add the appropriate file extension
    if storage_format == "parquet":
        pattern += ".parquet"
    elif storage_format == "csv":
        pattern += ".csv"
    else:
        raise ValueError("Unsupported storage format. Use 'parquet' or 'csv'.")

    # Search for files matching the pattern in the given directory
    matching_files = glob.glob(os.path.join(directory, pattern))

    if not matching_files:
        return None
    elif len(matching_files) > 1:
        # If multiple files are found, return the most recent one
        return max(matching_files, key=os.path.getctime)
    else:
        return matching_files[0]


# Usage example:
timeframe = "15m"
exchange = "binance"  # or "phemex"
symbol = "BTCUSDT"
storage_format = "parquet"  # or "csv"

filename = find_candlestick_file(timeframe, exchange, symbol, storage_format)

if filename:
    print(f"Found file: {filename}")
    if storage_format == "parquet":
        df = pd.read_parquet(filename)
    else:  # csv
        df = pd.read_csv(filename)
    # Now you can work with your dataframe
    print(df.head())
else:
    print(
        f"No file found for {symbol} with timeframe {timeframe} on {exchange} in {storage_format} format"
    )

Found file: ./saved_candlestick_data/binance/parquet/binance_BTCUSDT_15m_20230818_20240817.parquet
                         Open      High       Low     Close     Volume
Timestamp                                                             
2023-08-18 23:00:00  26071.99  26094.49  26051.91  26051.91  175.29393
2023-08-18 23:15:00  26051.91  26062.82  26022.81  26028.47  504.91311
2023-08-18 23:30:00  26028.47  26044.01  26006.01  26031.53  309.45135
2023-08-18 23:45:00  26031.53  26059.91  26026.00  26054.00  154.77265
2023-08-19 00:00:00  26054.00  26080.20  26048.67  26069.70  359.74408


### Run the strategy and plot it

In [8]:
# Create the Backtest instance
bt = Backtest(
    df,
    ImprovedEMACrossStrategy,
    cash=100000,
    commission=0.002,
    exclusive_orders=True,
)

# Run the backtest
stats = bt.run()

# Print the results
print(stats)

# Plot the results
bt.plot()

Start                     2023-08-18 23:00:00
End                       2024-08-17 22:45:00
Duration                    364 days 23:45:00
Exposure Time [%]                    0.650685
Equity Final [$]                 101794.19408
Equity Peak [$]                  103233.88644
Return [%]                           1.794194
Buy & Hold Return [%]              127.704994
Return (Ann.) [%]                    1.789248
Volatility (Ann.) [%]                2.883298
Sharpe Ratio                         0.620556
Sortino Ratio                        1.127792
Calmar Ratio                         0.722458
Max. Drawdown [%]                   -2.476614
Avg. Drawdown [%]                    -1.31743
Max. Drawdown Duration      157 days 09:00:00
Avg. Drawdown Duration       58 days 02:54:00
# Trades                                   19
Win Rate [%]                        47.368421
Best Trade [%]                       2.840427
Worst Trade [%]                     -1.518999
Avg. Trade [%]                    

  df = df.resample(freq, label='right').agg(OHLCV_AGG).dropna()
  indicators = [_Indicator(i.df.resample(freq, label='right').mean()
  indicators = [_Indicator(i.df.resample(freq, label='right').mean()
  indicators = [_Indicator(i.df.resample(freq, label='right').mean()
  indicators = [_Indicator(i.df.resample(freq, label='right').mean()
  equity_data = equity_data.resample(freq, label='right').agg(_EQUITY_AGG).dropna(how='all')
  trades = trades.assign(count=1).resample(freq, on='ExitTime', label='right').agg(dict(
  mean_time = int(bars.loc[s.index].view(int).mean())


## Debugging

In [None]:
## check the size of the saved data
!ll