# Requirements
- Python 3.11.9
    - pyenv (https://github.com/pyenv/pyenv)
- uv (Rust-based Python package manager)
    - _On macOS and Linux_
        - `curl -LsSf https://astral.sh/uv/install.sh | sh`
    - _windows_
        - coming soon
- Install the latest `backtesting.py` version 
    - `pip install git+https://github.com/kernc/backtesting.py.git`
        - This includes fixes for bokeh breaking when you are trying to plot
- TA Lib
    - `brew install ta-lib` - need ta-lib c library to work with ta-lib python
    - `pip install numpy==1.26.4` - need to ensure that we are using a numpy version lower than 2.0
- Jupyter Notebook deps
    - `pip install ipykernel` for jupyter notebooks
- Install additional deps 
    - `pip install pandas ccxt`
    - Pandas dataframes
    - CCXT connection to various crypto exchanges

### Todos
- [x] - Update candlestick retrieval to ensure that we are getting the full range that we are requesting
- [ ] - Update strategy to also include stop loss, attempt to minimize risk as much as possible
- [ ] - Add RR using ATR; make sure that it doesn't cause an infinite loop
- [ ] - Figure out the best indicator for exiting a position with maximum upside
- [ ] - get yearly data for BTC on all time frames to see which one is best
- [ ] - Update candlestick data storage to parquet files


In [1]:
import yfinance as yf
from backtesting import Backtest, Strategy
from backtesting.lib import crossover
import pandas as pd
# import numpy as np
import talib
import ccxt
import datetime
import time
# print(np.__version__)

%load_ext autoreload
%autoreload 2

## Strategy EMACross

EMA 20 - EMA 50

In [None]:
class EMACrossStrategy(Strategy):
    """Trade crossovers of a fast and a slow EMA with a fixed take-profit.

    A long is opened when the fast EMA crosses above the slow EMA and a
    short when it crosses below; an opposing position is closed first.
    An open position is closed once price has moved ``take_profit_pct``
    in its favour, measured from the close of the bar that gave the signal.
    """

    ema_fast = 20  # period of the fast EMA
    ema_slow = 50  # period of the slow EMA
    trade_size = 0.05  # fraction of current equity targeted per trade
    take_profit_pct = 0.02  # close the position after a 2% favourable move

    def init(self):
        """Register both EMA indicators and reset the entry-price tracker."""
        closes = self.data.Close
        self.ema1 = self.I(talib.EMA, closes, timeperiod=self.ema_fast)
        self.ema2 = self.I(talib.EMA, closes, timeperiod=self.ema_slow)
        # Close of the bar on which the latest order was placed; 0 when unset.
        self.entry_price = 0

    def _enter_position(self, going_long, units, price, message):
        """Close an opposing position, log ``message`` and submit the new order."""
        if going_long:
            opposing = self.position.size < 0
        else:
            opposing = self.position.size > 0
        if opposing:
            self.position.close()

        print(message)
        place_order = self.buy if going_long else self.sell
        place_order(size=units)
        # Remember the signal bar's close as the take-profit reference.
        self.entry_price = price

    def next(self):
        """Handle the newest bar: log state, take profit, then act on crossovers."""
        bar_time = self.data.index[-1]
        price = self.data.Close[-1]

        print(
            f"Current index: {bar_time}, Close: {price}, "
            f"EMA{self.ema_fast}: {self.ema1[-1]}, EMA{self.ema_slow}: {self.ema2[-1]}"
        )
        print(f"Current position size: {self.position.size}")
        print(f"Current equity: {self.equity}")

        # Whole units affordable with the per-trade budget, but never fewer
        # than one unit even when the budget does not cover a full unit.
        budget = self.trade_size * self.equity
        units = max(1, int(budget // price))
        print(f"Calculated trade size: {units}")

        # Take profit: longs exit on a rise, shorts on a fall, of at least
        # take_profit_pct relative to the recorded entry price.
        held = self.position.size
        if held != 0:
            move = (price - self.entry_price) / self.entry_price
            long_target_hit = held > 0 and move >= self.take_profit_pct
            short_target_hit = held < 0 and move <= -self.take_profit_pct
            if long_target_hit or short_target_hit:
                print(f"Take profit triggered at {bar_time}, closing position")
                self.position.close()
                self.entry_price = 0

        if crossover(self.ema1, self.ema2):
            # Fast EMA crossed above the slow EMA -> go long.
            self._enter_position(
                True,
                units,
                price,
                f"Buy signal at {bar_time}, attempting to buy {units} units",
            )
        elif crossover(self.ema2, self.ema1):
            # Slow EMA crossed above the fast EMA -> go short.
            self._enter_position(
                False,
                units,
                price,
                f"Sell signal at {bar_time}, attempting to sell {units} units",
            )

        print(f"Position after signal check: {self.position.size}")

## Retrieve candle stick data 
- Requesting data from Phemex up to a certain point, with chunking to prevent rate limiting

In [None]:
# Phemex client with ccxt's rate limiting option switched on
exchange = ccxt.phemex(dict(enableRateLimit=True))

# Market and candle size to download
# ("BTCUSDT" is the symbol used here for the Phemex USDT-margined perpetual)
symbol, timeframe = "BTCUSDT", "15m"

# Download window: the 180 days (roughly 6 months) ending now
end_time = datetime.datetime.now()
start_time = end_time + datetime.timedelta(days=-180)
# TODO: derive how many candles to expect back from the timeframe and start_time


# Candle duration for each timeframe fetch_ohlcv_data knows how to page through.
_TIMEFRAME_STEPS = {
    "15m": datetime.timedelta(minutes=15),
    "1h": datetime.timedelta(hours=1),
    "4h": datetime.timedelta(hours=4),
    "1d": datetime.timedelta(days=1),
}


# Function to fetch data in chunks
def fetch_ohlcv_data(start_time, end_time, max_retries=5):
    """Download candles between two points in time, one page per request.

    Uses the module-level ``exchange``, ``symbol`` and ``timeframe``. After
    each page the cursor moves to one candle past the last candle received.

    Args:
        start_time: ``datetime`` of the first candle to request. It must be
            naive, because the cursor is rebuilt with
            ``datetime.fromtimestamp`` and compared against ``end_time``.
        end_time: Naive ``datetime``; paging stops once the cursor reaches it.
        max_retries: Number of consecutive network failures tolerated before
            giving up and returning what has been collected so far.

    Returns:
        The rows returned by ``exchange.fetch_ohlcv``, concatenated in request
        order. The list is partial when paging stopped early because of an
        exchange error, exhausted retries, an empty page or a stalled cursor.

    Raises:
        ValueError: If ``timeframe`` is not a supported value. This is checked
            before any request is made.
    """
    step = _TIMEFRAME_STEPS.get(timeframe)
    if step is None:
        raise ValueError(f"Unsupported timeframe: {timeframe}")

    all_candles = []
    current_time = start_time
    failures = 0  # consecutive network errors since the last successful page

    while current_time < end_time:
        print(f"Fetching data from {current_time}")
        try:
            candles = exchange.fetch_ohlcv(
                symbol,
                timeframe,
                since=int(current_time.timestamp() * 1000),
                limit=1000,  # Phemex allows up to 1000 candles per request
            )
        except ccxt.NetworkError as e:
            failures += 1
            if failures > max_retries:
                print(
                    f"Network error occurred: {str(e)}. "
                    f"Giving up after {max_retries} retries."
                )
                break
            print(f"Network error occurred: {str(e)}. Retrying in 10 seconds...")
            time.sleep(10)
            continue
        except ccxt.ExchangeError as e:
            print(f"Exchange error occurred: {str(e)}. Stopping.")
            break

        failures = 0
        if not candles:
            break

        # Next cursor: the last candle's time + 1 timeframe.
        last_candle_time = datetime.datetime.fromtimestamp(candles[-1][0] / 1000)
        next_time = last_candle_time + step
        if next_time <= current_time:
            # The page did not move the cursor forward; requesting again from
            # the same point would loop forever, so stop and discard the page.
            print(f"No progress past {current_time}. Stopping.")
            break

        all_candles.extend(candles)
        current_time = next_time
        print(f"Fetched {len(candles)} candles. Next fetch from {current_time}")

        # Sleep to respect rate limits
        time.sleep(exchange.rateLimit / 1000)  # rateLimit is in milliseconds

    return all_candles


# Download the candles for the configured window
candles = fetch_ohlcv_data(start_time, end_time)

# One row per candle; the six fields of each row are named positionally
df = pd.DataFrame(
    data=candles,
    columns="Timestamp Open High Low Close Volume".split(),
)

print(df)

# Convert the millisecond epoch values to datetimes and index the frame by them
df = df.assign(
    Timestamp=pd.to_datetime(df["Timestamp"], unit="ms")
).set_index("Timestamp")

df  # last expression of the cell, so Jupyter renders the frame

### save the candles dataframe to a csv on disk

In [None]:
# Persist the current candle frame (index included) as CSV.
# NOTE: the file name is fixed; it is not derived from the symbol, timeframe
# or date range that was actually downloaded.
output_filename = "BTCUSDT_15m_20240216_20240816.csv"
df.to_csv(path_or_buf=output_filename)

### Load the saved candle stick data frame

In [None]:
output_filename = "BTCUSDT_15m_20240216_20240816.csv"
df = pd.read_csv(output_filename)

# Parse the 'Timestamp' column, then use it as a chronologically sorted index
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
df = df.set_index("Timestamp").sort_index()

# Force every remaining column to numeric (unparseable cells become NaN)
# and discard any row that ends up with a missing value
df = df.apply(pd.to_numeric, errors="coerce").dropna()
df

### Run the strategy and plot it

In [None]:
# Backtest the EMA-cross strategy on the loaded candles:
# 100,000 starting cash, 0.002 commission, exclusive orders enabled
bt = Backtest(
    df,
    EMACrossStrategy,
    cash=100_000,
    commission=0.002,
    exclusive_orders=True,
)

# Execute the run and keep the resulting statistics
stats = bt.run()

# Show the statistics
print(stats)

# Render the chart
bt.plot()