In [62]:
import yfinance as yf
from backtesting import Backtest, Strategy
from backtesting.lib import crossover,cross, SignalStrategy, TrailingStrategy
# from backtesting.test import SMA
from ta import momentum
import pandas as pd
import datetime as dt


def SMA(arr, n):
    """Simple moving average of ``arr`` over a trailing window of ``n`` values.

    :param arr: array-like of numeric values (e.g. closing prices)
    :param n: (int) window length
    :return: pandas Series of the rolling mean, same length as ``arr``
    """
    values = pd.Series(arr)
    window = values.rolling(n)
    return window.mean()


def EMA(arr, n):
    """Exponential moving average of ``arr`` with span ``n``.

    Uses the recursive (``adjust=False``) form of the exponentially
    weighted mean.

    :param arr: array-like of numeric values (e.g. closing prices)
    :param n: (int) span of the exponential window
    :return: pandas Series of the exponentially weighted mean
    """
    values = pd.Series(arr)
    weighted = values.ewm(span=n, adjust=False)
    return weighted.mean()


def RSI_Indicator(arr, n):
    """Relative Strength Index of ``arr`` computed with ``ta.momentum``.

    :param arr: array-like of prices; wrapped in a pandas Series before use
    :param n: (int) look-back window passed to ``RSIIndicator``
    :return: the value returned by ``RSIIndicator.rsi()``
    """
    prices = pd.Series(arr)
    return momentum.RSIIndicator(prices, window=n).rsi()


class RsiStrategy(Strategy):
    """RSI threshold strategy: buy on a cross above 30, sell on a cross below 70."""

    def init(self):
        # Register a 14-period RSI of the closing price as an indicator.
        self.rsi = self.I(RSI_Indicator, self.data.Close, 14)

    def next(self):
        # RSI crossing above 30 -> go long.
        if crossover(self.rsi, 30):
            self.buy()
            return
        # RSI crossing below 70 (i.e. 70 crossing above RSI) -> go short.
        if crossover(70, self.rsi):
            self.sell()


class SmaCross(Strategy):
    """Moving-average crossover strategy with a 5% stop-loss on each order.

    ``n1`` and ``n2`` are the fast and slow SMA window lengths; they are
    class attributes so the optimizer can vary them.
    """

    n1 = 10
    n2 = 20

    def init(self):
        # Fast and slow simple moving averages of the closing price.
        closes = self.data.Close
        self.ma1 = self.I(SMA, closes, self.n1)
        self.ma2 = self.I(SMA, closes, self.n2)

    def next(self):
        last_close = self.data.Close[-1]
        # Fast MA crossing above slow MA: buy, stop-loss 5% below the close.
        if crossover(self.ma1, self.ma2):
            self.buy(sl=last_close * 0.95)
            return
        # Slow MA crossing above fast MA: sell, stop-loss 5% above the close.
        if crossover(self.ma2, self.ma1):
            self.sell(sl=last_close * 1.05)



In [None]:

# Ticker to backtest. Alternative that was tried: symbol = 'AAPL'
symbol = 'GOOG'

# A fixed date window was used previously:
# start_date = '2022-01-11'
# end_date = '2023-01-01'

# Use today's date as the end date.
# NOTE(review): because the end date moves, the downloaded data (and therefore
# every statistic below) can differ from one run of this cell to the next.
end_date = dt.datetime.now().strftime('%Y-%m-%d')

# Download daily bars for the symbol with yfinance.
data = yf.download(symbol, start="2021-01-01", end=end_date, interval='1d')


# Backtest the SMA crossover strategy on the downloaded bars.
bt = Backtest(data, SmaCross, commission=.002,
              cash=2_000,
              exclusive_orders=True)
# Run once with the default n1/n2; the returned stats are not kept.
bt.run()

# Search n1 in [5, 50) and n2 in [10, 100), keeping only n1 < n2,
# and pick the combination with the highest 'Win Rate [%]'.
stats = bt.optimize(n1=range(5, 50, 1),
                    n2=range(10, 100, 1),
                    maximize='Win Rate [%]',
                    constraint=lambda param: param.n1 < param.n2)

# NOTE(review): a notebook cell only renders its last expression, so the bare
# `stats`, `stats._strategy` and `stats.tail()` lines below produce no visible
# output; wrap them in display(...) or move them to their own cells if the
# values are meant to be shown.
bt.plot()
stats
stats._strategy
bt.plot(plot_volume=False, plot_pl=False)
stats.tail()
stats['_trades']


In [1]:
from typing import List, Generator
import numpy as np
from sklearn.model_selection._split import _BaseKFold
from sklearn.utils.validation import indexable, _num_samples


class MonteCarloCV(_BaseKFold):
    """Monte Carlo cross-validation for ordered (time-series) data.

    A holdout split is repeated ``n_splits`` times. For each repetition a
    testing origin (the position where the test window begins) is drawn at
    random; the training window is the block of samples immediately before
    the origin (minus ``gap`` samples) and the test window is the block
    starting at the origin.
    """

    def __init__(self,
                 n_splits: int,
                 train_size: float,
                 test_size: float,
                 gap: int = 0):
        """
        Monte Carlo Cross-Validation

        Holdout applied in multiple testing periods
        Testing origin (time-step where testing begins) is randomly chosen according to a monte carlo simulation

        :param n_splits: (int) Number of monte carlo repetitions in the procedure
        :param train_size: (float) Train size, in terms of ratio of the total length of the series
        :param test_size: (float) Test size, in terms of ratio of the total length of the series
        :param gap: (int) Number of samples to exclude from the end of each train set before the test set.
        """

        self.n_splits = n_splits
        self.n_samples = -1
        self.gap = gap
        self.train_size = train_size
        self.test_size = test_size
        # Filled in by split() once the number of samples is known.
        self.train_n_samples = 0
        self.test_n_samples = 0

        # Origins drawn by the most recent call to split().
        self.mc_origins = []

    def split(self, X, y=None, groups=None) -> Generator:
        """Generate indices to split data into training and test set.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        groups : array-like of shape (n_samples,)
            Always ignored, exists for compatibility.
        Yields
        ------
        train : ndarray
            The training set indices for that split.
        test : ndarray
            The testing set indices for that split.
        Raises
        ------
        ValueError
            If the split parameters cannot be satisfied for the given
            number of samples (too many splits, negative or oversized gap,
            empty test window, or no room left to place a testing origin).
        """

        X, y, groups = indexable(X, y, groups)
        self.n_samples = _num_samples(X)

        self.train_n_samples = int(self.n_samples * self.train_size) - 1
        self.test_n_samples = int(self.n_samples * self.test_size) - 1

        # Make sure we have enough samples for the given split parameters
        if self.n_splits > self.n_samples:
            raise ValueError(
                f'Cannot have number of folds={self.n_splits} greater'
                f' than the number of samples={self.n_samples}.'
            )
        if self.gap < 0:
            # A negative gap would push the train window into the test window.
            raise ValueError(
                f'The gap={self.gap} must be a non-negative number of samples.'
            )
        if self.train_n_samples - self.gap <= 0:
            raise ValueError(
                f'The gap={self.gap} is too big for number of training samples'
                f'={self.train_n_samples} with testing samples={self.test_n_samples} and gap={self.gap}.'
            )
        if self.test_n_samples <= 0:
            # Otherwise every test slice below would be empty.
            raise ValueError(
                f'test_size={self.test_size} yields no testing samples'
                f' for number of samples={self.n_samples}.'
            )

        indices = np.arange(self.n_samples)

        # Candidate origins: far enough from the start to fit a full train
        # window and far enough from the end to fit a full test window.
        selection_range = np.arange(self.train_n_samples + 1, self.n_samples - self.test_n_samples - 1)
        if selection_range.size == 0:
            raise ValueError(
                f'train_size={self.train_size} and test_size={self.test_size}'
                f' leave no valid testing origin for number of samples={self.n_samples}.'
            )

        self.mc_origins = \
            np.random.choice(a=selection_range,
                             size=self.n_splits,
                             replace=True)

        for origin in self.mc_origins:
            train_start = origin - self.train_n_samples - 1
            # Drop exactly `gap` samples between the end of train and the
            # origin (gap=0 keeps train adjacent to the test window).
            train_end = origin - self.gap
            test_end = origin + self.test_n_samples

            yield (
                indices[train_start:train_end],
                indices[origin:test_end],
            )

    def get_origins(self) -> List[int]:
        """Return the testing origins drawn by the most recent ``split`` call.

        Empty until ``split`` has been iterated at least once.
        """
        return self.mc_origins



In [63]:
import pandas as pd
import numpy as np

# Seed NumPy's global RNG so the Monte Carlo origins drawn below are repeatable.
np.random.seed(0)

# NOTE(review): absolute, machine-specific path; this cell only runs where
# this exact file location exists.
EURUSD = pd.read_table('/Users/newuser/Projects/robust-algo-trader/data/EURUSD_H1_200702210000_202304242100.tsv')
# remove the following columns <TICKVOL>, <VOL> and <SPREAD>
EURUSD = EURUSD.drop(['<TICKVOL>', '<VOL>', '<SPREAD>'], axis=1)
# rename the columns
EURUSD = EURUSD.rename(columns={'<DATE>': 'Date', 
                                '<TIME>': 'Time', 
                                '<OPEN>': 'Open', 
                                '<HIGH>': 'High', 
                                '<LOW>': 'Low', 
                                '<CLOSE>': 'Close'})
# combine the date and time columns
EURUSD['Date_Time'] = EURUSD['Date'] + ' ' + EURUSD['Time']
# convert the date_time column to datetime
EURUSD['Date_Time'] = pd.to_datetime(EURUSD['Date_Time'], format='%Y%m%d %H:%M:%S.%f')
# remove the date and time columns
EURUSD = EURUSD.drop(['Date', 'Time'], axis=1)

# 5 random holdout splits: 60% of the series for training, 10% for testing.
mccv = MonteCarloCV(n_splits=5, 
                    train_size=0.6, 
                    test_size=0.1, 
                    gap=0)

for train_index, test_index in mccv.split(EURUSD):
    # split() yields positional indices, so select rows by position (.iloc)
    # rather than by label (.loc); the two only agree while the frame keeps
    # its default RangeIndex.
    train_set = EURUSD.iloc[train_index]
    # set the date_time column as index
    train_set = train_set.set_index('Date_Time')
    bt = Backtest(train_set, SmaCross, commission=.002,
              cash=10_000,
              exclusive_orders=True)
    bt.run()
    # Randomized search (at most 100 tries) over n1/n2 with n1 < n2,
    # maximizing the win rate on the training window.
    stats = bt.optimize(n1=range(5, 50, 1),
                    n2=range(10, 100, 1),
                    maximize='Win Rate [%]',
                    max_tries=100,
                    random_state=0,
                    constraint=lambda param: param.n1 < param.n2)
    bt.plot()

    print(stats)
    # Only the first split is evaluated for now; test_index is not used yet.
    break
    
    # X_train, X_test = EURUSD[train_index], EURUSD[test_index]
    # y_train, y_test = EURUSD[train_index], EURUSD[test_index]



  0%|          | 0/9 [00:00<?, ?it/s]

  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='nearest')
  new_bar_idx = new_index.get_loc(mean_time, method='ne

Start                     2007-07-31 22:00:00
End                       2017-04-20 16:00:00
Duration                   3550 days 18:00:00
Exposure Time [%]                   99.773729
Equity Final [$]                  2286.945037
Equity Peak [$]                   11552.36653
Return [%]                          -77.13055
Buy & Hold Return [%]               -21.32354
Return (Ann.) [%]                  -13.686691
Volatility (Ann.) [%]                9.404376
Sharpe Ratio                              0.0
Sortino Ratio                             0.0
Calmar Ratio                              0.0
Max. Drawdown [%]                  -80.386864
Avg. Drawdown [%]                   -2.046439
Max. Drawdown Duration     3026 days 04:00:00
Avg. Drawdown Duration       60 days 01:00:00
# Trades                                  812
Win Rate [%]                         32.26601
Best Trade [%]                       9.788291
Worst Trade [%]                     -3.244486
Avg. Trade [%]                    