# Backtesting Candlestick and Chart Patterns with Machine Learning
In this notebook, we'll explore the effectiveness of various candlestick and chart patterns in predicting stock price movements. Our approach combines traditional technical analysis with machine learning, specifically using XGBoost, to backtest these patterns on selected assets over a given timeframe.

In [None]:
pip install numpy pandas  xgboost scikit-learn yfinance backtrader pandas_ta

In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# You might need to install TA-Lib or another library for pattern recognition
# import talib as ta
import pandas_ta as ta
import yfinance as yf
# For backtesting, you can use Backtrader or another backtesting library
import backtrader as bt


# Data Acquisition
We will fetch historical data for the assets using the yfinance library.

In [None]:
def fetch_data(asset, start, end):
    """Download historical market data for ``asset`` through yfinance.

    Args:
        asset: Ticker symbol, forwarded as the first argument of
            ``yf.download``.
        start: Start of the requested date range, forwarded as ``start=``.
        end: End of the requested date range, forwarded as ``end=``.

    Returns:
        Whatever ``yf.download`` returns, passed through unchanged.
    """
    return yf.download(asset, start=start, end=end)
# Demo call of fetch_data for a single ticker.
# NOTE(review): start and end are passed as empty strings here, presumably as
# placeholders; supply real date strings before running this cell.
fetch_data('TQQQ', '','')

# Pattern Recognition
Using pandas_ta (which relies on TA-Lib for most of its candlestick patterns), we will identify various candlestick patterns in the data.

In [47]:
def add_candlestick_patterns(data):
    """Return a copy of ``data`` with candlestick-pattern columns appended.

    The patterns are computed with ``pandas_ta.cdl_pattern`` from the
    ``Open``, ``High``, ``Low`` and ``Close`` columns. The input frame is
    not modified.

    Args:
        data: DataFrame with ``Open``, ``High``, ``Low`` and ``Close``
            columns.

    Returns:
        A new DataFrame holding every original column plus one column per
        recognised pattern. Column names are chosen by pandas_ta
        (presumably ``CDL_HAMMER`` and ``CDL_ENGULFING``); values are
        0 (no pattern), -100 (bearish) or +100 (bullish).

    Raises:
        RuntimeError: If pandas_ta could not compute any of the requested
            patterns (it returns ``None`` in that case).
    """
    # pandas_ta expects bare pattern names ("hammer"), not TA-Lib function
    # names ("cdlhammer"); a list computes several patterns in one call.
    # Add more patterns to this list as needed.
    patterns = ta.cdl_pattern(
        open_=data['Open'],
        high=data['High'],
        low=data['Low'],
        close=data['Close'],
        name=["hammer", "engulfing"],
    )

    if patterns is None:
        # Continuing silently would train a model with no pattern features.
        raise RuntimeError(
            "No candlestick patterns could be computed; "
            "check that TA-Lib is installed."
        )

    # Drop stale pattern columns first so repeated calls do not fail with a
    # column-overlap error, then attach the fresh ones by index.
    base = data.drop(columns=list(patterns.columns), errors='ignore')
    return base.join(patterns)

# Fetch sample data
# data = yf.download('AAPL', start='2021-01-01', end='2021-06-01')

# Call the function
# data_with_patterns = add_candlestick_patterns(data)

# Examine the first few rows of the modified DataFrame
# data_with_patterns.head()

# Data Preparation
This step involves preparing our dataset for the machine learning model, including feature creation and labeling.

In [None]:
def prepare_data(data):
    """Build the feature matrix and binary target for the ML model.

    The label is derived from the next row's close: ``Target`` is 1 when
    the return from this row's ``Close`` to the next row's ``Close`` is
    positive, else 0. The work is done on a copy, so the caller's
    DataFrame is left untouched.

    Args:
        data: DataFrame containing at least a ``Close`` column; every other
            column is kept as a feature.

    Returns:
        Tuple ``(X, y)`` where ``X`` is the feature DataFrame (all input
        columns, without the helper/label columns) and ``y`` is the
        ``Target`` Series. Rows containing any NaN are dropped, which
        includes the last row because it has no next close.
    """
    # Copy first: adding the helper columns below must not leak into the
    # frame the caller keeps using afterwards.
    frame = data.copy()

    frame['Next_Close'] = frame['Close'].shift(-1)
    frame['Return'] = (frame['Next_Close'] - frame['Close']) / frame['Close']
    frame['Target'] = np.where(frame['Return'] > 0, 1, 0)

    # Drop rows with NaN values (at least the final row, whose Next_Close
    # is undefined).
    frame = frame.dropna()

    X = frame.drop(['Target', 'Return', 'Next_Close'], axis=1)
    y = frame['Target']
    return X, y

# Model Training
Here, we'll train an XGBoost model to predict future price movements based on identified patterns.

In [None]:
def train_model(X, y):
    """Train an XGBoost classifier and print its hold-out accuracy.

    Args:
        X: Feature matrix.
        y: Binary target aligned with ``X``.

    Returns:
        The fitted ``xgb.XGBClassifier``.
    """
    # shuffle=False keeps the rows in their existing order, so the last 20%
    # form the test set. A shuffled split would let the model train on rows
    # that come after the ones it is evaluated on (look-ahead leakage).
    # NOTE(review): assumes the rows are ordered oldest to newest - confirm.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    model = xgb.XGBClassifier()
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    print(f"Model Accuracy: {accuracy}")
    return model

# Backtesting Strategy
Using backtrader, we will backtest the predictions made by our model to evaluate its effectiveness.

In [None]:
def backtest_strategy(data, model):
    """Placeholder for the backtesting step; not implemented yet.

    Args:
        data: Price data with pattern columns (currently unused).
        model: Trained model (currently unused).

    Returns:
        None. The function performs no work at the moment.
    """
    # TODO: implement the backtesting logic here.
    return None


# Main Execution
The main function orchestrates the process from data fetching to backtesting for each asset.

In [None]:
# Main function
def main():
    """Run the full pipeline for each asset: fetch, add patterns, train, backtest.

    Assets for which no data is returned are reported and skipped, so one
    bad ticker does not abort the run for the remaining assets.
    """
    # NOTE(review): 'SPSX' may be a typo for another ticker (e.g. 'SPXS') -
    # confirm the intended symbol.
    assets = ['SPY', 'TQQQ', 'SPSX']
    start = '2020-01-01'
    end = '2021-01-01'

    for asset in assets:
        print(f"Processing {asset}")
        data = fetch_data(asset, start, end)

        # An empty result would make the later steps fail with an unrelated
        # error, so skip the asset explicitly.
        if data is None or data.empty:
            print(f"No data returned for {asset}; skipping")
            continue

        data_with_patterns = add_candlestick_patterns(data)
        X, y = prepare_data(data_with_patterns)
        model = train_model(X, y)
        backtest_strategy(data_with_patterns, model)

# Run the pipeline only when this code is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()