In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import talib
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import numpy as np


In [2]:
from binance.client import Client

# sys.path.append(os.path.abspath(".."))  # root /PycharmProjects/MMAT
from config.load_env import load_keys

keys = load_keys()
#print("Loaded keys:", keys)
client = Client(keys['api_key'], keys['secret_key'])

### Load Data from Historical 1yr csv file:

In [3]:
def load_data(csv_path):
    try:
        df = pd.read_csv(csv_path, index_col='timestamp', parse_dates=True)
        df = df[['open', 'high', 'low', 'close', 'volume']].copy()
        print(f"Total K-lines loaded: {len(df)}")
        return df
    except FileNotFoundError:
        print(f"CSV file '{csv_path}' not found.")
        return None


### Resample to 15 min (Can try other different Time horizon):

In [4]:
def resample_to_15min(df):
    df_15min = df.resample('15min').agg({
        'open': 'first',
        'high': 'max',
       'low': 'min',
        'close': 'last',
        'volume': 'sum'
    }).dropna()
    print(f"Resampled to 15min, total K-lines: {len(df_15min)}")
    return df_15min

### Import Indicators :

#### (Below are RSI, MA5days, MA20days, Volume, ATR, Mean ATR, MACD） --> feel free to adjust the time period or add other indicators):
#### In this file, these indicators havent combined with Candlestick patterns yet, they are just for plotting used

In [5]:
def calculate_indicators(df):
    df['RSI'] = talib.RSI(df['close'], timeperiod=14)
    df['MA20'] = talib.SMA(df['close'], timeperiod=20)
    df['MA50'] = talib.SMA(df['close'], timeperiod=50)
    return df

### Try 19 CandleStick Patterns:

| Bullish Patterns     | Bearish Patterns     | 
|----------------------|----------------------|
| Hammer              | HangingMan          |           
| InvertedHammer      | ShootingStar        |                  
| BullishEngulfing    | BearishEngulfing    |                  
| PiercingLine        | DarkCloudCover      |                  
| MorningDojiStar     | EveningDojiStar     |                  
| MorningStar         | EveningStar         |                 
| ThreeWhiteSoldiers  | ThreeBlackCrows     |                  
| DragonflyDoji       | GravestoneDoji      |                  
| UpsideTasukiGap     |                     |                  
| `ThreeLineStrike`     | `ThreeLineStrike`     |                  
| `LongLine`            | `LongLine`            |                  

**Notes**:
- `ThreeLineStrike` and `LongLine` appear in both Bullish and Bearish columns because they can generate bullish (`100`) or bearish (`-100`) signals depending on the candlestick direction.


In [6]:
def calculate_patterns(df):
    patterns = {
        'Hammer': talib.CDLHAMMER,
        'InvertedHammer': talib.CDLINVERTEDHAMMER,
        'BullishEngulfing': lambda o, h, l, c: np.where(talib.CDLENGULFING(o, h, l, c) == 100, 100, 0),
        'PiercingLine': talib.CDLPIERCING,
        'MorningDojiStar': talib.CDLMORNINGDOJISTAR,
        'MorningStar': talib.CDLMORNINGSTAR,
        'ThreeWhiteSoldiers': talib.CDL3WHITESOLDIERS,
        'HangingMan': talib.CDLHANGINGMAN,
        'ShootingStar': talib.CDLSHOOTINGSTAR,
        'BearishEngulfing': lambda o, h, l, c: np.where(talib.CDLENGULFING(o, h, l, c) == -100, -100, 0),
        'DarkCloudCover': talib.CDLDARKCLOUDCOVER,
        'EveningDojiStar': talib.CDLEVENINGDOJISTAR,
        'EveningStar': talib.CDLEVENINGSTAR,
        'ThreeBlackCrows': talib.CDL3BLACKCROWS,
        'ThreeLineStrike': talib.CDL3LINESTRIKE,
        'DragonflyDoji': talib.CDLDRAGONFLYDOJI,
        'GravestoneDoji': talib.CDLGRAVESTONEDOJI,
        'UpsideTasukiGap': talib.CDLUPSIDEGAP2CROWS,
        'LongLine': talib.CDLLONGLINE
    }

    # Apply each pattern function to create columns in the DataFrame
    for name, pattern_func in patterns.items():
        df[name] = pattern_func(df['open'].values, df['high'].values, df['low'].values, df['close'].values)

    # Calculate and print the count of non-zero occurrences for each pattern
    for name in patterns.keys():
        count = (df[name].abs() > 0).sum()
        print(f"{name} detected {count} times.")

    return df, patterns


### Generate Bullish/Bearish Signal based on Ta-Lib definition:

- The `generate_signals()` function only uses information from the current or past candlesticks (e.g., open, high, low, close up to time *t*)
- It does not rely on future price movements (e.g., it avoids using `shift(-1) future price` or forward-looking returns to decide a signal)

In [7]:
# Ensure we only use past data for signal generation
# This function assumes all indicator values are based on past prices only

def generate_signals(df, patterns, window=1):
    # Initialize signal and direction columns for each pattern
    for name in patterns.keys():
        df[f'Signal_{name}'] = 0
        df[f'Direction_{name}'] = 'NONE'

    # Group patterns for structured processing
    bullish_patterns_strong = ['BullishEngulfing', 'ThreeLineStrike']
    bullish_patterns = ['Hammer', 'InvertedHammer', 'PiercingLine', 'MorningDojiStar',
                        'MorningStar', 'ThreeWhiteSoldiers', 'DragonflyDoji', 'UpsideTasukiGap', 'LongLine']
    bearish_patterns_strong = ['BearishEngulfing']
    bearish_patterns = ['HangingMan', 'ShootingStar', 'DarkCloudCover', 'EveningDojiStar',
                        'EveningStar', 'ThreeBlackCrows', 'GravestoneDoji']

    for i in range(1, len(df) - window):
        for name in patterns.keys():
            value = df[name].iloc[i]

            # Strong bullish patterns: signal only when value == 100
            if name in bullish_patterns_strong and value == 100:
                df.loc[df.index[i], f'Signal_{name}'] = 1
                df.loc[df.index[i], f'Direction_{name}'] = 'UP'

            # General bullish patterns: signal when value > 0 (includes 1 or 100)
            elif name in bullish_patterns and value > 0:
                df.loc[df.index[i], f'Signal_{name}'] = 1
                df.loc[df.index[i], f'Direction_{name}'] = 'UP'

            # Strong bearish patterns: signal only when value == -100
            elif name in bearish_patterns_strong and value == -100:
                df.loc[df.index[i], f'Signal_{name}'] = -1
                df.loc[df.index[i], f'Direction_{name}'] = 'DOWN'

            # General bearish patterns: signal when value < 0 (includes -1 or -100)
            elif name in bearish_patterns and value < 0:
                df.loc[df.index[i], f'Signal_{name}'] = -1
                df.loc[df.index[i], f'Direction_{name}'] = 'DOWN'

            # Special case: GravestoneDoji returns 100 but is bearish by definition
            elif name == 'GravestoneDoji' and value == 100:
                df.loc[df.index[i], f'Signal_{name}'] = -1
                df.loc[df.index[i], f'Direction_{name}'] = 'DOWN'

    return df


### Evaluate Accuracy Rate for each CandlesStick Pattern Signal: 

In [8]:
def evaluate_patterns(df, patterns, window=1, threshold=0.0005):
    """
    Evaluate the accuracy of each candlestick pattern signal.

    Parameters:
    - df: DataFrame containing candlestick data and signal columns (e.g., Signal_Hammer)
    - patterns: dict of pattern name to TA-Lib function
    - window: holding period (e.g., how many bars into the future to measure return)
    - threshold: the minimum return threshold to consider a signal as "correct"

    Returns:
    - Dictionary with accuracy, total_signals, and correct_signals for each pattern
    """

    results = {}

    # === Step 1: Calculate future return (target variable) ===
    # next_close uses future price at t+window (e.g., t+1), so we can compare it against the signal generated at time t
    df['next_close'] = df['close'].shift(-window)  # ← this is safe: we only use this for *evaluation*, not for generating signals
    df['return'] = (df['next_close'] - df['close']) / df['close']  # ← return from time t to t+window

    for name in patterns.keys():
        signal_col = f'Signal_{name}'

        # Skip if signal column doesn't exist (e.g., pattern not used in generate_signals)
        if signal_col not in df.columns:
            results[name] = {
                'accuracy': 0,
                'total_signals': 0,
                'correct_signals': 0
            }
            continue

        # Only consider rows where the signal is active (1 = bullish, -1 = bearish)
        signals = df[df[signal_col] != 0]
        total_signals = len(signals)

        if total_signals == 0:
            results[name] = {
                'accuracy': 0,
                'total_signals': 0,
                'correct_signals': 0
            }
            continue

        # === Step 2: Count correct signals ===
        # A signal is "correct" if the return moves in the expected direction and exceeds the threshold
        correct_signals = len(signals[
            ((signals[signal_col] == 1) & (df.loc[signals.index, 'return'] >= threshold)) |  # Bullish signal → price should rise
            ((signals[signal_col] == -1) & (df.loc[signals.index, 'return'] <= -threshold))   # Bearish signal → price should fall
        ])

        # === Step 3: Compute accuracy ===
        accuracy = correct_signals / total_signals * 100

        results[name] = {
            'accuracy': accuracy,
            'total_signals': total_signals,
            'correct_signals': correct_signals
        }

    return results


### Plot Top3 Candlestick Pattern Result : 

In [9]:
import os
import webbrowser
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

def plot_pattern_results(df, patterns, symbol, max_points=2000, buffer=50, open_browser=True):
    """
    Plots candlestick charts with pattern signals, ensuring no future leakage and index alignment.

    Parameters:
    - df: DataFrame with signals already generated
    - patterns: list of pattern names (e.g. ['BullishEngulfing', 'ThreeLineStrike'])
    - symbol: str, name of the instrument
    - max_points: how many points to show in the plot (excluding buffer)
    - buffer: number of extra candles before the plotted region (e.g. to show full 3-line strikes)
    - open_browser: whether to open HTML chart in browser
    """

    # Determine the starting point with buffer
    start_index = max(len(df) - max_points - buffer, 0)
    df_plot = df.iloc[start_index:].copy()

    output_dir = './plots/'
    os.makedirs(output_dir, exist_ok=True)

    top_patterns = ['InvertedHammer', 'ThreeLineStrike', 'HangingMan'] # Can adjust - add other candlestick pattern

    for name in top_patterns:
        signal_col = f'Signal_{name}'

        if signal_col not in df_plot.columns:
            print(f"Warning: Signal column '{signal_col}' not found for {name}. Skipping plot.")
            continue

        up_signals = df_plot[df_plot[signal_col] == 1]
        down_signals = df_plot[df_plot[signal_col] == -1]
        neutral_signals = df_plot[df_plot[signal_col] == 9]

        fig = make_subplots(
            rows=2, cols=1,
            shared_xaxes=True,
            vertical_spacing=0.1,
            subplot_titles=['Candlestick + MA', 'RSI'],
            row_heights=[0.7, 0.3]
        )

        fig.add_trace(
            go.Candlestick(
                x=df_plot.index,
                open=df_plot['open'],
                high=df_plot['high'],
                low=df_plot['low'],
                close=df_plot['close'],
                name='Candlestick',
                increasing_line_color='green',
                decreasing_line_color='red'
            ),
            row=1, col=1
        )

        fig.add_trace(
            go.Scatter(x=df_plot.index, y=df_plot['MA20'], mode='lines', name='20 MA', line=dict(color='blue')),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(x=df_plot.index, y=df_plot['MA50'], mode='lines', name='50 MA', line=dict(color='purple')),
            row=1, col=1
        )

        if not up_signals.empty:
            fig.add_trace(
                go.Scatter(
                    x=up_signals.index,
                    y=up_signals['close'] * 1.005,
                    mode='markers',
                    marker=dict(symbol='triangle-up', color='green', size=10),
                    name='Bullish Signal',
                    text=[f'Bullish {name}' for _ in range(len(up_signals))],
                    hoverinfo='text+x+y'
                ),
                row=1, col=1
            )
        if not down_signals.empty:
            fig.add_trace(
                go.Scatter(
                    x=down_signals.index,
                    y=down_signals['close'] * 0.995,
                    mode='markers',
                    marker=dict(symbol='triangle-down', color='red', size=10),
                    name='Bearish Signal',
                    text=[f'Bearish {name}' for _ in range(len(down_signals))],
                    hoverinfo='text+x+y'
                ),
                row=1, col=1
            )
        if not neutral_signals.empty:
            fig.add_trace(
                go.Scatter(
                    x=neutral_signals.index,
                    y=neutral_signals['close'],
                    mode='markers',
                    marker=dict(symbol='circle', color='gray', size=8),
                    name='Neutral Signal',
                    text=[f'Neutral {name}' for _ in range(len(neutral_signals))],
                    hoverinfo='text+x+y'
                ),
                row=1, col=1
            )

        fig.add_trace(
            go.Scatter(x=df_plot.index, y=df_plot['RSI'], mode='lines', name='RSI', line=dict(color='blue')),
            row=2, col=1
        )
        fig.add_hline(y=50, line_dash='dash', line_color='black', row=2, col=1)

        fig.update_layout(
            title=f'{name} Signals for {symbol} with MA and RSI',
            xaxis_title='Time',
            yaxis_title='Price ($)',
            yaxis2_title='RSI',
            xaxis_rangeslider_visible=False,
            showlegend=True,
            height=600,
            template='plotly_white'
        )

        html_path = os.path.join(output_dir, f'{name}_signals.html')
        fig.write_html(html_path)
        print(f"Saved plot for {name} to {html_path}")

        if open_browser:
            abs_path = os.path.abspath(html_path)
            webbrowser.open(f'file://{abs_path}')
            print(f"Opened plot for {name} in default browser")


### Def Main:

In [10]:
import pandas as pd

def main():
    csv_path = '/Users/wynn/PycharmProjects/MMAT/notebooks/btc_1min.csv' # adjust to your file
    df = load_data(csv_path)
    if df is None:
        print("Error: Failed to load data.")
        return

    # Validate required columns
    required_cols = ['open', 'high', 'low', 'close']
    if not all(col in df.columns for col in required_cols):
        print(f"Error: DataFrame missing required columns: {required_cols}")
        return

    df = resample_to_15min(df)
    if df.empty:
        print("Error: Resampled DataFrame is empty.")
        return

    df = calculate_indicators(df)
    if not all(col in df.columns for col in ['MA20', 'MA50', 'RSI']):
        print("Error: calculate_indicators did not generate required columns: ['MA20', 'MA50', 'RSI']")
        return

    df, patterns = calculate_patterns(df)
    df = generate_signals(df, patterns)
    #print(df[['GravestoneDoji']].value_counts())

    # Evaluate pattern accuracy
    accuracy_results = evaluate_patterns(df, patterns)

    # Print accuracy results
    print("\n--- Singal Accuracy Rate Evaluation Results ---")
    for name, metrics in sorted(accuracy_results.items(), key=lambda x: x[1]['accuracy'], reverse=True):
        print(f"{name} - Accuracy: {metrics['accuracy']:.2f}%, Total Signals: {metrics['total_signals']}, Correct Signals: {metrics['correct_signals']}")

    # Plot signals for selected patterns
    plot_pattern_results(df, patterns, 'BTC')

if __name__ == "__main__":
    main()

Total K-lines loaded: 526000
Resampled to 15min, total K-lines: 35067
Hammer detected 1058 times.
InvertedHammer detected 134 times.
BullishEngulfing detected 1008 times.
PiercingLine detected 3 times.
MorningDojiStar detected 17 times.
MorningStar detected 87 times.
ThreeWhiteSoldiers detected 43 times.
HangingMan detected 590 times.
ShootingStar detected 161 times.
BearishEngulfing detected 1006 times.
DarkCloudCover detected 1 times.
EveningDojiStar detected 20 times.
EveningStar detected 81 times.
ThreeBlackCrows detected 4 times.
ThreeLineStrike detected 82 times.
DragonflyDoji detected 634 times.
GravestoneDoji detected 546 times.
UpsideTasukiGap detected 0 times.
LongLine detected 6718 times.

--- Singal Accuracy Rate Evaluation Results ---
DarkCloudCover - Accuracy: 100.00%, Total Signals: 1, Correct Signals: 1
ThreeLineStrike - Accuracy: 50.00%, Total Signals: 40, Correct Signals: 20
InvertedHammer - Accuracy: 47.01%, Total Signals: 134, Correct Signals: 63
HangingMan - Accura

## Final Evaluation Result:

### Candlestick Pattern Performance Comparison

**Data Summary**  
- 1-minute BTC K-lines loaded: 526,000  
- Resampled to 15-minute interval: 35,067 K-lines  

| Pattern             | Type    | Detections | Signal Count | Total Signals | Correct Signals | Accuracy (%) |
|---------------------|---------|------------|--------------|---------------|-----------------|--------------|
| DarkCloudCover      | Bearish | 1          | 1            | 1             | 1               | 100.00       |
| ThreeLineStrike     | Both    | 82         | 82           | 82            | 40              | 50.00        |
| InvertedHammer      | Bullish | 134        | 134          | 134           | 63              | 47.01        |
| HangingMan          | Bearish | 590        | 590          | 590           | 254             | 43.05        |
| BullishEngulfing    | Bullish | 1008       | 1008         | 1008          | 408             | 40.48        |
| EveningDojiStar     | Bearish | 20         | 20           | 20            | 8               | 40.00        |
| BearishEngulfing    | Bearish | 1006       | 1006         | 1006          | 389             | 38.67        |
| ShootingStar        | Bearish | 161        | 161          | 161           | 62              | 38.51        |
| GravestoneDoji      | Bearish | 546        | 546          | 546           | 210             | 38.46        |
| Hammer              | Bullish | 1058       | 1058         | 1058          | 395             | 37.33        |
| LongLine            | Both    | 6718       | 6718         | 6718          | 2411            | 35.90        |
| DragonflyDoji       | Bullish | 634        | 634          | 634           | 224             | 35.33        |
| EveningStar         | Bearish | 81         | 81           | 81            | 28              | 34.57        |
| MorningStar         | Bullish | 87         | 87           | 87            | 30              | 34.48        |
| PiercingLine        | Bullish | 3          | 3            | 3             | 1               | 33.33        |
| ThreeBlackCrows     | Bearish | 4          | 4            | 4             | 1               | 25.00        |
| MorningDojiStar     | Bullish | 17         | 17           | 17            | 4               | 23.53        |
| ThreeWhiteSoldiers  | Bullish | 43         | 43           | 43            | 10              | 23.26        |
| UpsideTasukiGap     | Bullish | 0          | 0            | 0             | 0               | 0.00         |

---

### Notes and Insights

**High-Frequency Patterns**  
- `LongLine` and `Hammer` are frequently detected, providing sufficient sample size.  
- But their accuracies are relatively around 35-37%, requiring confirmation with other indicators.

**Low-Sample Patterns**  
-  `DarkCloudCover` (1 signal) and `PiercingLine` (3 signals) have very limited data points.  

**Zero-Signal Patterns**  
- `UpsideTasukiGap` was not detected at all in the dataset.

**Accuracy Definition**  
- Accuracy is calculated based on whether the price moved more than ±0.05% in the expected direction within 1 candle after the signal was generated.

**No Data Leakage**  
- All signals are generated using only past or current candle data.  
- Future prices are used only for evaluation purposes (`shift(-1)` during backtest), not for signal generation.  
- This ensures fair performance assessment and no forward-looking bias.
