# Install dependencies

In [391]:
%pip install -r requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


# Imports

In [392]:
import numpy as np
import plotly.graph_objects as go
import pandas as pd
import random
import os
from datetime import datetime, timedelta
import tradermade
import yfinance as yf
from typing import List, TypedDict, Union, Dict

# Constants

In [393]:
# TRADERMADE_API_KEY = os.getenv("TRADERMADE_API_KEY")
# CURRENCY = "XAUUSD"
# REQUIRED_COLUMNS = ['Open', 'High', 'Low', 'Close']
# TICKER = "SPY"
# END_DATE = datetime.now()
# START_DATE = END_DATE - timedelta(days=365)
# INTERVAL = "1d"

# Time window for the mock data: these two values are the default
# start_time / end_time arguments of generate_mock_trades.
START_TIME = "2025-07-14 09:30:00"
END_TIME = "2025-07-14 16:00:00"

# Initialization

In [394]:
# tradermade.set_rest_api_key(TRADERMADE_API_KEY)
# Remove the row cap so displayed DataFrames are not truncated.
pd.set_option('display.max_rows', None)

# Get unstructured data (tick level)

## Get from yfinance (bars)

In [395]:
# df = yf.download(TICKER, start=START_DATE, end=END_DATE, interval=INTERVAL, auto_adjust=False, ignore_tz=True)

### Preprocessing data

#### Handle MultiIndex columns

In [396]:
# if isinstance(df.columns, pd.MultiIndex):
#     df = df.xs(TICKER, axis=1, level=1)
#     df.columns = [col.title() for col in df.columns]
# else:
#     df.columns = [col.title() for col in df.columns]

#### Convert required columns to numeric and drop rows with NaN

In [397]:
# df[REQUIRED_COLUMNS] = df[REQUIRED_COLUMNS].apply(pd.to_numeric, errors='coerce')
# df = df.dropna(subset=REQUIRED_COLUMNS)
# df.index = df.index.tz_localize(None)

## Mock data

In [398]:
def generate_random_trade_times(n_trades, start_time, end_time):
    """Draw ``n_trades`` distinct, chronologically ordered trade timestamps.

    Every timestamp lies on a whole-second offset from ``start_time`` inside
    the half-open window ``[start_time, end_time)``.

    Args:
        n_trades: Number of timestamps to draw.
        start_time: Window start; anything ``pd.to_datetime`` accepts.
        end_time: Window end (exclusive); anything ``pd.to_datetime`` accepts.

    Returns:
        A list of ``pd.Timestamp`` objects in ascending order.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``n_trades`` is
            larger than the number of whole seconds in the window.
    """
    window_start = pd.to_datetime(start_time)
    window_length = pd.to_datetime(end_time) - window_start
    n_seconds = int(window_length.total_seconds())

    # Sampling without replacement means no two trades share the same second.
    offsets = random.sample(range(n_seconds), n_trades)
    offsets.sort()

    return [window_start + timedelta(seconds=offset) for offset in offsets]


def generate_mock_trades(num_trades=1000, start_price=100.0, start_time=START_TIME, end_time=END_TIME):
    """Build a DataFrame of synthetic trades following a random-walk price.

    Args:
        num_trades: Number of trades (rows) to generate.
        start_price: Price of the first trade.
        start_time: Start of the window the timestamps are drawn from.
        end_time: End of the window the timestamps are drawn from.

    Returns:
        A DataFrame with ``num_trades`` rows and the columns ``timestamp``
        (ascending), ``price`` (rounded to 2 decimals) and ``volume``
        (integers drawn from ``[1, 1000)``).
    """
    # Request exactly num_trades timestamps so every column has the same
    # length (a hard-coded 1000 here broke any other num_trades value).
    timestamps = generate_random_trade_times(
        n_trades=num_trades,
        start_time=start_time,
        end_time=end_time
    )

    # Random walk: each price is the previous one plus a small Gaussian step.
    # With zero trades there is no first price either.
    prices = [start_price] if num_trades > 0 else []
    for _ in range(1, num_trades):
        change = np.random.normal(loc=0, scale=0.05)
        prices.append(round(prices[-1] + change, 2))

    volumes = np.random.randint(1, 1000, size=num_trades)

    return pd.DataFrame({
        'timestamp': timestamps,
        'price': prices,
        'volume': volumes
    })

# generate_mock_trades returns rows in ascending timestamp order; [::-1]
# reverses them so the frame handed to the bar builders is newest-first.
df = generate_mock_trades()[::-1]
df

Unnamed: 0,timestamp,price,volume
999,2025-07-14 15:59:57,98.08,476
998,2025-07-14 15:59:38,98.11,72
997,2025-07-14 15:59:26,98.15,868
996,2025-07-14 15:59:13,98.07,939
995,2025-07-14 15:58:36,98.12,127
994,2025-07-14 15:58:25,98.1,278
993,2025-07-14 15:58:12,98.04,144
992,2025-07-14 15:57:28,98.02,757
991,2025-07-14 15:57:23,98.0,952
990,2025-07-14 15:57:17,98.03,861


# Convert unstructured data to bars

In [399]:
def convert_to_bars(trade_groups: List[pd.DataFrame]) -> pd.DataFrame: # start_time, end_time, open, close, high, low, volume
    """Collapse each group of trades into one OHLCV bar.

    Args:
        trade_groups: Iterable of DataFrames, one per bar, each holding the
            columns ``timestamp``, ``price`` and ``volume`` with rows already
            in the order that defines "first" and "last".

    Returns:
        A DataFrame with one row per non-empty group and the columns
        ``start_time``, ``end_time``, ``open``, ``close``, ``high``, ``low``,
        ``volume``. The columns are present even when there are no bars.
    """
    columns = ["start_time", "end_time", "open", "close", "high", "low", "volume"]

    bars = [
        {
            "start_time": group["timestamp"].iloc[0],
            "end_time": group["timestamp"].iloc[-1],
            "open": group["price"].iloc[0],
            "close": group["price"].iloc[-1],
            "high": group["price"].max(),
            "low": group["price"].min(),
            "volume": group["volume"].sum()
        }
        for group in trade_groups
        # An empty group has no first/last trade, so it cannot form a bar.
        if len(group) > 0
    ]

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(bars, columns=columns)

## Tick bars

In [400]:
def generate_tick_bars(unstructured_data, sampling_rate: int=5) -> pd.DataFrame: 
    """Build bars that each contain a fixed number of trades.

    Args:
        unstructured_data: Trades with ``timestamp``, ``price`` and ``volume``
            columns, in any row order.
        sampling_rate: Number of trades per bar; the final bar may hold fewer.

    Returns:
        The bar DataFrame produced by ``convert_to_bars``.
    """
    # Work on a chronologically ordered frame with a clean positional index.
    ordered = unstructured_data.sort_values("timestamp").reset_index(drop=True)

    # Consecutive, non-overlapping slices of `sampling_rate` rows each.
    chunk_starts = range(0, len(ordered), sampling_rate)
    trade_groups = [ordered.iloc[start:start + sampling_rate] for start in chunk_starts]

    return convert_to_bars(trade_groups)

tick_bars = generate_tick_bars(df)

## Time bars

In [401]:
def generate_time_bars(unstructured_data, sampling_rate: int=60, fill_empty: bool=False) -> pd.DataFrame:
    """Build OHLCV bars over fixed-width time windows.

    Args:
        unstructured_data: Trades with ``timestamp``, ``price`` and ``volume``
            columns. The input frame is not modified.
        sampling_rate: Window width in seconds.
        fill_empty: When True, windows without trades take the previous
            bar's close as their open/high/low/close (volume stays 0).
            When False those price fields are left as NaN.

    Returns:
        A DataFrame with the columns ``start_time``, ``end_time``, ``open``,
        ``high``, ``low``, ``close``, ``volume``. ``end_time`` is one
        millisecond before the start of the next window.
    """
    trades = unstructured_data.copy()
    trades["timestamp"] = pd.to_datetime(trades["timestamp"])

    # One row per window, labelled by the window's left edge.
    resampled = trades.set_index("timestamp").resample(f"{sampling_rate}s")
    bars = resampled.agg({
        "price": ["first", "max", "min", "last"],
        "volume": "sum"
    })
    bars.columns = ["open", "high", "low", "close", "volume"]

    # Turn the window label into the leading start_time column and place
    # end_time right next to it.
    bars = bars.rename_axis("start_time").reset_index()
    bar_span = timedelta(seconds=sampling_rate) - timedelta(milliseconds=1)
    bars.insert(1, "end_time", bars["start_time"] + bar_span)

    if fill_empty:
        # A window is empty when no trade produced a closing price.
        is_empty = bars["close"].isna()
        bars["close"] = bars["close"].ffill()

        for field in ("open", "high", "low"):
            bars[field] = bars[field].where(~is_empty, bars["close"])

        bars["volume"] = bars["volume"].fillna(0)

    return bars

time_bars = generate_time_bars(df)

## Volume bars

In [402]:
def generate_volume_bars(unstructured_data, sampling_rate: int=1000) -> pd.DataFrame:
    """Build bars that each close once a traded-volume threshold is reached.

    Trades are processed in chronological order. A bar closes on the trade
    that brings the bar's accumulated volume to ``sampling_rate`` or more;
    the accumulator then restarts from zero (overshoot is not carried over).
    Trades left over after the last completed bar form a final partial bar.

    Args:
        unstructured_data: Trades with ``timestamp``, ``price`` and ``volume``
            columns, in any row order. The input frame is not modified.
        sampling_rate: Volume needed to close a bar.

    Returns:
        A DataFrame with the columns ``start_time``, ``end_time``, ``open``,
        ``close``, ``high``, ``low``, ``volume``. It is empty, with the same
        columns, when there are no trades.
    """
    cols = ["start_time", "end_time", "open", "close", "high", "low", "volume"]

    trades = unstructured_data.copy()
    trades["timestamp"] = pd.to_datetime(trades["timestamp"])
    trades = trades.sort_values("timestamp").reset_index(drop=True)

    # No trades means no bars; return the schema instead of failing on the
    # column selection of a column-less frame.
    if trades.empty:
        return pd.DataFrame(columns=cols)

    # Walk the trades once and tag every trade with the bar it belongs to.
    bar_ids = []
    current_bar = 0
    cum_volume = 0
    for volume in trades["volume"].tolist():
        bar_ids.append(current_bar)
        cum_volume += volume
        if cum_volume >= sampling_rate:
            # This trade closes the bar; the next trade opens a new one.
            current_bar += 1
            cum_volume = 0

    grouped = trades.groupby(pd.Series(bar_ids, index=trades.index), sort=True)
    # head/tail return the actual first/last row of each bar in bar order.
    first_rows = grouped.head(1).reset_index(drop=True)
    last_rows = grouped.tail(1).reset_index(drop=True)

    bars_df = pd.DataFrame({
        "start_time": first_rows["timestamp"],
        "end_time": last_rows["timestamp"],
        "open": first_rows["price"],
        "close": last_rows["price"],
        "high": grouped["price"].max().to_numpy(),
        "low": grouped["price"].min().to_numpy(),
        "volume": grouped["volume"].sum().to_numpy(),
    })
    return bars_df[cols]

volume_bars = generate_volume_bars(df)

## Dollar bars

In [403]:
def generate_dollar_bars(unstructured_data, sampling_rate: int=100000) -> pd.DataFrame:
    """Build bars that each close once a traded-value threshold is reached.

    Trades are processed in chronological order. A bar closes on the trade
    that brings the bar's accumulated ``volume * price`` to ``sampling_rate``
    or more; the accumulator then restarts from zero (overshoot is not
    carried over). Trades left over after the last completed bar form a
    final partial bar.

    Args:
        unstructured_data: Trades with ``timestamp``, ``price`` and ``volume``
            columns, in any row order. The input frame is not modified.
        sampling_rate: Traded value (volume times price) needed to close a bar.

    Returns:
        A DataFrame with the columns ``start_time``, ``end_time``, ``open``,
        ``close``, ``high``, ``low``, ``volume``. It is empty, with the same
        columns, when there are no trades.
    """
    cols = ["start_time", "end_time", "open", "close", "high", "low", "volume"]

    trades = unstructured_data.copy()
    trades["timestamp"] = pd.to_datetime(trades["timestamp"])
    trades = trades.sort_values("timestamp").reset_index(drop=True)

    # No trades means no bars; return the schema instead of failing on the
    # column selection of a column-less frame.
    if trades.empty:
        return pd.DataFrame(columns=cols)

    # Walk the trades once and tag every trade with the bar it belongs to.
    bar_ids = []
    current_bar = 0
    cum_dollars = 0
    for dollars in (trades["volume"] * trades["price"]).tolist():
        bar_ids.append(current_bar)
        cum_dollars += dollars
        if cum_dollars >= sampling_rate:
            # This trade closes the bar; the next trade opens a new one.
            current_bar += 1
            cum_dollars = 0

    grouped = trades.groupby(pd.Series(bar_ids, index=trades.index), sort=True)
    # head/tail return the actual first/last row of each bar in bar order.
    first_rows = grouped.head(1).reset_index(drop=True)
    last_rows = grouped.tail(1).reset_index(drop=True)

    bars_df = pd.DataFrame({
        "start_time": first_rows["timestamp"],
        "end_time": last_rows["timestamp"],
        "open": first_rows["price"],
        "close": last_rows["price"],
        "high": grouped["price"].max().to_numpy(),
        "low": grouped["price"].min().to_numpy(),
        "volume": grouped["volume"].sum().to_numpy(),
    })
    return bars_df[cols]

dollar_bars = generate_dollar_bars(df)

# Plot bars

## Tick bars

### Bar Chart

In [404]:
# Tick bars drawn against their position in the DataFrame index.
fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=tick_bars.index,
    name='Tick Bars',
    **{field: tick_bars[field] for field in ('open', 'high', 'low', 'close')},
))

fig.update_layout(
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    title='Tick Bars',
    xaxis_title='Bar',
    yaxis_title='Price (USD)',
    xaxis_rangeslider_visible=True,
    showlegend=True,
)

### Time Chart

In [405]:
# Tick bars drawn against each bar's start timestamp.
fig = go.Figure(data=[go.Candlestick(
    x=tick_bars['start_time'],
    open=tick_bars['open'],
    high=tick_bars['high'],
    low=tick_bars['low'],
    close=tick_bars['close'],
    name='Tick Bars'
)])

fig.update_layout(
    title='Tick Bars',
    yaxis_title='Price (USD)',
    # The x-axis carries start_time values, so it is labelled as time
    # rather than as a bar number.
    xaxis_title='Time',
    xaxis_rangeslider_visible=True,
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    showlegend=True,
)

## Time bars

### Bar Chart

In [406]:
# Time bars drawn against their position in the DataFrame index.
fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=time_bars.index,
    name='Time Bars',
    **{field: time_bars[field] for field in ('open', 'high', 'low', 'close')},
))

fig.update_layout(
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    title='Time Bars',
    xaxis_title='Bar',
    yaxis_title='Price (USD)',
    xaxis_rangeslider_visible=True,
    showlegend=True,
)

### Time Chart

In [407]:
# Time bars drawn against each bar's start timestamp.
fig = go.Figure(data=[go.Candlestick(
    x=time_bars['start_time'],
    open=time_bars['open'],
    high=time_bars['high'],
    low=time_bars['low'],
    close=time_bars['close'],
    name='Time Bars'
)])

fig.update_layout(
    title='Time Bars',
    yaxis_title='Price (USD)',
    # The x-axis carries start_time values, so it is labelled as time
    # rather than as a bar number.
    xaxis_title='Time',
    xaxis_rangeslider_visible=True,
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    showlegend=True,
)

## Volume bars

### Bar Chart

In [408]:
# Volume bars drawn against their position in the DataFrame index.
fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=volume_bars.index,
    name='Volume Bars',
    **{field: volume_bars[field] for field in ('open', 'high', 'low', 'close')},
))

fig.update_layout(
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    title='Volume Bars',
    xaxis_title='Bar',
    yaxis_title='Price (USD)',
    xaxis_rangeslider_visible=True,
    showlegend=True,
)

### Time Chart

In [409]:
# Volume bars drawn against each bar's start timestamp.
fig = go.Figure(data=[go.Candlestick(
    x=volume_bars['start_time'],
    open=volume_bars['open'],
    high=volume_bars['high'],
    low=volume_bars['low'],
    close=volume_bars['close'],
    name='Volume Bars'
)])

fig.update_layout(
    title='Volume Bars',
    yaxis_title='Price (USD)',
    # The x-axis carries start_time values, so it is labelled as time
    # rather than as a bar number.
    xaxis_title='Time',
    xaxis_rangeslider_visible=True,
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    showlegend=True,
)

## Dollar bars

### Bar Chart

In [410]:
# Dollar bars drawn against their position in the DataFrame index.
fig = go.Figure()
fig.add_trace(go.Candlestick(
    x=dollar_bars.index,
    name='Dollar Bars',
    **{field: dollar_bars[field] for field in ('open', 'high', 'low', 'close')},
))

fig.update_layout(
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    title='Dollar Bars',
    xaxis_title='Bar',
    yaxis_title='Price (USD)',
    xaxis_rangeslider_visible=True,
    showlegend=True,
)

### Time Chart

In [411]:
# Dollar bars drawn against each bar's start timestamp.
fig = go.Figure(data=[go.Candlestick(
    x=dollar_bars['start_time'],
    open=dollar_bars['open'],
    high=dollar_bars['high'],
    low=dollar_bars['low'],
    close=dollar_bars['close'],
    name='Dollar Bars'
)])

fig.update_layout(
    title='Dollar Bars',
    yaxis_title='Price (USD)',
    # The x-axis carries start_time values, so it is labelled as time
    # rather than as a bar number.
    xaxis_title='Time',
    xaxis_rangeslider_visible=True,
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    showlegend=True,
)

# Labeling Bars

In [412]:
def add_return(bars):
    """Attach a ``return`` column holding the close-to-close change in percent.

    The value for each bar is its close relative to the previous bar's close;
    the first bar therefore gets NaN. Missing closes are not forward-filled
    before the change is computed.

    Args:
        bars: DataFrame with a ``close`` column. It is modified in place.

    Returns:
        The same ``bars`` object, now carrying the ``return`` column.
    """
    close_change = bars['close'].pct_change(fill_method=None)
    bars['return'] = close_change.mul(100)  # fraction -> percent
    return bars

add_return(tick_bars)

Unnamed: 0,start_time,end_time,open,close,high,low,volume,return
0,2025-07-14 09:30:33,2025-07-14 09:33:36,100.0,99.99,100.04,99.99,2542,
1,2025-07-14 09:33:49,2025-07-14 09:36:15,100.1,100.05,100.11,100.05,3363,0.06
2,2025-07-14 09:36:44,2025-07-14 09:37:56,100.08,100.06,100.17,100.06,2067,0.01
3,2025-07-14 09:38:17,2025-07-14 09:39:30,100.03,100.14,100.15,100.03,2560,0.08
4,2025-07-14 09:39:34,2025-07-14 09:42:29,100.15,100.34,100.34,100.15,1387,0.2
5,2025-07-14 09:42:31,2025-07-14 09:43:43,100.37,100.45,100.45,100.37,2094,0.11
6,2025-07-14 09:43:46,2025-07-14 09:45:20,100.5,100.47,100.58,100.47,2399,0.02
7,2025-07-14 09:45:36,2025-07-14 09:47:08,100.45,100.31,100.45,100.31,1069,-0.16
8,2025-07-14 09:47:18,2025-07-14 09:48:38,100.33,100.22,100.37,100.22,2574,-0.09
9,2025-07-14 09:48:41,2025-07-14 09:50:45,100.28,100.24,100.29,100.24,2288,0.02


## Fixed-time Horizon Labeling

In [413]:
def fixed_time_horizon_labeling(bars, threshold: float=0.1) -> pd.DataFrame: # in percent
    """Label each bar -1 / 0 / +1 by comparing its return to a fixed threshold.

    A bar is labelled with the sign of its return when the absolute return is
    strictly greater than ``threshold``; otherwise (including a NaN return)
    the label is 0.

    Args:
        bars: DataFrame holding the OHLCV bar columns plus ``return``.
            The ``threshold`` and ``fixed_time_horizon_label`` columns are
            added to it in place.
        threshold: Minimum absolute return, in the same unit as ``return``.

    Returns:
        A new DataFrame restricted to the bar columns, ``return``,
        ``threshold`` and ``fixed_time_horizon_label``, in that order.
    """
    bars['threshold'] = threshold

    # Calculate label
    returns = bars['return']
    passed_threshold = returns.abs() > threshold
    # np.sign is used instead of |r| / r: the division yields NaN for an
    # infinite (or zero) return, which would corrupt the integer label column.
    # Rows that did not pass (NaN returns included) fall back to 0, so the
    # integer cast is always safe.
    bars['fixed_time_horizon_label'] = np.where(passed_threshold, np.sign(returns), 0).astype(int)

    # Reorder columns
    cols = ["start_time", "end_time", "open", "close", "high", "low", "volume", "return", "threshold", "fixed_time_horizon_label"]
    return bars[cols]

fixed_time_horizon_labeling(tick_bars, 0.04)

Unnamed: 0,start_time,end_time,open,close,high,low,volume,return,threshold,fixed_time_horizon_label
0,2025-07-14 09:30:33,2025-07-14 09:33:36,100.0,99.99,100.04,99.99,2542,,0.04,0
1,2025-07-14 09:33:49,2025-07-14 09:36:15,100.1,100.05,100.11,100.05,3363,0.06,0.04,1
2,2025-07-14 09:36:44,2025-07-14 09:37:56,100.08,100.06,100.17,100.06,2067,0.01,0.04,0
3,2025-07-14 09:38:17,2025-07-14 09:39:30,100.03,100.14,100.15,100.03,2560,0.08,0.04,1
4,2025-07-14 09:39:34,2025-07-14 09:42:29,100.15,100.34,100.34,100.15,1387,0.2,0.04,1
5,2025-07-14 09:42:31,2025-07-14 09:43:43,100.37,100.45,100.45,100.37,2094,0.11,0.04,1
6,2025-07-14 09:43:46,2025-07-14 09:45:20,100.5,100.47,100.58,100.47,2399,0.02,0.04,0
7,2025-07-14 09:45:36,2025-07-14 09:47:08,100.45,100.31,100.45,100.31,1069,-0.16,0.04,-1
8,2025-07-14 09:47:18,2025-07-14 09:48:38,100.33,100.22,100.37,100.22,2574,-0.09,0.04,-1
9,2025-07-14 09:48:41,2025-07-14 09:50:45,100.28,100.24,100.29,100.24,2288,0.02,0.04,0


## Dynamic Threshold Labeling

In [414]:
def dynamic_threshold_labeling(bars, span: int=1000) -> pd.DataFrame:
    """Label each bar -1 / 0 / +1 against a rolling volatility threshold.

    The threshold of a bar is the exponentially weighted standard deviation
    of ``return`` up to and including that bar. A bar is labelled with the
    sign of its return when the absolute return is strictly greater than the
    threshold; otherwise (including a NaN return or threshold) the label is 0.

    Args:
        bars: DataFrame holding the OHLCV bar columns plus ``return``.
            The ``threshold`` and ``dynamic_threshold_label`` columns are
            written to it in place.
        span: Span of the exponentially weighted window.

    Returns:
        A new DataFrame restricted to the bar columns, ``return``,
        ``threshold`` and ``dynamic_threshold_label``, in that order.
    """
    returns = bars['return']

    # Per-bar threshold: EWM volatility of the returns seen so far.
    volatility = returns.ewm(span=span, min_periods=1).std()
    bars['threshold'] = volatility

    # Start from "no signal" and overwrite only the bars that beat their threshold.
    bars['dynamic_threshold_label'] = 0
    exceeds = returns.abs() > volatility
    bars.loc[exceeds, 'dynamic_threshold_label'] = returns.abs() / returns

    ordered = [
        "start_time", "end_time",
        "open", "close", "high", "low", "volume",
        "return", "threshold", "dynamic_threshold_label",
    ]
    return bars[ordered]

dynamic_threshold_labeling(tick_bars)

Unnamed: 0,start_time,end_time,open,close,high,low,volume,return,threshold,dynamic_threshold_label
0,2025-07-14 09:30:33,2025-07-14 09:33:36,100.0,99.99,100.04,99.99,2542,,,0
1,2025-07-14 09:33:49,2025-07-14 09:36:15,100.1,100.05,100.11,100.05,3363,0.06,,0
2,2025-07-14 09:36:44,2025-07-14 09:37:56,100.08,100.06,100.17,100.06,2067,0.01,0.04,0
3,2025-07-14 09:38:17,2025-07-14 09:39:30,100.03,100.14,100.15,100.03,2560,0.08,0.04,1
4,2025-07-14 09:39:34,2025-07-14 09:42:29,100.15,100.34,100.34,100.15,1387,0.2,0.08,1
5,2025-07-14 09:42:31,2025-07-14 09:43:43,100.37,100.45,100.45,100.37,2094,0.11,0.07,1
6,2025-07-14 09:43:46,2025-07-14 09:45:20,100.5,100.47,100.58,100.47,2399,0.02,0.07,0
7,2025-07-14 09:45:36,2025-07-14 09:47:08,100.45,100.31,100.45,100.31,1069,-0.16,0.11,-1
8,2025-07-14 09:47:18,2025-07-14 09:48:38,100.33,100.22,100.37,100.22,2574,-0.09,0.11,0
9,2025-07-14 09:48:41,2025-07-14 09:50:45,100.28,100.24,100.29,100.24,2288,0.02,0.11,0


# Plot labeled bars

In [415]:
# Filter bars with non-zero labels
dynamic_threshold_buy_signals = tick_bars[tick_bars['dynamic_threshold_label'] == 1]   # Bars labeled +1
dynamic_threshold_sell_signals = tick_bars[tick_bars['dynamic_threshold_label'] == -1] # Bars labeled -1

fixed_time_horizon_buy_signals = tick_bars[tick_bars['fixed_time_horizon_label'] == 1]   # Bars labeled +1
fixed_time_horizon_sell_signals = tick_bars[tick_bars['fixed_time_horizon_label'] == -1] # Bars labeled -1

# Create candlestick plot
fig = go.Figure(data=[go.Candlestick(
    x=tick_bars.index,
    open=tick_bars['open'],
    high=tick_bars['high'],
    low=tick_bars['low'],
    close=tick_bars['close'],
    name='Price'
)])

# One marker trace per (labeling method, direction). Each entry holds:
# signal rows, price column the marker is anchored to, vertical offset from
# that price, marker symbol, fill colour, outline colour, legend name.
# Buy markers sit above the bar's high, sell markers below the bar's low;
# the fixed-threshold markers use a larger offset so they do not overlap the
# dynamic-threshold ones. Legend names state the method so the two sets of
# signals can be told apart.
marker_specs = [
    (dynamic_threshold_buy_signals, 'high', 0.5, 'triangle-up', 'yellow', 'DarkGreen',
     'Dynamic Threshold Buy Signal (+1)'),
    (dynamic_threshold_sell_signals, 'low', -0.5, 'triangle-down', 'blue', 'DarkRed',
     'Dynamic Threshold Sell Signal (-1)'),
    (fixed_time_horizon_buy_signals, 'high', 1, 'triangle-up', 'green', 'DarkGreen',
     'Fixed Threshold Buy Signal (+1)'),
    (fixed_time_horizon_sell_signals, 'low', -1, 'triangle-down', 'red', 'DarkRed',
     'Fixed Threshold Sell Signal (-1)'),
]

for signals, anchor, offset, symbol, color, outline, legend_name in marker_specs:
    fig.add_trace(go.Scatter(
        x=signals.index,
        y=signals[anchor] + offset,
        mode='markers',
        marker=dict(
            symbol=symbol,
            size=8,
            color=color,
            line=dict(width=1, color=outline)
        ),
        name=legend_name,
        showlegend=True
    ))

# Update layout
fig.update_layout(
    title='Tick Bars with Buy/Sell Signals',
    yaxis_title='Price (USD)',
    xaxis_title='Bar',
    xaxis_rangeslider_visible=True,
    template='seaborn',
    hovermode='x unified'
)

fig.show()