# Install dependencies

In [62]:
%pip install -r requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


# imports

In [63]:
import numpy as np
import plotly.graph_objects as go
import pandas as pd
import random
import os
from datetime import datetime, timedelta
import tradermade
import yfinance as yf
from typing import List, TypedDict, Union, Dict

# Constants

In [64]:
TRADERMADE_API_KEY = os.getenv("TRADERMADE_API_KEY")
CURRENCY = "XAUUSD"
REQUIRED_COLUMNS = ['Open', 'High', 'Low', 'Close']
TICKER = "SPY"

END_DATE = datetime.now()
START_DATE = END_DATE - timedelta(days=365)
INTERVAL = "1d"

# Initialization

In [65]:
tradermade.set_rest_api_key(TRADERMADE_API_KEY)

# Get unstructured data (tick level)

## Get from yfinance (bars)

In [66]:
# df = yf.download(TICKER, start=START_DATE, end=END_DATE, interval=INTERVAL, auto_adjust=False, ignore_tz=True)

### Preprocessing data

#### Handle MultiIndex columns

In [67]:
# if isinstance(df.columns, pd.MultiIndex):
#     df = df.xs(TICKER, axis=1, level=1)
#     df.columns = [col.title() for col in df.columns]
# else:
#     df.columns = [col.title() for col in df.columns]

#### Convert required columns to numeric and drop rows with NaN

In [68]:
# df[REQUIRED_COLUMNS] = df[REQUIRED_COLUMNS].apply(pd.to_numeric, errors='coerce')
# df = df.dropna(subset=REQUIRED_COLUMNS)
# df.index = df.index.tz_localize(None)

## Mock data

In [69]:
START_TIME = "2025-07-14 09:30:00"
END_TIME = "2025-07-14 16:00:00"

def generate_random_trade_times(n_trades, start_time, end_time):
    start_ts = pd.to_datetime(start_time)
    end_ts = pd.to_datetime(end_time)

    total_seconds = int((end_ts - start_ts).total_seconds())

    # Generate n_trades random seconds within the time range
    random_seconds = sorted(random.sample(range(total_seconds), n_trades))
    
    return [start_ts + timedelta(seconds=s) for s in random_seconds]


def generate_mock_trades(num_trades=1000, start_price=100.0, start_time=START_TIME, end_time=END_TIME):
    timestamps = generate_random_trade_times(
        n_trades=1000,
        start_time=start_time,
        end_time=end_time
    )

    prices = [start_price]
    for _ in range(1, num_trades):
        # Simulate small price changes
        change = np.random.normal(loc=0, scale=0.05)
        prices.append(round(prices[-1] + change, 2))

    volumes = np.random.randint(1, 1000, size=num_trades)

    df = pd.DataFrame({
        'timestamp': timestamps,
        'price': prices,
        'volume': volumes
    })
    return df

df = generate_mock_trades()
df

Unnamed: 0,timestamp,price,volume
0,2025-07-14 09:30:39,100.00,262
1,2025-07-14 09:30:40,99.99,905
2,2025-07-14 09:31:02,100.01,844
3,2025-07-14 09:31:15,99.99,153
4,2025-07-14 09:31:44,100.00,670
...,...,...,...
995,2025-07-14 15:58:42,99.54,490
996,2025-07-14 15:59:07,99.48,607
997,2025-07-14 15:59:10,99.46,992
998,2025-07-14 15:59:16,99.45,682


# Convert unstructured data to bars

In [None]:
def convert_to_bars(trade_groups: List[Dict[str, Union[datetime, float, int]]]) -> pd.DataFrame: # start_time, end_time, open, close, high, low, volume
    # Sort based on time
    trade_groups = [df.sort_values(by="timestamp") for df in trade_groups]
    # Calculate columns
    bars = [
        {
            "start_time": group["timestamp"].iloc[0],
            "end_time": group["timestamp"].iloc[-1],
            "open": group["price"].iloc[0],
            "close": group["price"].iloc[-1],
            "high": group["price"].max(),
            "low": group["price"].min(),
            "volume": group["volume"].sum()
        } 
    for group in trade_groups]
    
    return pd.DataFrame(bars)

## Time bars

In [None]:
def generate_time_bars(unstructured_data, sampling_rate: int=60, fill_empty: bool=False) -> pd.DataFrame:    
    unstructured_data_copy = unstructured_data.copy()
    unstructured_data_copy["timestamp"] = pd.to_datetime(unstructured_data_copy["timestamp"])
    unstructured_data_copy = unstructured_data_copy.set_index("timestamp")
    
    # Perform resampling and calculate OHLCV
    bars = unstructured_data_copy.resample(f"{sampling_rate}s").agg({
        "price": ["first", "max", "min", "last"],
        "volume": "sum"
    })

    bars.columns = ["open", "high", "low", "close", "volume"]
    bars = bars.reset_index()
    
    bars["start_time"] = bars["timestamp"]
    bars["end_time"] = bars["start_time"] + timedelta(seconds=sampling_rate) - timedelta(milliseconds=1)
    bars = bars.drop(columns=["timestamp"])

    # Fill empty time slots
    if fill_empty:
        empty_mask = bars["close"].isna()
        bars["close"] = bars["close"].ffill()

        for col in ["open", "high", "low"]:
            bars.loc[empty_mask, col] = bars.loc[empty_mask, "close"]

        bars["volume"] = bars["volume"].fillna(0)
        
    # Reorder columns
    cols = ["start_time", "end_time"] + [col for col in bars.columns if col not in ["start_time", "end_time"]]
    bars = bars[cols]
    return bars

Unnamed: 0,start_time,end_time,open,high,low,close,volume
0,2025-07-14 09:30:00,2025-07-14 09:30:59.999,100.00,100.00,99.99,99.99,1167
1,2025-07-14 09:31:00,2025-07-14 09:31:59.999,100.01,100.01,99.99,100.00,1667
2,2025-07-14 09:32:00,2025-07-14 09:32:59.999,100.02,100.02,100.02,100.02,836
3,2025-07-14 09:33:00,2025-07-14 09:33:59.999,100.07,100.17,100.07,100.17,1343
4,2025-07-14 09:34:00,2025-07-14 09:34:59.999,,,,,0
...,...,...,...,...,...,...,...
385,2025-07-14 15:55:00,2025-07-14 15:55:59.999,99.38,99.44,99.38,99.44,107
386,2025-07-14 15:56:00,2025-07-14 15:56:59.999,99.47,99.47,99.45,99.45,1439
387,2025-07-14 15:57:00,2025-07-14 15:57:59.999,99.40,99.64,99.40,99.56,2696
388,2025-07-14 15:58:00,2025-07-14 15:58:59.999,99.54,99.54,99.54,99.54,490


## Tick bars

In [None]:
def generate_tick_bars(unstructured_data, sampling_rate=5) -> pd.DataFrame: 
    # Grouping
    trade_groups = [unstructured_data.iloc[i:i + sampling_rate] for i in range(0, len(unstructured_data), sampling_rate)]
    # Generating bars
    return convert_to_bars(trade_groups)

Unnamed: 0,start_time,end_time,open,close,high,low,volume
0,2025-07-14 09:30:39,2025-07-14 09:31:44,100.00,100.00,100.01,99.99,2834
1,2025-07-14 09:32:48,2025-07-14 09:35:19,100.02,100.23,100.23,100.02,3141
2,2025-07-14 09:35:49,2025-07-14 09:38:51,100.21,100.29,100.29,100.21,2290
3,2025-07-14 09:38:52,2025-07-14 09:39:32,100.35,100.46,100.46,100.35,1577
4,2025-07-14 09:40:43,2025-07-14 09:42:24,100.45,100.31,100.45,100.31,2282
...,...,...,...,...,...,...,...
195,2025-07-14 15:52:33,2025-07-14 15:53:14,99.54,99.48,99.54,99.48,2155
196,2025-07-14 15:53:25,2025-07-14 15:54:36,99.40,99.40,99.50,99.38,2972
197,2025-07-14 15:55:15,2025-07-14 15:57:07,99.38,99.40,99.47,99.38,1766
198,2025-07-14 15:57:26,2025-07-14 15:57:59,99.45,99.56,99.64,99.45,2476


## Volume bars

## Dollar bars

# Plot bars

## Time bars

In [73]:
fig = go.Figure(data=[go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
    name='SPY'
)])

fig.update_layout(
    title=f'SPY (S&P 500 ETF) Candlestick Chart - Last 60 Days',
    yaxis_title='Price (USD)',
    xaxis_title='Date',
    xaxis_rangeslider_visible=True, 
    template='seaborn', # other options: plotly, plotly_white, plotly_dark, simple_white
    showlegend=True,
)

KeyError: 'Open'

## Volume bars

In [None]:
# Assume each row = 1 trade (approximation)
tick_size = 1000  # Create a bar every 1000 "ticks"
tick_bars = df.iloc[::tick_size]  # Slice every N rows

cumulative_volume = df['Volume'].cumsum()
volume_bar_size = 10000  # New bar every 10,000 shares traded

# Group by volume thresholds
volume_bars = df.groupby(cumulative_volume // volume_bar_size).agg({
    'Open': 'first',
    'High': 'max',
    'Low': 'min',
    'Close': 'last',
    'Volume': 'sum'
})

import plotly.graph_objects as go

fig = go.Figure(data=[go.Candlestick(
    x=volume_bars.index,
    open=volume_bars['Open'],
    high=volume_bars['High'],
    low=volume_bars['Low'],
    close=volume_bars['Close']
)])

fig.update_layout(
    title="AAPL Volume Bars (10,000 shares/bar)",
    yaxis_title="Price",
    xaxis_title="Volume Bar Index",
    template="plotly_dark"
)
fig.show()