5 changes: 5 additions & 0 deletions toptek/.env.example
@@ -0,0 +1,5 @@
PX_BASE_URL=https://gateway-api-demo.s2f.projectx.com
PX_MARKET_HUB=https://gateway-rtc-demo.s2f.projectx.com/hubs/market
PX_USER_HUB=https://gateway-rtc-demo.s2f.projectx.com/hubs/user
PX_USERNAME=bot-user
PX_API_KEY=replace-me
90 changes: 90 additions & 0 deletions toptek/README.md
@@ -0,0 +1,90 @@
# Toptek Starter

## Overview

Toptek is a Windows-friendly starter kit for working with the ProjectX Gateway (TopstepX) to research futures markets, engineer features, train simple models, backtest ideas, and manage paper/live trading from a single interface. It combines a Tkinter GUI with a CLI for automation-friendly workflows.

> **Not financial advice. Manual trading decisions only. Always respect Topstep rules and firm risk limits.**

## Quickstart

```powershell
# Windows, Python 3.11
py -3.11 -m venv .venv
.venv\Scripts\activate
pip install --upgrade pip
pip install -r requirements-lite.txt

copy .env.example .env
# edit PX_* in .env OR use GUI Settings
python main.py
```

## CLI usage examples

```powershell
python main.py --cli train --symbol ESZ5 --timeframe 5m --lookback 90d
python main.py --cli backtest --symbol ESZ5 --timeframe 5m --start 2025-01-01
python main.py --cli paper --symbol ESZ5 --timeframe 5m
```

## Project structure

```
toptek/
main.py
README.md
requirements-lite.txt
requirements-streaming.txt
.env.example
config/
app.yml
risk.yml
features.yml
core/
gateway.py
symbols.py
data.py
features.py
model.py
backtest.py
risk.py
live.py
utils.py
gui/
app.py
widgets.py
```

## Configuration

Configuration defaults live under the `config/` folder and are merged with values from `.env`. Use the GUI Settings tab (Login section) to create or update the `.env` file if one is missing.
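
A minimal sketch of how that merge could work, assuming hypothetical helper names and the PyYAML and python-dotenv packages (illustrative only, not the shipped `core/utils.py` API):

```python
# Hypothetical sketch: YAML defaults merged with PX_* overrides from .env.
import os
from pathlib import Path

import yaml                      # PyYAML (assumed available)
from dotenv import load_dotenv   # python-dotenv (assumed available)


def load_config(config_dir: Path = Path("config")) -> dict:
    """Read every config/*.yml file, then overlay PX_* values from the environment."""
    load_dotenv()  # populate os.environ from .env when the file exists
    merged: dict = {}
    for path in sorted(config_dir.glob("*.yml")):
        with path.open("r", encoding="utf-8") as handle:
            merged[path.stem] = yaml.safe_load(handle) or {}
    # Values from .env win over YAML defaults.
    merged["gateway"] = {
        "base_url": os.getenv("PX_BASE_URL", ""),
        "username": os.getenv("PX_USERNAME", ""),
        "api_key": os.getenv("PX_API_KEY", ""),
    }
    return merged
```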

## Requirements profiles

- `requirements-lite.txt`: minimal dependencies for polling workflows.
- `requirements-streaming.txt`: extends the lite profile with optional SignalR streaming support.

## Development notes

- Source code is fully typed and documented with docstrings.
- HTTP interactions with the ProjectX Gateway rely on `httpx` with retry-once semantics for authentication failures (see the sketch after this list).
- Feature engineering uses `numpy` and `ta` indicators; additional features can be added to `core/features.py`.
- Models are persisted locally in the `models/` folder.
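
As noted in the `httpx` bullet above, the retry-once behaviour can be pictured roughly like this; the login endpoint, field names, and class shape are assumptions for illustration, not the actual `core/gateway.py` implementation:

```python
# Hypothetical sketch of retry-once-on-401 semantics; endpoint and field names are illustrative.
import httpx


class GatewaySession:
    def __init__(self, base_url: str, username: str, api_key: str) -> None:
        self._client = httpx.Client(base_url=base_url, timeout=10.0)
        self._username = username
        self._api_key = api_key
        self._token: str | None = None

    def _login(self) -> None:
        response = self._client.post(
            "/api/Auth/loginKey",  # assumed path; confirm against the gateway docs
            json={"userName": self._username, "apiKey": self._api_key},
        )
        response.raise_for_status()
        self._token = response.json().get("token")

    def post(self, path: str, payload: dict) -> dict:
        if self._token is None:
            self._login()
        response = self._client.post(
            path, json=payload, headers={"Authorization": f"Bearer {self._token}"}
        )
        if response.status_code == 401:  # re-authenticate and retry exactly once
            self._login()
            response = self._client.post(
                path, json=payload, headers={"Authorization": f"Bearer {self._token}"}
            )
        response.raise_for_status()
        return response.json()
```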

## Safety

- Symbol validation ensures only CME/CBOT/NYMEX/COMEX futures are traded.
- Risk limits derive from `config/risk.yml`, and the GUI enforces Topstep-style guardrails (see the sketch after this list).
- No trading activity occurs automatically; all orders require manual confirmation.
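
A sketch of how the guardrails referenced above could gate an order using the values in `config/risk.yml`; the function name and arguments are illustrative, not the actual `core/risk.py` API:

```python
# Hypothetical pre-order check driven by config/risk.yml values; names are illustrative.
from datetime import time


def order_allowed(size: int, daily_pnl: float, now: time, limits: dict) -> tuple[bool, str]:
    """Return (allowed, reason) for a proposed order under the configured limits."""
    if size > limits["max_position_size"]:
        return False, "position size exceeds max_position_size"
    if daily_pnl <= -limits["max_daily_loss"]:
        return False, "daily loss limit reached"
    for window in limits.get("restricted_trading_hours", []):
        start = time.fromisoformat(window["start"])
        end = time.fromisoformat(window["end"])
        if start <= now <= end:
            return False, "inside a restricted trading window"
    return True, "ok"
```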

## Optional streaming

Install the streaming extras when ready to experiment with SignalR real-time data:

```powershell
pip install -r requirements-streaming.txt
```

Streaming helpers are stubbed in `core/live.py` and disabled unless `signalrcore` is installed.
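
The gating can be pictured as an optional-import guard like the sketch below; the builder options and hub wiring are assumptions rather than the shipped `core/live.py` code:

```python
# Hypothetical optional-import guard: streaming stays disabled when signalrcore is absent.
try:
    from signalrcore.hub_connection_builder import HubConnectionBuilder
    STREAMING_AVAILABLE = True
except ImportError:
    HubConnectionBuilder = None
    STREAMING_AVAILABLE = False


def build_market_hub(url: str, token: str):
    """Return a configured hub connection, or None when the streaming extras are missing."""
    if not STREAMING_AVAILABLE:
        return None
    return (
        HubConnectionBuilder()
        .with_url(f"{url}?access_token={token}")
        .with_automatic_reconnect(
            {"type": "raw", "keep_alive_interval": 10, "reconnect_interval": 5}
        )
        .build()
    )
```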

4 changes: 4 additions & 0 deletions toptek/config/app.yml
@@ -0,0 +1,4 @@
polling_interval_seconds: 5
cache_directory: data/cache
models_directory: models
log_level: INFO
14 changes: 14 additions & 0 deletions toptek/config/features.yml
@@ -0,0 +1,14 @@
default_timeframe: 5m
lookback_minutes: 1440
feature_set:
- rsi_14
- ema_fast_12
- ema_slow_26
- macd
- atr_14
- bollinger_perc_20
- roc_10
- obv
- adx_14
- donchian_width_20
- volatility_parkinson
8 changes: 8 additions & 0 deletions toptek/config/risk.yml
@@ -0,0 +1,8 @@
max_position_size: 5
max_daily_loss: 2500
restricted_trading_hours:
- start: "15:55"
end: "16:05"
atr_multiplier_stop: 2.0
cooldown_losses: 2
cooldown_minutes: 30
1 change: 1 addition & 0 deletions toptek/core/__init__.py
@@ -0,0 +1 @@
"""Core modules for Toptek."""
37 changes: 37 additions & 0 deletions toptek/core/backtest.py
@@ -0,0 +1,37 @@
"""Vectorised backtesting utilities for evaluating strategies."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict

import numpy as np


@dataclass
class BacktestResult:
"""Summary statistics for a backtest run."""

hit_rate: float
sharpe: float
max_drawdown: float
expectancy: float
equity_curve: np.ndarray


def run_backtest(returns: np.ndarray, signals: np.ndarray, *, fee_per_trade: float = 0.0) -> BacktestResult:
"""Run a simple long/flat backtest."""

trade_returns = returns * signals - fee_per_trade
equity_curve = np.cumsum(trade_returns)
wins = trade_returns > 0
hit_rate = float(wins.mean()) if len(trade_returns) else 0.0
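    # sqrt(252) annualises the per-bar Sharpe under a daily-bar assumption; scale differently for intraday data.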
sharpe = float(np.mean(trade_returns) / (np.std(trade_returns) + 1e-9) * np.sqrt(252))
running_max = np.maximum.accumulate(equity_curve)
drawdowns = running_max - equity_curve
max_drawdown = float(drawdowns.max()) if len(drawdowns) else 0.0
expectancy = float(np.mean(trade_returns))
return BacktestResult(hit_rate=hit_rate, sharpe=sharpe, max_drawdown=max_drawdown, expectancy=expectancy, equity_curve=equity_curve)


__all__ = ["run_backtest", "BacktestResult"]
91 changes: 91 additions & 0 deletions toptek/core/data.py
@@ -0,0 +1,91 @@
"""Data retrieval and local caching helpers."""

from __future__ import annotations

import json
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Iterable, List

import numpy as np
import pandas as pd

from .gateway import ProjectXGateway
from .utils import build_logger


logger = build_logger(__name__)


def _cache_file(cache_dir: Path, symbol: str, timeframe: str) -> Path:
safe_symbol = symbol.replace("/", "-")
return cache_dir / f"{safe_symbol}_{timeframe}.json"


def load_cached_bars(cache_dir: Path, symbol: str, timeframe: str) -> List[Dict[str, Any]]:
"""Load cached bar data if available."""

path = _cache_file(cache_dir, symbol, timeframe)
if not path.exists():
return []
with path.open("r", encoding="utf-8") as handle:
return json.load(handle)


def save_cached_bars(cache_dir: Path, symbol: str, timeframe: str, bars: Iterable[Dict[str, Any]]) -> None:
"""Persist bar data to disk for reuse."""

path = _cache_file(cache_dir, symbol, timeframe)
path.parent.mkdir(parents=True, exist_ok=True)
with path.open("w", encoding="utf-8") as handle:
json.dump(list(bars), handle)


def fetch_bars(
gateway: ProjectXGateway,
*,
symbol: str,
timeframe: str,
start: datetime,
end: datetime,
cache_dir: Path,
) -> List[Dict[str, Any]]:
"""Fetch bars from ProjectX or local cache."""

cached = load_cached_bars(cache_dir, symbol, timeframe)
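    # Note: cached bars are returned as-is; the requested start/end window is not re-checked.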
if cached:
return cached
payload = {
"contractSymbol": symbol,
"timeFrame": timeframe,
"startTime": start.isoformat(),
"endTime": end.isoformat(),
}
response = gateway.retrieve_bars(payload)
bars = response.get("bars", [])
save_cached_bars(cache_dir, symbol, timeframe, bars)
return bars


def resample_ohlc(bars: List[Dict[str, Any]], *, field: str = "close") -> np.ndarray:
"""Return a numpy array of a given bar field."""

return np.array([float(bar.get(field, 0.0)) for bar in bars], dtype=float)


__all__ = ["fetch_bars", "resample_ohlc", "load_cached_bars", "save_cached_bars", "sample_dataframe"]



def sample_dataframe(rows: int = 500) -> pd.DataFrame:
"""Generate a synthetic OHLCV DataFrame for offline workflows."""

index = pd.date_range(end=datetime.utcnow(), periods=rows, freq="5min")
base = np.cumsum(np.random.randn(rows)) + 4500
high = base + np.random.rand(rows) * 2
low = base - np.random.rand(rows) * 2
close = base + np.random.randn(rows) * 0.5
open_ = close + np.random.randn(rows) * 0.3
volume = np.random.randint(100, 1000, size=rows)
return pd.DataFrame({"open": open_, "high": high, "low": low, "close": close, "volume": volume}, index=index)

105 changes: 105 additions & 0 deletions toptek/core/features.py
@@ -0,0 +1,105 @@
"""Feature engineering utilities built on ``ta`` and ``numpy``."""

from __future__ import annotations

from dataclasses import dataclass
from typing import Dict

import numpy as np
import pandas as pd
from ta.momentum import RSIIndicator, ROCIndicator, StochasticOscillator, WilliamsRIndicator
from ta.trend import ADXIndicator, CCIIndicator, EMAIndicator, MACD, PSARIndicator, SMAIndicator
from ta.volatility import AverageTrueRange, BollingerBands, DonchianChannel
from ta.volume import EaseOfMovementIndicator, MFIIndicator, OnBalanceVolumeIndicator


@dataclass
class FeatureResult:
"""Represents computed feature arrays."""

name: str
values: np.ndarray


def compute_features(data: pd.DataFrame) -> pd.DataFrame:
"""Compute a broad set of technical indicators.

Args:
data: DataFrame with columns ``open``, ``high``, ``low``, ``close``, ``volume``.

Returns:
``pandas.DataFrame`` of indicator features aligned to ``data`` index with
early NaN rows removed.
"""

close = data["close"]
high = data["high"]
low = data["low"]
volume = data["volume"].replace(0, np.nan)
features: Dict[str, pd.Series] = {}

features["sma_10"] = SMAIndicator(close, window=10).sma_indicator()
features["sma_20"] = SMAIndicator(close, window=20).sma_indicator()
features["ema_12"] = EMAIndicator(close, window=12).ema_indicator()
features["ema_26"] = EMAIndicator(close, window=26).ema_indicator()
features["ema_50"] = EMAIndicator(close, window=50).ema_indicator()
features["ema_200"] = EMAIndicator(close, window=200).ema_indicator()

macd = MACD(close)
features["macd"] = macd.macd()
features["macd_signal"] = macd.macd_signal()
features["macd_hist"] = macd.macd_diff()

features["rsi_14"] = RSIIndicator(close, window=14).rsi()
features["roc_10"] = ROCIndicator(close, window=10).roc()
features["roc_20"] = ROCIndicator(close, window=20).roc()
features["willr_14"] = WilliamsRIndicator(high, low, close, lbp=14).williams_r()
features["stoch_k"] = StochasticOscillator(high, low, close).stoch()
features["stoch_d"] = StochasticOscillator(high, low, close).stoch_signal()

atr = AverageTrueRange(high, low, close, window=14)
features["atr_14"] = atr.average_true_range()

bb = BollingerBands(close, window=20, window_dev=2)
features["bb_high"] = bb.bollinger_hband()
features["bb_low"] = bb.bollinger_lband()
features["bb_percent"] = bb.bollinger_pband()
features["bb_width"] = bb.bollinger_wband()

donchian = DonchianChannel(high, low, close, window=20)
features["donchian_high"] = donchian.donchian_channel_hband()
features["donchian_low"] = donchian.donchian_channel_lband()
features["donchian_width"] = features["donchian_high"] - features["donchian_low"]

adx = ADXIndicator(high, low, close, window=14)
features["adx_14"] = adx.adx()
features["di_plus"] = adx.adx_pos()
features["di_minus"] = adx.adx_neg()

features["obv"] = OnBalanceVolumeIndicator(close, volume.fillna(0)).on_balance_volume()
features["mfi_14"] = MFIIndicator(high, low, close, volume.fillna(0), window=14).money_flow_index()
features["eom_14"] = EaseOfMovementIndicator(high, low, volume.fillna(1), window=14).ease_of_movement()

features["cci_20"] = CCIIndicator(high, low, close, window=20).cci()
psar = PSARIndicator(high, low, close)
features["psar"] = psar.psar()

log_returns = np.log(close).diff().fillna(0)
features["return_1"] = log_returns
features["return_5"] = log_returns.rolling(window=5).sum()
features["return_20"] = log_returns.rolling(window=20).sum()

features["volatility_close"] = log_returns.rolling(window=20).std()
high_low = np.log(high / low)
features["volatility_parkinson"] = high_low.rolling(window=20).std()

volume_zscore = (volume - volume.rolling(20).mean()) / volume.rolling(20).std()
features["volume_zscore"] = volume_zscore

frame = pd.DataFrame(features, index=data.index)
frame = frame.replace([np.inf, -np.inf], np.nan)
frame = frame.dropna().astype(float)
return frame


__all__ = ["compute_features", "FeatureResult"]