Connected to quant-stack (Python 3.11.9)

In [None]:
# QUANT SCIENCE LLC 
# THE QUANT SCIENTIST PRO ALGORITHMIC TRADER 
# LEVEL 2 PROGRAM
# ****
# CLINIC #6: INTRO TO QSRESEARCH

# QSResearch is a powerful library designed for quantitative research and strategy development.
# It provides tools for data preprocessing, feature engineering, backtesting, and performance analysis.

# * Goal:
#   - Learn how to use the QSResearch library to implement a quantitative research project.

# * Prerequisites:
#   - Clinic #5:
#     - You have already set up the QSConnect environment and connected to the database.
#     - You have created a Zipline bundle from the price data called `qspro_historical_prices_fmp`.

# * INSTALLATION INSTRUCTIONS ----
# - Install the `qsresearch` package from GitHub:
#   ```
#   pip install git+https://github.com/quant-science/QSResearch.git
#   ```

In [None]:
# * LIBRARIES ----

import pandas as pd
import pytimetk as tk
import logging

from zipline.api import date_rules, time_rules

# As-of date (YYYY-MM-DD) shared by the whole notebook: it is the end of the
# price-history window, the prediction date, and the backtest end date.
TODAY = "2025-07-31"

In [None]:
# * 1.0 RECAP:

# 1. In Clinic #5 we set up the QSConnect environment, connected to the database, and created a Zipline bundle from the price data called `qspro_historical_prices_fmp`.
# 2. You learned how to backtest a strategy with Zipline in Clinic #2. (Ref. 02_momentum_omega.ipynb)

# 3. The QSResearch library builds on top of Zipline for much faster algorithmic trading strategy research and development: https://github.com/quant-science/QSResearch/tree/master/qsresearch

In [None]:
# * 2.0 QSRESEARCH ----

# * How it works:
# - QSResearch provides a structured way to define and run quantitative research projects.
# - It includes modules for data preprocessing, feature engineering, backtesting, and performance analysis.
# - And, most importantly, it provides an easy way to go from experiment design to backtesting and performance analysis that follows a 100% reproducible data science workflow.

In [None]:
# * Example of a typical algo trading development workflow:
# 1. Extract Zipline Data from Zipline Bundle:
#    - The `get_zipline_history` function retrieves historical price data from the Zipline bundle.
#    - This data can be used for testing your functions.

from qsresearch.utils.zipline import get_zipline_history

# Make sure to adjust the bundle name to match your own bundle. Symbols must be in the bundle.
# Clinic #5 created the `qspro_historical_prices_fmp` bundle; this example reads from a bundle named `qspro_demo_historical_prices_fmp`.

# Pull about three years of daily bars, ending at TODAY, for a small
# five-symbol universe that is only meant for testing.
df = get_zipline_history(
    symbols=["AAPL", "APP", "RKLB", "AMZN", "PLTR"],
    bundle_name="qspro_demo_historical_prices_fmp",
    frequency="1d",
    bar_count=3 * 252,  # 3 years x 252 daily bars
    end_date=TODAY,
)

df

field,date,symbol,close,high,low,open,volume
0,2022-07-28,AAPL,157.35,157.64,154.41,156.98,81378731.0
1,2022-07-28,AMZN,122.28,122.84,118.08,121.57,82245500.0
2,2022-07-28,APP,35.36,36.00,34.22,35.15,1754977.0
3,2022-07-28,PLTR,10.13,10.20,9.74,10.03,27172854.0
4,2022-07-28,RKLB,4.55,4.55,4.21,4.40,2935584.0
...,...,...,...,...,...,...,...
3775,2025-07-31,AAPL,207.57,209.84,207.16,208.49,64700006.0
3776,2025-07-31,AMZN,234.11,236.53,231.40,235.77,93411639.0
3777,2025-07-31,APP,390.70,397.92,377.52,380.00,7907606.0
3778,2025-07-31,PLTR,158.35,160.89,156.73,159.99,44722735.0


In [None]:
# 2. Select Universe of Stocks:
#    - The `universe_screener` function allows you to filter the data down to a selected universe of stocks.

from qsresearch.preprocessors import universe_screener

# Screen the raw history with only the volume filter switched on, keeping the
# top 3 symbols; every other screener option is left at its default.
df_screened = universe_screener(df, volume_top_n=3)

df_screened

2025-08-04 22:57:48,622 - INFO - Starting universe screening process.
2025-08-04 22:57:48,623 - INFO - Using max date from data: 2025-07-31 00:00:00
2025-08-04 22:57:48,623 - INFO - Lookback period: 2023-08-01 00:00:00 to 2025-07-31 00:00:00 (730 days)
2025-08-04 22:57:48,645 - INFO - Converted Pandas DataFrame to Polars DataFrame
2025-08-04 22:57:48,655 - INFO - Cast date column to Date type
2025-08-04 22:57:48,661 - INFO - Filtered data to time frame: 2023-08-01 00:00:00 to 2025-07-31 00:00:00, rows=2515
2025-08-04 22:57:48,661 - INFO - Skipped abnormal price movement filter (percent change filter).
2025-08-04 22:57:48,661 - INFO - Skipped volatility filter.
2025-08-04 22:57:48,661 - INFO - Skipped minimum average volume filter.
2025-08-04 22:57:48,673 - INFO - Applied volume filter, kept top 3 symbols, rows=1509
2025-08-04 22:57:48,673 - INFO - Skipped minimum last price filter.
2025-08-04 22:57:48,673 - INFO - Skipped minimum average price filter.
2025-08-04 22:57:48,677 - INFO - A

field,date,symbol,close,high,low,open,volume
0,2022-07-28,AAPL,157.35,157.64,154.41,156.98,81378731.0
1,2022-07-28,AMZN,122.28,122.84,118.08,121.57,82245500.0
3,2022-07-28,PLTR,10.13,10.20,9.74,10.03,27172854.0
5,2022-07-29,AAPL,162.51,163.63,159.50,161.24,101786900.0
6,2022-07-29,AMZN,134.95,137.65,132.41,134.90,148892900.0
...,...,...,...,...,...,...,...
3771,2025-07-30,AMZN,230.19,231.80,229.29,231.64,32993300.0
3773,2025-07-30,PLTR,158.61,159.38,156.56,157.37,40261700.0
3775,2025-07-31,AAPL,207.57,209.84,207.16,208.49,64700006.0
3776,2025-07-31,AMZN,234.11,236.53,231.40,235.77,93411639.0


In [None]:
# 3. Preprocessing Price Data:
#    - The `preprocess_price_data` function cleans and prepares the price data for analysis.

from qsresearch.preprocessors import preprocess_price_data

# Clean the screened prices with the preprocessor's default settings.
df_preprocessed = preprocess_price_data(df_screened)

# Column-by-column summary instead of printing the full frame.
df_preprocessed.glimpse()

2025-08-04 22:57:48,717 - INFO - Preprocessed data shape: (2268, 17)


<class 'pandas.core.frame.DataFrame'>: 2268 rows of 17 columns
symbol:                  object            ['AAPL', 'AAPL', 'AAPL', 'AAP ...
date:                    datetime64[ms]    [Timestamp('2022-07-28 00:00: ...
open:                    float64           [156.98, 161.24, 161.01, 160. ...
high:                    float64           [157.64, 163.63, 163.59, 162. ...
low:                     float64           [154.41, 159.5, 160.89, 159.6 ...
close:                   float64           [157.35, 162.51, 161.51, 160. ...
volume:                  float64           [81378731.0, 101786900.0, 678 ...
flag_missing_original:   bool              [False, False, False, False,  ...
flag_zero_replaced:      bool              [False, False, False, False,  ...
flag_duplicate_dropped:  bool              [False, False, False, False,  ...
flag_large_gap:          bool              [False, False, False, False,  ...
flag_ohlc_corrected:     bool              [False, False, False, False,  ...
flag_high_pri

In [None]:
# 4. Feature Engineering:
#    - The `add_technical_indicators` function adds features to the dataset. More feature engineering functions can be found in the `qsresearch.features` module.

from qsresearch.features import add_technical_indicators

# Append technical-indicator feature columns to the cleaned prices.
df_engineered = add_technical_indicators(
    df_preprocessed,
    compute_qs_momentum=True,    # include the QS Momentum factor
    compute_rolling_risk=False,  # skipped here because it takes long to run
)

df_engineered.glimpse()

2025-08-04 22:57:48,728 - INFO - Starting technical indicator augmentation.
2025-08-04 22:57:48,731 - INFO - Added cumulative return for 20 days
2025-08-04 22:57:48,731 - INFO - Added cumulative return for 60 days
2025-08-04 22:57:48,732 - INFO - Added cumulative return for 120 days
2025-08-04 22:57:48,733 - INFO - Added volatility for 20 days
2025-08-04 22:57:48,734 - INFO - Added volatility for 60 days
2025-08-04 22:57:48,735 - INFO - Added ATR for 14 days
2025-08-04 22:57:48,735 - INFO - Added volume ratio for 20 days
2025-08-04 22:57:48,736 - INFO - Added OBV
2025-08-04 22:57:48,744 - INFO - Added MACD with periods 12_26_9
2025-08-04 22:57:48,749 - INFO - Added MACD with periods 50_200_30
2025-08-04 22:57:48,754 - INFO - Added MACD histogram difference
2025-08-04 22:57:48,760 - INFO - Added PPO with periods 12_26
2025-08-04 22:57:48,765 - INFO - Added PPO with periods 50_200
2025-08-04 22:57:48,813 - INFO - Added QS Momentum
2025-08-04 22:57:48,821 - INFO - Added ROC features
2025-

<class 'pandas.core.frame.DataFrame'>: 2268 rows of 93 columns
symbol:                               object            ['AAPL', 'AAPL', ...
date:                                 datetime64[ms]    [Timestamp('2022 ...
open:                                 float64           [156.98, 161.24, ...
high:                                 float64           [157.64, 163.63, ...
low:                                  float64           [154.41, 159.5,  ...
close:                                float64           [157.35, 162.51, ...
volume:                               float64           [81378731.0, 101 ...
flag_missing_original:                bool              [False, False, F ...
flag_zero_replaced:                   bool              [False, False, F ...
flag_duplicate_dropped:               bool              [False, False, F ...
flag_large_gap:                       bool              [False, False, F ...
flag_ohlc_corrected:                  bool              [False, False, F ...
flag_high_pri

In [None]:
# 5. Algorithms
#    - The `use_factor_as_signal` function allows you to use a specific factor (column) as a trading signal.

from qsresearch.strategies.factor.algorithms import use_factor_as_signal

# Split the engineered features by date: rows before TODAY are the training
# history, rows dated TODAY are the ones we score.
train_data = df_engineered[df_engineered["date"] < TODAY]

# Take an explicit copy. A new column is assigned to this frame further down,
# and assigning into a filtered selection of `df_engineered` can trigger
# pandas' SettingWithCopyWarning.
predict_data = df_engineered[df_engineered["date"] == TODAY].copy()

predict_data.glimpse()

# Use the chosen factor column directly as the trading signal.
factor_signal = use_factor_as_signal(
    train_data=train_data,
    predict_data=predict_data,
    factor_column="close_fastqsmom_21_252_126",
)

# Attach the signal to the prediction rows so the portfolio-construction step
# can read it from `predict_data["factor_signal"]`.
predict_data["factor_signal"] = factor_signal

predict_data.glimpse()

<class 'pandas.core.frame.DataFrame'>: 3 rows of 93 columns
symbol:                               object            ['AAPL', 'AMZN', ...
date:                                 datetime64[ms]    [Timestamp('2025 ...
open:                                 float64           [208.49, 235.77, ...
high:                                 float64           [209.84, 236.53, ...
low:                                  float64           [207.16, 231.4,  ...
close:                                float64           [207.57, 234.11, ...
volume:                               float64           [64700006.0, 934 ...
flag_missing_original:                bool              [False, False, F ...
flag_zero_replaced:                   bool              [False, False, F ...
flag_duplicate_dropped:               bool              [False, False, F ...
flag_large_gap:                       bool              [False, False, F ...
flag_ohlc_corrected:                  bool              [False, False, F ...
flag_high_price:

In [None]:
# 6. Portfolio Construction:
#    - The `long_short_equal_weight_portfolio` function constructs a portfolio based on the factor signal.

from qsresearch.strategies.factor.portfolio_construction import long_short_equal_weight_portfolio

# Build the target weights from the factor signal, allowing at most 2 longs.
# NOTE(review): `long_threshold` is presumably the minimum signal value a
# symbol needs to qualify as a long - confirm against the QSResearch docs.
# NOTE(review): the returned weights appear to be keyed by the index of the
# `predictions` Series (row labels here), not by ticker symbol - verify.
weights = long_short_equal_weight_portfolio(
    long_threshold=0,
    num_long_positions=2,
    predictions=predict_data["factor_signal"],
)

weights

{2267: 0.5, 1511: 0.5}

In [None]:
# 7. Backtesting:
#   - The `run_backtest` function backtests the strategy on historical data by running this algorithm in a loop with Zipline.
#   - We develop a configuration dictionary that defines the backtest parameters and the algorithm to run.
#   - The CONFIG integrates our workflow steps into a single backtest configuration.

from qsresearch.strategies.factor import run_backtest

# Factor column(s) handed to the backtest as predictors. The first entry is
# also the column the algorithm step below uses as its signal.
PREDICTOR_COLS = ["close_fastqsmom_21_252_126"]

# Single configuration dict consumed by `run_backtest`. It combines the
# backtest settings with the screening, preprocessing, feature-engineering,
# algorithm, and portfolio-construction functions imported in earlier cells.
CONFIG = {
    
    # MLflow tracking: experiment name, run name, and tags for this backtest
    "use_mlflow": True,
    "mlflow_experiment_name": "Test Strategies",
    "mlflow_run_name": "Test Strategy 2",
    "mlflow_tags": {
        "strategy": "test", "portfolio": "equal_weight"
    },

    # BACKTEST PARAMETERS:
    "bundle_name": "qspro_demo_historical_prices_fmp",
    "start_date": pd.Timestamp("2025-05-01"), # NOTE - short window (about 3 months up to TODAY) to keep the test run fast
    "end_date": pd.Timestamp(TODAY),
    "capital_base": 1_000_000,    
    "benchmark_symbol": "SPY",  # Set to None to skip benchmark
    "window_length": 252 * 3,  # zipline bar count window for training and prediction
    "frequency": "1d",
    "predictor_cols": PREDICTOR_COLS,
    "calendar_name": "NYSE",  # Default calendar, can be changed
    "extra_init": None,  # Optional custom initialization function
    "custom_handle_data": None,  # Optional custom handle_data function
    
    # BACKTEST FUNCTIONS
    # Rebalance schedule built from Zipline rules: start of each month,
    # 60 minutes after the market opens.
    "rebalance_schedule": {
        "date_rule": date_rules.month_start(),
        "time_rule": time_rules.market_open(minutes=60),
    },
    # NOTE(review): the units of the slippage spread and commission cost are
    # defined by run_backtest / Zipline - confirm before changing these values.
    "transaction_costs": {
        "slippage": {"spread": 0.01},
        "commission": {"cost": 0.005, "min_trade_cost": 0},
    },
    # Optional stop-loss settings. Left commented out on purpose: without a
    # 'stop_loss' key the backtest log reports that stop-loss is disabled.
    # 'stop_loss': {
    #     'long_threshold': 0.10,  # 10% stop-loss
    #     'short_threshold': 0.10,  # 10% stop-loss
    #     'date_rule': date_rules.every_day(),
    #     'time_rule': time_rules.market_open(minutes=60),
    # },
    
    # Preprocessing steps applied sequentially
    "preprocess": [
        {
            "name": "screener",
            "func": universe_screener,  # imported from qsresearch.preprocessors in an earlier cell
            "params": {
                "lookback_days": 2 * 365,  # 730 days
                "volume_top_n": 10, # NOTE - kept very small for testing
                "momentum_top_n": None,
                "percent_change_filter": False,
                "max_percent_change": 0.35,  # presumably unused while percent_change_filter is False - verify
                "volatility_filter": True,
                "max_volatility": 0.25,
                "min_avg_volume": 100_000,
                "min_avg_price": 4.0,
                "min_last_price": 5.0,
                "symbol_column": "symbol",
                "date_column": "date",
                "close_column": "close",
                "volume_column": "volume",
            },
        },
        {
            "name": "price_preprocessor",
            "func": preprocess_price_data,
            "params": {
                "min_trading_days": 252 * 2,
                "remove_low_trading_days": True,
                "remove_large_gaps": True,
                "remove_low_volume": True,
                "symbol_column": "symbol",
                "date_column": "date",
                "open_column": "open",
                "high_column": "high",
                "low_column": "low",
                "close_column": "close",
                "volume_column": "volume",
                "engine": "polars",
            },
        },
        {
            "name": "technical_indicators",
            "func": add_technical_indicators,
            "params": {
                "date_column": "date",
                "symbol_column": "symbol",
                "close_column": "close",
                "high_column": "high",
                "low_column": "low",
                "volume_column": "volume",
                "compute_rolling_risk": False,
                "compute_qs_momentum": True,  # Add QS Momentum Factor
            },
        }
    ],
    
    # Algorithm: use the first predictor column directly as the signal
    "algorithm": {
        "func": use_factor_as_signal,
        "params": {
            "factor_column": PREDICTOR_COLS[0],
        },
    },
    
    # Portfolio Construction:
    "portfolio_strategy": {
        "func": long_short_equal_weight_portfolio,
        "params": {
            "num_long_positions": 5, # NOTE - kept small for testing
            "long_threshold": 1.00,  # NOTE(review): presumably the minimum signal value required to go long - confirm
            # Short side is disabled; uncomment to add short positions.
            # 'num_short_positions': 20,
            # 'short_threshold': -1.00,
        },
    },
}

# Run the backtest described by CONFIG and keep the resulting performance frame.
performance_df = run_backtest(CONFIG)

# Column-by-column summary of the backtest results.
performance_df.glimpse()

2025/08/04 22:57:50 INFO mlflow.tracking.fluent: Experiment with name 'Test Strategies' does not exist. Creating a new experiment.
2025-08-04 22:57:50,222 - INFO - Initializing backtest
2025-08-04 22:57:50,256 - INFO - Benchmark symbol: SPY
2025-08-04 22:57:50,355 - INFO - No 'stop_loss' key found in config; stop-loss disabled.
2025-08-04 22:57:50,483 - INFO - Handling data for 2025-05-01 20:00:00+00:00
2025-08-04 22:57:50,485 - INFO - Rebalancing on 2025-05-01 00:00:00
2025-08-04 22:57:59,353 - INFO - Applying preprocessing step: screener
2025-08-04 22:57:59,354 - INFO - Starting universe screening process.
2025-08-04 22:57:59,355 - INFO - Using max date from data: 2025-05-01 00:00:00
2025-08-04 22:57:59,355 - INFO - Lookback period: 2023-05-02 00:00:00 to 2025-05-01 00:00:00 (730 days)
2025-08-04 22:57:59,457 - INFO - Converted Pandas DataFrame to Polars DataFrame
2025-08-04 22:57:59,462 - INFO - Cast date column to Date type
2025-08-04 22:57:59,468 - INFO - Filtered data to time fra

Generating tear sheet...


Start date,2025-05-01,2025-05-01
End date,2025-07-31,2025-07-31
Total months,3,3
Unnamed: 0_level_3,Backtest,Unnamed: 2_level_3
Annual return,168.386%,
Cumulative returns,27.994%,
Annual volatility,56.327%,
Sharpe ratio,2.04,
Calmar ratio,12.68,
Stability,0.83,
Max drawdown,-13.278%,
Omega ratio,1.47,
Sortino ratio,2.92,
Skew,-0.77,


Worst drawdown periods,Net drawdown in %,Peak date,Valley date,Recovery date,Duration
0,13.28,2025-05-01,2025-05-06,2025-05-13,9
1,9.95,2025-06-03,2025-06-05,2025-06-11,7
2,9.4,2025-06-26,2025-07-01,2025-07-14,13
3,7.86,2025-05-14,2025-05-21,2025-05-30,13
4,3.2,2025-07-17,2025-07-22,2025-07-23,5


2025-08-04 22:58:13,393 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-08-04 22:58:13,395 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.


Stress Events,mean,min,max
Covid,0.46%,-12.89%,8.73%


Top 10 long positions of all time,max
sid,Unnamed: 1_level_1
PLTR,108.02%


Top 10 short positions of all time,max
sid,Unnamed: 1_level_1


Top 10 positions of all time,max
sid,Unnamed: 1_level_1
PLTR,108.02%


2025-08-04 22:58:14,861 - INFO - Logging Pyfolio artifact to: performance_metrics


HTML tear sheet saved as: pyfolio_tear_sheet.html
Dropped 7.9% entries from factor data: 7.9% in forward returns computation and 0.0% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
- Generating full tear sheet...
Quantiles Statistics


Unnamed: 0_level_0,min,max,mean,std,count,count %
factor_quantile,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,-0.827394,-0.391988,-0.597549,0.14706,116,25.0
2,-0.514524,-0.298223,-0.367983,0.095506,58,12.5
3,-0.399147,0.128977,-0.134471,0.177735,116,25.0
4,-0.009289,0.138443,0.051973,0.066334,58,12.5
5,0.034712,4.213367,1.881717,1.792746,116,25.0


Returns Analysis


Unnamed: 0,5D
Ann. alpha,0.007
beta,-0.417
Mean Period Wise Return Top Quantile (bps),-106.659
Mean Period Wise Return Bottom Quantile (bps),114.022
Mean Period Wise Spread (bps),-220.68


2025-08-04 22:58:14,949 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.
2025-08-04 22:58:14,951 - INFO - Using categorical units to plot a list of strings that are all parsable as floats or dates. If these strings should be plotted as numbers, cast to the appropriate data type before plotting.


Information Analysis


Unnamed: 0,5D
IC Mean,-0.631
IC Std.,0.314
Risk-Adjusted IC,-2.011
t-stat(IC),
p-value(IC),
IC Skew,
IC Kurtosis,


Turnover Analysis


Unnamed: 0,5D
Quantile 1 Mean Turnover,0.047
Quantile 2 Mean Turnover,0.094
Quantile 3 Mean Turnover,0.142
Quantile 4 Mean Turnover,0.189
Quantile 5 Mean Turnover,0.047


Unnamed: 0,5D
Mean Factor Rank Autocorrelation,0.984


2025-08-04 22:58:16,363 - INFO - Logging Alphalens artifact to: performance_metrics


HTML tear sheet saved as: alphalens_tear_sheet.html


2025-08-04 22:58:16,567 - INFO - MLflow run URL: file:///Users/brucebrownlee/Dev/GitHub/Resident/QS-Project/Clinic-06/mlruns/339682750807434492/42061e0af2b94a929a116aaf830a763e/artifacts
2025-08-04 22:58:16,568 - INFO - Use the MLflow UI to view the run details by running this command in terminal: `mlflow ui`


<class 'pandas.core.frame.DataFrame'>: 63 rows of 40 columns
period_open:              datetime64[ns, UTC] [Timestamp('2025-05-01 13: ...
period_close:             datetime64[ns, UTC] [Timestamp('2025-05-01 20: ...
ending_cash:              float64           [1000000.0, -69515.450000000 ...
short_value:              float64           [0.0, 0.0, 0.0, 0.0, 0.0, 0. ...
returns:                  float64           [0.0, -8.605000000017071e-05 ...
net_leverage:             float64           [0.0, 1.0695214323192512, 1. ...
starting_cash:            float64           [1000000.0, 1000000.0, -6951 ...
gross_leverage:           float64           [0.0, 1.0695214323192512, 1. ...
ending_value:             float64           [0.0, 1069429.4, 1065040.849 ...
orders:                   object            [[{'id': '63f814be8ad04e93ab ...
shorts_count:             int64             [0, 0, 0, 0, 0, 0, 0, 0, 0,  ...
ending_exposure:          float64           [0.0, 1069429.4, 1065040.849 ...
long_exposure: 

In [None]:
# * MLFLOW TRACKING ----

# Run in terminal:
#   mlflow server

# * NEXT STEPS:

# - Knowledge Check: Try adding a new function that adds a custom feature to the dataset.

# - Now that you know how it works, we'll examine the QS Momentum Factor strategy.

No kernel connected