In [1]:
# Required imports
import numpy as np
import pandas as pd
from pathlib import Path
import os
import duckdb
import warnings
warnings.filterwarnings('ignore')
%matplotlib widget
import matplotlib.pyplot as plt
import mplcursors  
from pandas import Timestamp

from factor_tools import create_extended_analysis_data
from backtest_tools import (
    CommissionModel, 
    FixedSlippageModel,
    BrokersModel,
    Trading,
    Backtest,
    load_distribution_data
)

In [None]:
# NOTE: this cell is intentionally disabled. Everything between the triple
# quotes is a string literal and is never executed; a later cell imports
# PortfolioStrategy and MultiFactorStrategy1 from strategies.py instead.
# The archived copy below carries these fixes so it is safe to re-enable:
#   * Stage 3 now sorts by 'point8' (it used to re-sort by 'point3', which
#     made the value/momentum ranking a no-op).
#   * The duplicated "no stocks remain after Stage 2" check was removed.
#   * Intermediate CSVs are only written when store_intermediate is True.
'''
# Strategy base class and implementations
class PortfolioStrategy:
    """Base class for portfolio generation strategies"""

    def generate_portfolio_targets(self, data_df, min_price=2.0, market_cap_percentile=0.5, stock_num=10, logger=print):
        """
        Generate target portfolio based on strategy logic

        NOTE: MultiFactorStrategy.generate_portfolio_targets below takes an
        extra leading `date` argument that this base signature does not have.

        Parameters:
        -----------
        data_df : DataFrame
            Daily stock data including factors
        min_price : float
            Minimum price threshold for stocks
        market_cap_percentile : float
            Market cap percentile threshold (0-1)
        stock_num : int
            Number of stocks to select
        logger : function
            Logging function

        Returns:
        --------
        dict : {symbol: target_weight}

        Raises:
        -------
        NotImplementedError
            Always; subclasses must override this method.
        """
        raise NotImplementedError("Subclasses must implement generate_portfolio_targets")


class MultiFactorStrategy(PortfolioStrategy):
    """
    Three-stage rank-sum screen that returns an equal-weight portfolio.

    Stage 1 ranks on quality factors and keeps the top 2/3, stage 2 ranks on
    growth factors and keeps the top 1/4, stage 3 ranks the survivors on
    value and momentum factors; the top `stock_num` names are selected.
    """

    def __init__(self, industry_limit=2, store_intermediate=True):
        """
        Initialize strategy

        Parameters:
        -----------
        industry_limit : int
            Maximum number of stocks per industry. Stored on the instance
            only; the selection logic in this class never reads it.
        store_intermediate : bool
            When True, a copy of the universe is kept after each stage in
            `self.intermediate_dfs` and written to CSV at the end of a run.
        """
        self.industry_limit = industry_limit
        self.store_intermediate = store_intermediate
        self.intermediate_dfs = {}  # {stage_name: DataFrame snapshot}

    @staticmethod
    def _add_rank_score(universe_df, factor_cols, score_col):
        """Add one '<factor>_rank' column per factor plus their sum as `score_col` (higher is better)."""
        rank_cols = []
        for col in factor_cols:
            rank_col = f"{col}_rank"
            universe_df[rank_col] = universe_df[col].rank(ascending=True)
            rank_cols.append(rank_col)
        # skipna=False keeps NaN scores for rows with a missing factor, exactly
        # like adding the rank columns with '+'.
        universe_df[score_col] = universe_df[rank_cols].sum(axis=1, skipna=False)
        return universe_df

    def generate_portfolio_targets(self, date, data_df, min_price=2.0, market_cap_percentile=0.5, stock_num=50, logger=print):
        """
        Generate equal-weight portfolio targets for one rebalance date.

        Parameters:
        -----------
        date : datetime-like
            Rebalance date; only used to name the intermediate CSV files
            (must support .strftime).
        data_df : DataFrame
            Cross-section with 'dlyprc', 'dlycap' and the factor columns used
            below. Index values are converted with int() to form the keys of
            the returned dict.
        min_price : float
            Rows with 'dlyprc' <= min_price are dropped.
        market_cap_percentile : float
            Rows with 'dlycap' at or below this quantile of 'dlycap' are dropped.
        stock_num : int
            Maximum number of stocks in the final portfolio.
        logger : function
            Called with progress messages.

        Returns:
        --------
        dict : {int(index value): weight}; empty when the input is empty or a
        stage leaves no stocks.
        """
        # Reset intermediate storage for this run
        if self.store_intermediate:
            self.intermediate_dfs = {}

        # Validate data
        if data_df is None or len(data_df) == 0:
            logger("Error: Empty dataframe passed to generate_portfolio_targets")
            return {}

        # Filter universe first
        universe_df = data_df[
            (data_df['dlyprc'] > min_price) &
            (data_df['dlycap'] > data_df['dlycap'].quantile(market_cap_percentile))
        ].copy()

        logger(f"Starting universe size: {len(universe_df)} stocks")

        # Store initial filtered universe
        if self.store_intermediate:
            self.intermediate_dfs['initial_universe'] = universe_df.copy()

        # STAGE 1: quality factors -> point1, keep top 2/3
        logger("Stage 1: Ranking on quality factors")
        universe_df = self._add_rank_score(
            universe_df,
            ['cash_flow_to_asset', 'cash_flow_to_liability', 'total_asset_turnover_rate'],
            'point1',
        )
        universe_df = universe_df.sort_values(by='point1', ascending=False)
        # .copy() so the column assignments in the next stage hit a real frame,
        # not a slice of the previous one.
        universe_df = universe_df.iloc[:len(universe_df)//3*2].copy()

        logger(f"After quality filter: {len(universe_df)} stocks")

        # Store post-quality filter universe
        if self.store_intermediate:
            self.intermediate_dfs['post_quality_filter'] = universe_df.copy()

        # Early exit if no stocks remain
        if len(universe_df) == 0:
            logger("No stocks remain after Stage 1 filtering")
            return {}

        # STAGE 2: growth factors -> point3, keep top 1/4
        logger("Stage 2: Ranking on growth factors")
        universe_df = self._add_rank_score(
            universe_df,
            ['gpoa_4q_growth', 'roe', 'roic'],
            'point3',
        )
        universe_df = universe_df.sort_values(by='point3', ascending=False)
        universe_df = universe_df.iloc[:len(universe_df)//4].copy()

        logger(f"After growth filter: {len(universe_df)} stocks")

        # Store post-growth filter universe
        if self.store_intermediate:
            self.intermediate_dfs['post_growth_filter'] = universe_df.copy()

        # Early exit if no stocks remain
        if len(universe_df) == 0:
            logger("No stocks remain after Stage 2 filtering")
            return {}

        # STAGE 3: value and momentum factors -> point8
        logger("Stage 3: Using value and momentum factors")
        universe_df = self._add_rank_score(
            universe_df,
            ['earnings_to_price', 'cum_return_252d_offset_21d', 'cum_return_126d_offset_21d'],
            'point8',
        )

        # Sort by point8 (higher is better). This used to sort by 'point3',
        # which silently ignored the stage-3 score.
        universe_df = universe_df.sort_values(by='point8', ascending=False)

        logger("After stage 3 filtering: ready for final selection")

        # Store final ranked universe
        if self.store_intermediate:
            self.intermediate_dfs['final_ranked'] = universe_df.copy()

        # Select final stocks (top N stocks)
        final_df = universe_df.head(stock_num)
        selected_stocks = final_df.index

        # Store final selected universe
        if self.store_intermediate:
            self.intermediate_dfs['final_selection'] = final_df.copy()

        # Equal weight portfolio
        weight = 1.0 / len(selected_stocks) if len(selected_stocks) > 0 else 0
        target_portfolio = {int(stock): weight for stock in selected_stocks}

        # Only write CSVs when snapshots were actually collected; otherwise the
        # save method would just print a warning on every rebalance.
        if self.store_intermediate:
            self.save_intermediate_dataframes(date)

        # Log portfolio construction details
        logger(f"Final portfolio: {len(target_portfolio)} stocks selected")
        return target_portfolio

    def get_intermediate_dataframes(self):
        """
        Return the stored intermediate DataFrames

        Returns:
        --------
        dict : {stage_name: dataframe}, or a dict with a single "error" key
        when intermediate storage is disabled.
        """
        if not self.store_intermediate:
            return {"error": "Intermediate DataFrame storage is not enabled. Initialize with store_intermediate=True"}
        return self.intermediate_dfs

    def save_intermediate_dataframes(self, date, directory="intermediate_dfs"):
        """
        Save all intermediate DataFrames to CSV files

        Files are named "<directory>/<YYYYMMDD>_<stage>.csv".

        Parameters:
        -----------
        date : datetime-like
            Date used for file naming (must support .strftime)
        directory : str
            Directory to save files in; created if missing
        """
        import os
        if not self.store_intermediate:
            print("Intermediate DataFrame storage is not enabled. Initialize with store_intermediate=True")
            return

        # Create directory if it does not exist yet
        os.makedirs(directory, exist_ok=True)

        date_str = date.strftime('%Y%m%d')
        # Save each DataFrame
        for stage, df in self.intermediate_dfs.items():
            filename = f"{directory}/{date_str}_{stage}.csv"
            df.to_csv(filename)

'''


In [3]:
# Every factor column requested from the factor table, kept as one flat list.
# The list is assembled from three themed segments purely for readability.
# A later cell reassigns `all_factors` from strategy.get_factor_list().
all_factors = (
    # Quality factors
    ['cash_flow_to_asset', 'cash_flow_to_liability', 'total_asset_turnover_rate']
    # Growth factors
    + ['gpoa_4q_growth', 'roe', 'roic', 'gpoa_20q_growth']
    # Value factors (earnings yield) and trailing-return columns
    + [
        'earnings_to_price',
        'cum_return_252d_offset_21d',
        'cum_return_126d_offset_21d',
        'cum_return_21d',
    ]
)

In [None]:
# Import the strategy classes from the project module strategies.py
# (this replaces the class definitions that are disabled inside a string
# literal earlier in the notebook).
from strategies import PortfolioStrategy, MultiFactorStrategy1

# Create the strategy instance used to derive the factor list below
strategy = MultiFactorStrategy1(
    industry_limit=2,  # Optional: limit 2 stocks per industry
    store_intermediate=False  # Flag is off: intermediate results are NOT stored
)

# Ask the strategy which factor columns it needs; this overwrites the
# hand-written `all_factors` list defined in the previous cell.
all_factors = strategy.get_factor_list()
print(f"Strategy requires these factors: {all_factors}")

In [4]:
# Date window for the analysis plus a short factor subset.
# The only visible uses of 'factor_name' in this notebook are commented out.
ANALYSIS_PARAMS = dict(
    start_date='2010-01-05',
    end_date='2023-12-30',
    factor_name=['gpoa_4q_growth', 'roe', 'roic'],
)

In [5]:
# Load data
# Builds the per-date data dictionary and the list of trading dates for the
# analysis window, requesting every column in `all_factors`.
# The recorded run of this cell took about five minutes (see progress bar output).
# NOTE(review): 'wrds_data.db' / 'factor_data' look like a database file and a
# table name -- confirm against factor_tools.create_extended_analysis_data.
data_df_dic, trading_dates = create_extended_analysis_data(
    ANALYSIS_PARAMS['start_date'],
    ANALYSIS_PARAMS['end_date'],
    all_factors,
    # Alternative: load only the short factor subset instead of all_factors
    #ANALYSIS_PARAMS['factor_name'],
    'wrds_data.db',
    'factor_data'
)

# Load distribution data for the same date window
# (passed to Backtest as `distribution_data` further down)
distribution_data = load_distribution_data(
    ANALYSIS_PARAMS['start_date'],
    ANALYSIS_PARAMS['end_date']
)



Processing dates: 100%|██████████| 3522/3522 [04:57<00:00, 11.86it/s]


In [6]:
# Create the strategy instance that is handed to Backtest below.
# The in-notebook MultiFactorStrategy class only exists inside a disabled
# string-literal cell, so referencing it raises NameError after
# Restart Kernel -> Run All. Use the class imported from strategies.py,
# with the same settings as the cell that imports it, so that `strategy`
# stays consistent with the `all_factors` list derived from it.
strategy = MultiFactorStrategy1(
    industry_limit=2,  # Optional: limit 2 stocks per industry
    store_intermediate=False,  # keep intermediate-result storage switched off
)

In [7]:
# Transaction-cost models handed to Backtest below (both come from backtest_tools).
commission_model = BrokersModel()
# NOTE(review): 0.005 is passed positionally; its unit (absolute price vs.
# fraction of price) is not visible here -- confirm in FixedSlippageModel.
slippagemodel = FixedSlippageModel(0.005)

In [9]:
# Initialize and run backtest
# All keyword arguments are forwarded to backtest_tools.Backtest. The notes
# below are inferred from the argument names only -- confirm against
# backtest_tools before relying on them.
backtest = Backtest(
    cash=10000000,  # Initial cash
    commission_model=commission_model,
    slippage_model= slippagemodel,
    data_df_dic=data_df_dic,
    trading_dates=trading_dates,
    distribution_data=distribution_data,
    factor_list=all_factors, 
    # Alternative: restrict the backtest to the short factor subset
    #factor_list=ANALYSIS_PARAMS['factor_name'],
    dir_='backtest_results',
    save_format='html',
    stock_num=50,
    margin_rate=0.06,
    rebalance_freq='1w',
    rebalance_day=4,  # NOTE(review): presumably a weekday index -- confirm the convention
    min_price=2.0,
    market_cap_percentile=0.5,
    # Disabled: fixed buy-and-hold positions (see the example dicts in the last cell)
    #buy_and_hold_list=buy_and_hold,
    weight_change_threshold=0.01,
    strategy=strategy 
)

# No need for additional parameter setting
# The recorded run of this call took about six minutes (see progress bar output).
backtest.backtest()

Starting backtest...


100%|██████████| 3521/3521 [06:04<00:00,  9.66it/s]

Backtest completed.





In [None]:
# Render the performance summary of the finished backtest. The notebook uses
# the `%matplotlib widget` backend (set in the import cell), and
# interactive=True is requested from the plotting helper.
backtest.plot_performance_summary(interactive=True)
plt.show()

In [35]:
# Export the performance summary as PDF files into backtest_results/
# (the recorded output lists four files: metrics table, cumulative returns,
# drawdown analysis, turnover analysis).
# NOTE(review): dpi=2400 is unusually high -- confirm it is intentional, since
# it may make the export slow and the files large.
backtest.save_performance_summary_pdfs(output_dir='backtest_results',
    dpi=2400)

PDF files have been saved to backtest_results/:
├── 1_metrics_table.pdf
├── 2_cumulative_returns.pdf
├── 3_drawdown_analysis.pdf
└── 4_turnover_analysis.pdf


In [None]:
# Example buy and hold portfolio
# This cell is disabled: the dict definitions below sit inside a string
# literal and are never executed. The only visible reference is the
# commented-out `buy_and_hold_list=buy_and_hold` argument of Backtest(...).
# Each dict maps an integer identifier to a weight; the trailing `#...` line
# inside each dict looks like a date range -- presumably the period that
# portfolio was tested on; confirm.
# NOTE(review): the combined `buy_and_hold` dict has weights summing to about
# 4.2, far above 1 -- confirm whether the backtester normalises them.
'''
buy_and_hold_test_C2 = {
    14593: 0.4,
    16437: 0.2,
    14450: 0.2,
    89349: 0.2,
#2020-06-01-2020-08-31
}
buy_and_hold_test_CS = {
    14593: 0.4,
    80711: 0.2,
    75152: 0.1,
    15950: 0.1, 
    14252: 0.1,
    25129: 0.1
#2020-11-01-2020-12-31
}
buy_and_hold_test_M2MU = {
    14593: 0.1,
    18790:0.1,
    89942:0.1,
    16836:0.1,
    89901:0.1,
    17016:0.1,
    92910:0.1,
    10065:0.1,
    15282:0.1,
    91278:0.1
#2020-09-01-2020-12-31
}
buy_and_hold_test_O1P1 = {
    14593: 0.5,
    49680:0.1,
    26825:0.1,
    88233:0.1,
    17250:0.1,
    88901:0.1
#2023-09-01-2023-11-30
}
buy_and_hold_D1D2 = {
    14593: 0.4,
    12413:0.1,
    13191:0.1,
    81560:0.1,
    90499:0.1,
    20807:0.1,
    14433:0.1
#2022-03-01-2022-04-30
}
buy_and_hold_suspended = {
    14593: 0.4,
    20740: 0.2,
    21847: 0.2,
    14127: 0.2
#2021-11-01-2023-04-30
}
buy_and_hold = {
    14593: 0.4,
    20740: 0.2,
    21847: 0.2,
    14127: 0.2,
    16437: 0.2,
    14450: 0.2,
    89349: 0.2,
    12413:0.1,
    13191:0.1,
    81560:0.1,
    90499:0.1,
    20807:0.1,
    14433:0.1,
    49680:0.1,
    26825:0.1,
    88233:0.1,
    17250:0.1,
    88901:0.1,
    18790:0.1,
    89942:0.1,
    16836:0.1,
    89901:0.1,
    17016:0.1,
    92910:0.1,
    10065:0.1,
    15282:0.1,
    91278:0.1,
    80711: 0.2,
    75152: 0.1,
    15950: 0.1, 
    14252: 0.1,
    25129: 0.1,
#2021-11-01-2023-04-30
}
'''
