In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')

from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.vec_env import DummyVecEnv

from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)
check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])

import warnings

warnings.filterwarnings('ignore')
import itertools



In [2]:
START_DATE = '1926-07-01'
END_DATE = '2025-04-30'
TRAIN_START_DATE = '1980-01-01'
TRAIN_END_DATE = '2014-01-05'
TRADE_START_DATE = '2014-01-06'
TEST_START_DATE = '1990-01-01'


In [3]:
TRAIN_START_DATE

'1980-01-01'

In [4]:
df = pd.read_csv('datasets/Industry_daily.csv')
df.head()
# Convert the Date column to datetime format
df['Date'] = pd.to_datetime(df['Date'], format='%Y%m%d')

# Create price_long dataframe with proper date formatting
price_long = pd.melt(df, id_vars=['Date'], value_vars=['NoDur', 'Durbl', 'Manuf', 'Enrgy', 'HiTec', 'Telcm', 'Shops', 'Hlth', 'Utils', 'Other'])
price_long = price_long.rename(columns={'Date': 'date', 'variable': 'tic', 'value': 'close'})

# Add required columns with same values as close price since we don't have this data
price_long['open'] = price_long['close'] 
price_long['high'] = price_long['close']
price_long['low'] = price_long['close']

# Sort by date and tic
price_long = price_long.sort_values(['date', 'tic']).reset_index(drop=True)
price_long.head()



Unnamed: 0,date,tic,close,open,high,low
0,1926-07-01,Durbl,-0.28,-0.28,-0.28,-0.28
1,1926-07-01,Enrgy,0.57,0.57,0.57,0.57
2,1926-07-01,HiTec,-0.21,-0.21,-0.21,-0.21
3,1926-07-01,Hlth,0.97,0.97,0.97,0.97
4,1926-07-01,Manuf,-0.23,-0.23,-0.23,-0.23


In [5]:
def returns_to_prices_vectorized(returns_df, initial_price=100):
    returns_df = returns_df.copy()
    returns_df['close'] = returns_df['close'] / 100 +1

    returns_df['close'] = returns_df.groupby('tic')['close'].cumprod()

    returns_df['open'] = returns_df['close']
    returns_df['high'] = returns_df['close']
    returns_df['low'] = returns_df['close']

    return returns_df

price_long = returns_to_prices_vectorized(price_long, initial_price=100)
price_long.head()

Unnamed: 0,date,tic,close,open,high,low
0,1926-07-01,Durbl,0.9972,0.9972,0.9972,0.9972
1,1926-07-01,Enrgy,1.0057,1.0057,1.0057,1.0057
2,1926-07-01,HiTec,0.9979,0.9979,0.9979,0.9979
3,1926-07-01,Hlth,1.0097,1.0097,1.0097,1.0097
4,1926-07-01,Manuf,0.9977,0.9977,0.9977,0.9977


In [6]:
fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_turbulence=False,
    user_defined_feature=False
)

processed = fe.preprocess_data(price_long)
processed.shape

Successfully added technical indicators


(259820, 14)

In [7]:
list_ticker = processed["tic"].unique().tolist()
list_date = list(pd.date_range(processed['date'].min(),processed['date'].max()).astype(str))
combination = list(itertools.product(list_date,list_ticker))



In [8]:
combination = pd.DataFrame(combination, columns=['date', 'tic'])
# Convert date column in combination to datetime to match processed dataframe
combination['date'] = pd.to_datetime(combination['date'])
processed_full = combination.merge(processed, on=['date', 'tic'], how='left')
processed_full = processed_full[processed_full['date'].isin(processed['date'])]
processed_full = processed_full.sort_values(['date', 'tic'])

processed_full = processed_full.fillna(0)
processed_full.head()


Unnamed: 0,date,tic,close,open,high,low,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma
0,1926-07-01,Durbl,0.9972,0.9972,0.9972,0.9972,0.0,1.017625,0.987445,100.0,66.666667,100.0,0.9972,0.9972
1,1926-07-01,Enrgy,1.0057,1.0057,1.0057,1.0057,0.0,1.017625,0.987445,100.0,66.666667,100.0,1.0057,1.0057
2,1926-07-01,HiTec,0.9979,0.9979,0.9979,0.9979,0.0,1.017625,0.987445,100.0,66.666667,100.0,0.9979,0.9979
3,1926-07-01,Hlth,1.0097,1.0097,1.0097,1.0097,0.0,1.017625,0.987445,100.0,66.666667,100.0,1.0097,1.0097
4,1926-07-01,Manuf,0.9977,0.9977,0.9977,0.9977,0.0,1.017625,0.987445,100.0,66.666667,100.0,0.9977,0.9977


In [9]:
processed_full = processed_full.sort_values(['date','tic'], ignore_index=True)
processed_full.index = processed_full.date.factorize()[0]

cov_list = []
return_list = []

lookback = 480
for i in range(lookback, len(processed_full.index.unique())):
    data_lookback = processed_full.iloc[i-lookback:i]
    price_lookback = data_lookback.pivot_table(index='date', columns='tic', values='close')
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    cov = return_lookback.cov().values
    cov_list.append(cov)

df_cov = pd.DataFrame({'date': processed_full.date.unique()[lookback:], 'cov_list':cov_list, 'return_list':return_list})
processed_full = processed_full.merge(df_cov, on='date')
processed_full = processed_full.sort_values(['date', 'tic']).reset_index(drop=True)

processed_full.head()


Unnamed: 0,date,tic,close,open,high,low,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,cov_list,return_list
0,1928-02-06,Durbl,2.008443,2.008443,2.008443,2.008443,0.002385,2.02802,1.948959,53.864537,29.67407,6.963338,1.997875,1.976695,"[[0.00023139518038852933, 4.7798223866789966e-...",tic Durbl Enrgy HiTec Hlth M...
1,1928-02-06,Enrgy,0.980183,0.980183,0.980183,0.980183,-0.00904,1.031943,0.974752,36.716143,-157.928381,50.32602,1.009746,1.010272,"[[0.00023139518038852933, 4.7798223866789966e-...",tic Durbl Enrgy HiTec Hlth M...
2,1928-02-06,HiTec,1.613793,1.613793,1.613793,1.613793,0.004329,1.637618,1.576751,54.237195,34.8892,6.101492,1.607898,1.593451,"[[0.00023139518038852933, 4.7798223866789966e-...",tic Durbl Enrgy HiTec Hlth M...
3,1928-02-06,Hlth,1.669301,1.669301,1.669301,1.669301,0.006687,1.708407,1.64019,54.629899,16.566485,3.053309,1.665174,1.648649,"[[0.00023139518038852933, 4.7798223866789966e-...",tic Durbl Enrgy HiTec Hlth M...
4,1928-02-06,Manuf,1.59711,1.59711,1.59711,1.59711,0.002618,1.622752,1.559963,53.837268,20.572064,3.93354,1.593258,1.582827,"[[0.00023139518038852933, 4.7798223866789966e-...",tic Durbl Enrgy HiTec Hlth M...


In [10]:
train = data_split(processed_full, start=TRAIN_START_DATE, end=TRAIN_END_DATE)
trade = data_split(processed_full, start=TRADE_START_DATE, end=TRADE_END_DATE)

In [11]:
# stock_dimension = len(train.tic.unique())
# state_space = 1 + 2*stock_dimension + len(INDICATORS)*stock_dimension
# print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

stock_dimension = len(train.tic.unique())
state_space = stock_dimension
print(f'Stock Dimension: {stock_dimension}, State Space: {state_space}')


Stock Dimension: 10, State Space: 10


In [12]:
env_kwargs = {
    'hmax': 100,
    'initial_amount': 1000000,
    'transaction_cost_pct': 0.005,
    'state_space': state_space,
    'stock_dim': stock_dimension,
    'tech_indicator_list': INDICATORS,
    'action_space': stock_dimension,
    'reward_scaling': 1e-4
}

e_train_gym = StockPortfolioEnv(df=train, **env_kwargs)
e_trade_gym = StockPortfolioEnv(df=trade, **env_kwargs)

In [13]:
models_to_train = {
    "PPO": {
        'total_timesteps': 50000,
        'policy': 'MlpPolicy',
        'model_kwargs': {
            'learning_rate': 0.0003,
            'n_steps': 2048,
            'batch_size': 64,
            'n_epochs': 10,
            'gamma': 0.99,
            'gae_lambda': 0.95,
            'clip_range': 0.2,
            'ent_coef': 0.0,
            'vf_coef': 0.5,
            'max_grad_norm': 0.5,
            # tensorboard_log removed - FinRL handles this separately
        },
    },
    'A2C': {
        'total_timesteps': 50000,
        'policy': 'MlpPolicy',
        'model_kwargs': {
            'learning_rate': 0.0007,
            'n_steps': 5,
            'gamma': 0.99,
            'gae_lambda': 1.0,
            'ent_coef': 0.01,
            'vf_coef': 0.25,
            'max_grad_norm': 0.5,
            # tensorboard_log removed - FinRL handles this separately
        },
    },
    'DDPG': {
        'total_timesteps': 50000,
        'policy': 'MlpPolicy',
        'model_kwargs': {
            'learning_rate': 0.001,
            'buffer_size': 1000000,
            'learning_starts': 100,
            'batch_size': 100,
            'tau': 0.005,
            'gamma': 0.99,
            # tensorboard_log removed - FinRL handles this separately
        },
    },
}

In [14]:

# Train all models
trained_models = {}
model_results = {}

for model_name, config in models_to_train.items():
    print(f"\n{'='*50}")
    print(f"Training {model_name} model...")
    print(f"{'='*50}")
    
    try:
        # Create agent
        agent = DRLAgent(env=e_train_gym)
        
        # Get model
        model = agent.get_model(
            model_name=model_name.lower(),
            policy=config['policy'],
            model_kwargs=config['model_kwargs']
        )
        
        # Train model
        trained_model = agent.train_model(
            model=model,
            total_timesteps=config['total_timesteps'],
            tb_log_name=model_name.lower()
        )
        
        # Save model
        model_path = f"{TRAINED_MODEL_DIR}/{model_name.lower()}_ff_model"
        trained_model.save(model_path)
        trained_models[model_name] = trained_model
        
        print(f"{model_name} training completed and saved!")
        
    except Exception as e:
        print(f"Error training {model_name}: {str(e)}")
        continue

print(f"\nSuccessfully trained {len(trained_models)} models")


Training PPO model...
{'learning_rate': 0.0003, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 10, 'gamma': 0.99, 'gae_lambda': 0.95, 'clip_range': 0.2, 'ent_coef': 0.0, 'vf_coef': 0.5, 'max_grad_norm': 0.5}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| time/              |           |
|    fps             | 377       |
|    iterations      | 1         |
|    time_elapsed    | 5         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 3510932.0 |
----------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 340          |
|    iterations           | 2            |
|    time_elapsed         | 12           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 9.720679e-09 |
|    clip_fraction        | 0        

In [15]:
# trained_models['PPO'].policy

In [16]:
def test_model(model, model_name, env):
    """
    Test a trained model return results.
    """
    print(f'\nTesting {model_name}...')

    obs = env.reset()

    for i in range(len(env.get_attr('df')[0].index.unique())-1):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        if dones:
            break

    # Calculate daily returns from asset_memory
    import numpy as np
    asset_memory = env.get_attr('asset_memory')[0]
    daily_returns = np.diff(asset_memory) / asset_memory[:-1]

    results = {
        'final_value': asset_memory[-1],
        'total_return': (asset_memory[-1] / env.get_attr('initial_amount')[0] - 1) * 100,
        'daily_returns': daily_returns,
        'asset_memory': asset_memory,
        'date_memory': env.get_attr('date_memory')[0],
        'actions_memory': env.get_attr('actions_memory')[0]
    }
    return results

In [17]:
# Test all trained models
test_results = {}

for model_name, model in trained_models.items():
    try:
        # Create a vectorized environment for testing
        test_env = DummyVecEnv([lambda: e_trade_gym])
        
        # Reset test environment
        obs = test_env.reset()  # This will return just the observation
        
        # Test model
        results = test_model(model, model_name, test_env)
        test_results[model_name] = results
        
        print(f"{model_name} Results:")
        print(f"  Final Portfolio Value: ${results['final_value']:,.2f}")
        print(f"  Total Return: {results['total_return']:.2f}%")
        
        # Calculate additional metrics
        daily_returns = pd.Series(results['daily_returns'])
        sharpe_ratio = daily_returns.mean() / daily_returns.std() * np.sqrt(252)
        max_drawdown = (daily_returns.cumsum() - daily_returns.cumsum().expanding().max()).min()
        
        print(f"  Sharpe Ratio: {sharpe_ratio:.4f}")
        print(f"  Max Drawdown: {max_drawdown:.4f}")
        
    except Exception as e:
        print(f"Error testing {model_name}: {str(e)}")
        continue


Testing PPO...
PPO Results:
  Final Portfolio Value: $2,699,085.02
  Total Return: 169.91%
  Sharpe Ratio: 0.8198
  Max Drawdown: -0.4126

Testing A2C...
A2C Results:
  Final Portfolio Value: $2,963,330.86
  Total Return: 196.33%
  Sharpe Ratio: 0.8532
  Max Drawdown: -0.4378

Testing DDPG...
DDPG Results:
  Final Portfolio Value: $2,657,911.12
  Total Return: 165.79%
  Sharpe Ratio: 0.8359
  Max Drawdown: -0.3827


In [18]:
# Create comparsion dataframe
comparison_data = []
for model_name, results in test_results.items():
    daily_returns = pd.Series(results['daily_returns'])
    sharpe_ratio = daily_returns.mean() / daily_returns.std() * np.sqrt(252)
    max_drawdown = (daily_returns.cumsum() - daily_returns.cumsum().expanding().max()).min()

    comparison_data.append({
        'Model': model_name,
        'Final Value': results['final_value'],
        'Total Return (%)': results['total_return'],
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown': max_drawdown
    })

comparison_df = pd.DataFrame(comparison_data)
comparison_df = comparison_df.sort_values('Total Return (%)', ascending=False)

print("\nModel Performance Ranking:")
print(comparison_df.to_string(index=False))

# Save results
comparison_df.to_csv(f"{RESULTS_DIR}/industry_model_comparison_{lookback}_window.csv", index=False)

# Create performance visualization
plt.figure(figsize=(15, 10))

# Plot 1: Portfolio Value Over Time
plt.subplot(2, 2, 1)
for model_name, results in test_results.items():
    dates = pd.to_datetime(results['date_memory'])
    values = results['asset_memory']
    plt.plot(dates, values, label=f"{model_name}", linewidth=2)

plt.title('Portfolio Value Over Time (Industry Model: {} Window)'.format(lookback))
plt.xlabel('Date')
plt.ylabel('Portfolio Value ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)

# Plot 2: Total Returns Comparison
plt.subplot(2, 2, 2)
models = comparison_df['Model']
returns = comparison_df['Total Return (%)']
colors = plt.cm.viridis(np.linspace(0, 1, len(models)))
bars = plt.bar(models, returns, color=colors)
plt.title('Total Returns Comparison')
plt.xlabel('Model')
plt.ylabel('Total Return (%)')
plt.xticks(rotation=45)
for bar, ret in zip(bars, returns):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5, 
             f'{ret:.1f}%', ha='center', va='bottom')

# Plot 3: Sharpe Ratio Comparison
plt.subplot(2, 2, 3)
sharpe_ratios = comparison_df['Sharpe Ratio']
bars = plt.bar(models, sharpe_ratios, color=colors)
plt.title('Sharpe Ratio Comparison')
plt.xlabel('Model')
plt.ylabel('Sharpe Ratio')
plt.xticks(rotation=45)
for bar, sharpe in zip(bars, sharpe_ratios):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, 
             f'{sharpe:.3f}', ha='center', va='bottom')

# Plot 4: Max Drawdown Comparison (negative values, so we flip for visualization)
plt.subplot(2, 2, 4)
drawdowns = comparison_df['Max Drawdown'].abs()
bars = plt.bar(models, drawdowns, color=colors)
plt.title('Max Drawdown Comparison')
plt.xlabel('Model')
plt.ylabel('Max Drawdown (abs)')
plt.xticks(rotation=45)
for bar, dd in zip(bars, drawdowns):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001, 
             f'{dd:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.savefig(f"{RESULTS_DIR}/industry_model_comparison_{lookback}_window.png", dpi=300, bbox_inches='tight')
plt.show()



Model Performance Ranking:
Model  Final Value  Total Return (%)  Sharpe Ratio  Max Drawdown
  A2C 2.963331e+06        196.333086      0.853222     -0.437833
  PPO 2.699085e+06        169.908502      0.819840     -0.412611
 DDPG 2.657911e+06        165.791112      0.835930     -0.382699


In [35]:

print(f"\nResults saved to {RESULTS_DIR}/")
print("Analysis complete!")

# Step 9: Feature Importance Analysis (for the best performing model)
print("\n" + "="*60)
print("FEATURE IMPORTANCE ANALYSIS")
print("="*60)

best_model_name = comparison_df.iloc[0]['Model']
print(f"Analyzing feature importance for best model: {best_model_name}")

# Print FF factor statistics
print("\nIndustry Factor Statistics in Dataset:")
for i in ['boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma' ]:
    factor_data = processed_full[i]
    print(f"{i}:")
    print(f"  Mean: {factor_data.mean():.6f}")
    print(f"  Std:  {factor_data.std():.6f}")
    print(f"  Min:  {factor_data.min():.6f}")
    print(f"  Max:  {factor_data.max():.6f}")


Results saved to results/
Analysis complete!

FEATURE IMPORTANCE ANALYSIS
Analyzing feature importance for best model: DDPG

Industry Factor Statistics in Dataset:
boll_lb:
  Mean: 1876.277073
  Std:  4802.400106
  Min:  0.177937
  Max:  48782.728733
rsi_30:
  Mean: 53.992009
  Std:  10.143912
  Min:  10.767050
  Max:  92.790798
cci_30:
  Mean: 24.526670
  Std:  112.532043
  Min:  -683.239698
  Max:  651.973013
dx_30:
  Mean: 22.981763
  Std:  16.513114
  Min:  0.000182
  Max:  94.030229
close_30_sma:
  Mean: 1952.217829
  Std:  4991.042794
  Min:  0.217286
  Max:  49355.706814
close_60_sma:
  Mean: 1940.663028
  Std:  4959.959259
  Min:  0.228138
  Max:  48942.895230


In [32]:
print(f'\nTotal Features used: {len()}')

Model                         DDPG
Final Value         2760673.542961
Total Return (%)        176.067354
Sharpe Ratio               0.85105
Max Drawdown             -0.405062
Name: 2, dtype: object