In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('Agg')

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3.common.vec_env import DummyVecEnv

from finrl.main import check_and_make_directories
from finrl.config import (
    DATA_SAVE_DIR,
    TRAINED_MODEL_DIR,
    TENSORBOARD_LOG_DIR,
    RESULTS_DIR,
    INDICATORS,
    TRAIN_START_DATE,
    TRAIN_END_DATE,
    TEST_START_DATE,
    TEST_END_DATE,
    TRADE_START_DATE,
    TRADE_END_DATE,
)
check_and_make_directories([DATA_SAVE_DIR, TRAINED_MODEL_DIR, TENSORBOARD_LOG_DIR, RESULTS_DIR])


import warnings
warnings.filterwarnings('ignore')
import itertools

In [2]:
# ETFs
TICKERS = ['XLP', 'XLY', 'XLI', 'XLE', 'XLK', 'IYZ', 'XRT', 'XLV', 'XLU', 'VTI']

# Mutual Funds
# TICKERS = ['VCSAX', 'FSCPX', 'VINAX', 'FSENX', 'VITAX', 'FSDCX', 'FSRPX', 'VGHCX', 'VUIAX', 'VEXAX']

# Futures
# TICKERS = ['SPSU', 'SPSD', 'SPSI', 'SPEN', 'SPTL', 'SPTS', 'SPSD', 'SPHC', 'SPUT', 'ES']

START_DATE = '1980-01-01'
END_DATE = '2024-12-31'
TRAIN_START_DATE = START_DATE
TRAIN_END_DATE = '2020-01-01'
TRADE_START_DATE = '2020-01-01'
TRADE_END_DATE = '2022-11-20'

print('Downloading data...')
stock_data = YahooDownloader(
    ticker_list=TICKERS,
    start_date=START_DATE,
    end_date=END_DATE,
).fetch_data()

stock_data.head()

Downloading data...


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (62600, 8)


Price,date,close,high,low,open,volume,tic,day
0,1998-12-22,11.746051,11.809159,11.706608,11.769716,15200,XLE,1
1,1998-12-22,14.582212,14.582212,14.533279,14.533279,600,XLI,1
2,1998-12-22,23.943205,24.281748,23.744748,24.211705,300500,XLK,1
3,1998-12-22,14.274884,14.291718,13.938212,13.938212,150300,XLP,1
4,1998-12-22,11.913822,12.082326,11.913822,12.082326,7900,XLU,1


In [3]:
stock_data["date"] = pd.to_datetime(stock_data["date"])

In [4]:
df = stock_data.copy()
df["date"] = df["date"].dt.strftime("%Y-%m-%d")

print(f"Date column type before: {stock_data['date'].dtype}")
print(f"Date column type after: {df['date'].dtype}")

fe = FeatureEngineer(
    use_technical_indicator=True,
    tech_indicator_list=INDICATORS,
    use_vix=True,
    use_turbulence=True,
    user_defined_feature=False,
)

print("Preprocessing data with FeatureEngineer...")
processed_data = fe.preprocess_data(df)

# Convert date to datetime after processing
processed_data["date"] = pd.to_datetime(processed_data["date"])
processed_data = processed_data.dropna().reset_index(drop=True)

print(f"Processed data shape: {processed_data.shape}")
print(f"Final date column type: {processed_data['date'].dtype}")
print(f"Features: {processed_data.columns.tolist()}")

processed_data.head()

Date column type before: datetime64[ns]
Date column type after: object
Preprocessing data with FeatureEngineer...
Successfully added technical indicators


[*********************100%***********************]  1 of 1 completed


Shape of DataFrame:  (6546, 8)
Successfully added vix
Successfully added turbulence index
Processed data shape: (45822, 18)
Final date column type: datetime64[ns]
Features: ['date', 'close', 'high', 'low', 'open', 'volume', 'tic', 'day', 'macd', 'boll_ub', 'boll_lb', 'rsi_30', 'cci_30', 'dx_30', 'close_30_sma', 'close_60_sma', 'vix', 'turbulence']


Unnamed: 0,date,close,high,low,open,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence
0,1998-12-22,11.746051,11.809159,11.706608,11.769716,15200,XLE,1,0.0,12.214169,11.522481,100.0,66.666667,100.0,11.746051,11.746051,22.780001,0.0
1,1998-12-22,14.582212,14.582212,14.533279,14.533279,600,XLI,1,0.0,12.214169,11.522481,100.0,66.666667,100.0,14.582212,14.582212,22.780001,0.0
2,1998-12-22,23.943205,24.281748,23.744748,24.211705,300500,XLK,1,0.0,12.214169,11.522481,100.0,66.666667,100.0,23.943205,23.943205,22.780001,0.0
3,1998-12-22,14.274884,14.291718,13.938212,13.938212,150300,XLP,1,0.0,12.214169,11.522481,100.0,66.666667,100.0,14.274884,14.274884,22.780001,0.0
4,1998-12-22,11.913822,12.082326,11.913822,12.082326,7900,XLU,1,0.0,12.214169,11.522481,100.0,66.666667,100.0,11.913822,11.913822,22.780001,0.0


In [5]:
processed_data = processed_data.sort_values(["date", "tic"], ignore_index=True)
processed_data.index = processed_data.date.factorize()[0]

cov_list = []
return_list = []

lookback = 252
for i in range(lookback, len(processed_data.index.unique())):
    data_lookback = processed_data.iloc[i - lookback : i]
    price_lookback = data_lookback.pivot(index="date", columns="tic", values="close")
    return_lookback = price_lookback.pct_change().dropna()
    return_list.append(return_lookback)

    cov = return_lookback.cov()
    cov_list.append(cov)

df_cov = pd.DataFrame(
    {
        "date": processed_data.date.unique()[lookback:],
        "cov_list": cov_list,
        "return_list": return_list,
    }
)
processed_data = processed_data.merge(df_cov, on="date")
processed_data = processed_data.sort_values(["date", "tic"]).reset_index(drop=True)

processed_data.head()

Unnamed: 0,date,close,high,low,open,volume,tic,day,macd,boll_ub,boll_lb,rsi_30,cci_30,dx_30,close_30_sma,close_60_sma,vix,turbulence,cov_list,return_list
0,1999-12-22,13.516106,13.628339,13.500072,13.548172,605300,XLE,2,-0.103612,14.166833,13.373419,45.574828,-104.984033,22.227571,13.977273,13.829793,22.43,0.0,tic XLE XLI XLK XLP ...,tic XLE XLI XLK ...
1,1999-12-22,17.689074,17.788006,17.629714,17.73854,511700,XLI,2,0.010759,17.973617,17.176688,49.821565,11.632671,7.185298,17.667051,17.63955,22.43,0.0,tic XLE XLI XLK XLP ...,tic XLE XLI XLK ...
2,1999-12-22,39.645401,40.112367,39.120064,39.972277,307700,XLK,2,1.361656,39.79493,34.553062,69.913484,160.09659,38.563927,36.267855,33.651679,22.43,0.0,tic XLE XLI XLK XLP ...,tic XLE XLI XLK ...
3,1999-12-22,12.483128,12.57673,12.321451,12.363998,750600,XLP,2,-0.275549,14.05647,11.978246,42.296779,-110.384911,31.926271,13.197083,13.134717,22.43,0.0,tic XLE XLI XLK XLP ...,tic XLE XLI XLK ...
4,1999-12-22,11.768038,11.88982,11.742399,11.857772,84600,XLU,2,-0.033414,12.05539,11.618069,47.905721,-59.163115,6.755286,11.878775,11.891841,22.43,0.0,tic XLE XLI XLK XLP ...,tic XLE XLI XLK ...


In [6]:
train = data_split(processed_data, TRAIN_START_DATE, end=TRAIN_END_DATE)
trade = data_split(processed_data, TRADE_START_DATE, end=TRADE_END_DATE)

In [7]:
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv

In [8]:
stock_dimensions = len(train.tic.unique())
state_space = stock_dimensions
print(f'Stock dimensions: {stock_dimensions}, State Space: {state_space}')

Stock dimensions: 7, State Space: 7


In [9]:
env_kwargs = {
    "hmax": 100,
    "initial_amount": 1000000,
    "transaction_cost_pct": 0.005,
    "state_space": state_space,
    "stock_dim": stock_dimensions,
    "tech_indicator_list": INDICATORS,
    "action_space": stock_dimensions,
    "reward_scaling": 1e-4,
}

e_train_gym = StockPortfolioEnv(df=train, **env_kwargs)
e_trade_gym = StockPortfolioEnv(df=trade, **env_kwargs)

In [10]:
models_to_train = {
    "PPO": {
        "total_timesteps": 50000,
        "policy": "MlpPolicy",
        "model_kwargs": {
            "learning_rate": 0.0003,
            "n_steps": 2048,
            "batch_size": 64,
            "n_epochs": 10,
            "gamma": 0.99,
            "gae_lambda": 0.95,
            "clip_range": 0.2,
            "vf_coef": 0.5,
            "max_grad_norm": 0.5,
        },
    },
    "A2C": {
        "total_timesteps": 50000,
        "policy": "MlpPolicy",
        "model_kwargs": {
            'learning_rate': 0.0007,
            'n_steps': 5,
            'gamma': 0.99,
            'gae_lambda': 1.0,
            'ent_coef':0.01,
            'vf_coef':0.25,
            'max_grad_norm':0.5,
        },
    },
    "DDPG": {
        "total_timesteps": 50000,
        "policy": "MlpPolicy",
        "model_kwargs": {
            "learning_rate": 0.001,
            "buffer_size": 1000000,
            "learning_starts": 100,
            "batch_size": 100,
            "tau": 0.005,
            "gamma": 0.99,
        },
    },
}

In [11]:
trained_models = {}
model_results = {}

for model_name, config in models_to_train.items():
    print(f"\n{'='*50}")
    print(f"Training {model_name} model...")
    print(f"{'='*50}")

    try:
        agent = DRLAgent(env=e_train_gym)

        model = agent.get_model(
            model_name=model_name.lower(),
            policy=config["policy"],
            model_kwargs=config["model_kwargs"],
        )

        trained_model = agent.train_model(
            model=model,
            total_timesteps=config["total_timesteps"],
            tb_log_name=model_name.lower(),
        )

        model_path = f"{TRAINED_MODEL_DIR}/{model_name.lower()}_ff_model"
        trained_model.save(model_path)
        trained_models[model_name] = trained_model

        print(f"{model_name} training completed and saved!")

    except Exception as e:
        print(f"Error training {model_name}: {str(e)}")
        continue

print(f"\nSuccessfully trained {len(trained_models)} models")


Training PPO model...
{'learning_rate': 0.0003, 'n_steps': 2048, 'batch_size': 64, 'n_epochs': 10, 'gamma': 0.99, 'gae_lambda': 0.95, 'clip_range': 0.2, 'vf_coef': 0.5, 'max_grad_norm': 0.5}
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| time/              |           |
|    fps             | 430       |
|    iterations      | 1         |
|    time_elapsed    | 4         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 1324263.8 |
----------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 375          |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 9.837095e-09 |
|    clip_fraction        | 0            |
|    clip_r

In [12]:
def test_model(model, model_name, env):
    """ "
    Test a trained model return results
    """
    print(f"\nTesting {model_name}...")

    obs = env.reset()

    for i in range(len(env.get_attr("df")[0].index.unique()) - 1):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        if dones:
            break

    asset_memory = env.get_attr("asset_memory")[0]
    daily_returns = np.diff(asset_memory) / asset_memory[:-1]

    results = {
        "final_value": asset_memory[-1],
        "total_return": (asset_memory[-1] / env.get_attr("initial_amount")[0] - 1)
        * 100,
        "daily_returns": daily_returns,
        "asset_memory": asset_memory,
        "date_memory": env.get_attr("date_memory")[0],
        "actions_memory": env.get_attr("actions_memory")[0],
    }
    return results

In [13]:
test_results = {}

for model_name, model in trained_models.items():
    try:
        # Create a vectorized environment for testing
        test_env = DummyVecEnv([lambda: e_trade_gym])

        # Reset test environment
        obs = test_env.reset()

        # Test model
        results = test_model(model, model_name, test_env)
        test_results[model_name] = results

        print(f"{model_name} Results:")
        print(f" Final Portfolio Value: ${results['final_value']:,.2f}")
        print(f" Total Return: {results['total_return']:.2f}%")

        # Calculate additional metrics
        daily_returns = pd.Series(results["daily_returns"])
        sharpe_ratio = daily_returns.mean() / daily_returns.std() * np.sqrt(252)
        max_drawdown = daily_returns.cumsum().min()

        print(f" Sharpe Ratio: {sharpe_ratio:.4f}")
        print(f" Max Drawdown: {max_drawdown:.4f}")

    except Exception as e:
        print(f"Error testing {model_name}: {str(e)}")
        continue


Testing PPO...
PPO Results:
 Final Portfolio Value: $1,439,709.65
 Total Return: 43.97%
 Sharpe Ratio: 0.6353
 Max Drawdown: -0.4051

Testing A2C...
A2C Results:
 Final Portfolio Value: $1,484,404.91
 Total Return: 48.44%
 Sharpe Ratio: 0.6744
 Max Drawdown: -0.4227

Testing DDPG...
DDPG Results:
 Final Portfolio Value: $1,332,054.91
 Total Return: 33.21%
 Sharpe Ratio: 0.5481
 Max Drawdown: -0.3376


In [15]:
# Create comparsion dataframe
comparison_data = []
for model_name, results in test_results.items():
    daily_returns = pd.Series(results['daily_returns'])
    sharpe_ratio = daily_returns.mean() / daily_returns.std() * np.sqrt(252)
    max_drawdown = (daily_returns.cumsum() - daily_returns.cumsum().expanding().max()).min()

    comparison_data.append({
        'Model': model_name,
        'Final Value': results['final_value'],
        'Total Return (%)': results['total_return'],
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown': max_drawdown
    })

comparison_df = pd.DataFrame(comparison_data)
comparison_df = comparison_df.sort_values('Total Return (%)', ascending=False)

print("\nModel Performance Ranking:")
print(comparison_df.to_string(index=False))

# Save results
comparison_df.to_csv(f"{RESULTS_DIR}/industry_model_comparison_{lookback}_window.csv", index=False)

# Create performance visualization
plt.figure(figsize=(15, 10))

# Plot 1: Portfolio Value Over Time
plt.subplot(2, 2, 1)
for model_name, results in test_results.items():
    dates = pd.to_datetime(results['date_memory'])
    values = results['asset_memory']
    plt.plot(dates, values, label=f"{model_name}", linewidth=2)

plt.title('Portfolio Value Over Time (Industry Model: {} Window)'.format(lookback))
plt.xlabel('Date')
plt.ylabel('Portfolio Value ($)')
plt.legend()
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)

# Plot 2: Total Returns Comparison
plt.subplot(2, 2, 2)
models = comparison_df['Model']
returns = comparison_df['Total Return (%)']
colors = plt.cm.viridis(np.linspace(0, 1, len(models)))
bars = plt.bar(models, returns, color=colors)
plt.title('Total Returns Comparison')
plt.xlabel('Model')
plt.ylabel('Total Return (%)')
plt.xticks(rotation=45)
for bar, ret in zip(bars, returns):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5, 
             f'{ret:.1f}%', ha='center', va='bottom')

# Plot 3: Sharpe Ratio Comparison
plt.subplot(2, 2, 3)
sharpe_ratios = comparison_df['Sharpe Ratio']
bars = plt.bar(models, sharpe_ratios, color=colors)
plt.title('Sharpe Ratio Comparison')
plt.xlabel('Model')
plt.ylabel('Sharpe Ratio')
plt.xticks(rotation=45)
for bar, sharpe in zip(bars, sharpe_ratios):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, 
             f'{sharpe:.3f}', ha='center', va='bottom')

# Plot 4: Max Drawdown Comparison (negative values, so we flip for visualization)
plt.subplot(2, 2, 4)
drawdowns = comparison_df['Max Drawdown'].abs()
bars = plt.bar(models, drawdowns, color=colors)
plt.title('Max Drawdown Comparison')
plt.xlabel('Model')
plt.ylabel('Max Drawdown (abs)')
plt.xticks(rotation=45)
for bar, dd in zip(bars, drawdowns):
    plt.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.001, 
             f'{dd:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.savefig(f"{RESULTS_DIR}/industry_model_comparison_{lookback}_window.png", dpi=300, bbox_inches='tight')
plt.show()



Model Performance Ranking:
Model  Final Value  Total Return (%)  Sharpe Ratio  Max Drawdown
  A2C 1.484405e+06         48.440491      0.674410     -0.436207
  PPO 1.439710e+06         43.970965      0.635252     -0.433477
 DDPG 1.332055e+06         33.205491      0.548114     -0.375929
