### Main Module:
This notebook trains and evaluates our trading models (PPO, A2C, DDPG, and their ensemble) on individual stocks and on the Dow Jones index.

### Dependencies & Constants

In [1]:
# Import Dependencies

%matplotlib inline
import matplotlib.pyplot as plt
import os
import pandas as pd
from stable_baselines3.common.vec_env import DummyVecEnv 
from utils.data import download_data, load_viz_data
from utils.data import train_test_split, stock_env_technical_indicators
from utils.eval import analyze_performance #, compute_cum_returns



In [2]:
# Notebook-wide constants and parameters

# Date range passed to the data download / load helpers
START_DATE = "2009-01-01"
END_DATE = "2022-01-01"

# Ticker used for the Dow Jones experiment
TICKER = "^DJI"

# Tickers processed one at a time in the per-stock experiment
TICKERS = [
    "AMZN",
    "GOOGL",
    "AAPL",
    "MSFT",
    "^DJI",
    "SPY",
]

# Technical-indicator column names (unsuffixed alternatives: macd, rsi, cci, adx)
INDICATORS = [
    "macd",
    "rsi_30",
    "cci_30",
    "adx_30",
]

# model_class = 'A2C'

# Output directory for trained models; created up front if it is missing
save_dir = "trained_models/"
os.makedirs(save_dir, exist_ok=True)

### Data Downloading and Preprocessing

In [3]:
# Optional step: download and save the data for every ticker in TICKERS.
# NOTE: flip DOWNLOAD_DATA to True to download; the CSVs are already
# available in the data/ directory, so this is skipped by default.
DOWNLOAD_DATA = False

if DOWNLOAD_DATA:
    for symbol in TICKERS:
        download_data(symbol, START_DATE, END_DATE, save=True)


In [4]:
# Load & preprocess data: one processed DataFrame per ticker, in the same
# order as TICKERS. Each frame is loaded for the configured date range and
# then passed through stock_env_technical_indicators with INDICATORS.
data_pdframes = [
    stock_env_technical_indicators(
        load_viz_data(symbol, START_DATE, END_DATE),
        ticker=symbol,
        tech_ind=INDICATORS,
    )
    for symbol in TICKERS
]

# Preview the first processed frame
data_pdframes[0].head()

Unnamed: 0,date,close,high,low,open,volume,tic,timestamp,turbulence,rsi_30,cci_30,adx_30,macd
0,2009-03-30,3.572,3.5835,3.4875,3.52,167462000.0,AMZN,2009-03-30,0,57.946668,75.408823,26.251704,0.142687
1,2009-03-31,3.672,3.725,3.606,3.6305,178364000.0,AMZN,2009-03-31,0,59.595429,109.107678,26.295079,0.142672
2,2009-04-01,3.675,3.7545,3.5855,3.651,140828000.0,AMZN,2009-04-01,0,59.644532,103.430223,26.39561,0.141274
3,2009-04-02,3.817,3.862,3.672,3.6815,221338000.0,AMZN,2009-04-02,0,61.911071,131.829223,26.695876,0.149896
4,2009-04-03,3.9085,3.916,3.775,3.821,116198000.0,AMZN,2009-04-03,0,63.285599,147.276155,27.073228,0.162243


### Model Training

#### LSTM DRL

In [5]:
# LSTM model initialization

#### Ensemble Strategy

In [6]:
from ensemble_strategies import run_ensemble_strategy, run_ensemble_trading, make_env

In [7]:

# Train and test a PPO model on each individual stock, then plot every
# stock's cumulative-return curve on one shared figure and save it to disk.
#
# Outputs of this cell:
#   trained_models   -- ticker -> object returned by run_ensemble_strategy
#   portfolio_values -- ticker -> object returned by analyze_performance
trained_models = {}
portfolio_values = {}

# data_pdframes was built by iterating over TICKERS, so the two sequences are
# aligned; zip pairs each frame with its ticker without a manual counter.
for stock, stock_data in zip(TICKERS, data_pdframes):

    # Train-test split (first argument is the full frame, 80% used for training)
    train_data, test_data = train_test_split(stock_data, train_size=0.8)

    # Train
    trained = run_ensemble_strategy(train_data, models_list=["PPO"], stock=stock, total_timesteps=50000)
    trained_models[stock] = trained

    # Test: re-index from 0 so the first test row is at position/label 0
    test_data = test_data.reset_index(drop=True)
    stock_dimension = test_data['tic'].nunique()
    num_stock_shares = [0] * stock_dimension  # start with no holdings

    test_env_kwargs = {
        "df": test_data,
        "stock_dim": stock_dimension,
        "hmax": 100,
        "initial_amount": 1e6,
        "num_stock_shares": num_stock_shares,
        'buy_cost_pct': [0.001] * stock_dimension,
        'sell_cost_pct': [0.001] * stock_dimension,
        "reward_scaling": 1e-4,
        # 1 + 2 entries per stock + one entry per indicator per stock
        "state_space": 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension,
        "action_space": stock_dimension,
        "tech_indicator_list": INDICATORS,
        "turbulence_threshold": 250,
    }

    test_env = DummyVecEnv([make_env(test_env_kwargs)])
    portfolio = run_ensemble_trading(trained, test_env, n_steps=len(test_data))

    df_plot = analyze_performance(portfolio, stock=stock, start_date=str(test_data['date'].iloc[0]))
    portfolio_values[stock] = df_plot

# Optional DJIA baseline (disabled; compute_cum_returns is not imported above):
# djia = compute_cum_returns(data_pdframes[4], 'DOW', test_data)
# portfolio_values['DJIA'] = djia

# Visualize: start from an explicit, fresh figure so the summary curves are
# not drawn onto whatever figure happens to be current after the loop.
plt.figure(figsize=(14, 7))

for label, returns in portfolio_values.items():
    plt.plot(returns.index, returns, label=label)

plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.title('Cumulative Returns For Single Stock Over Time for PPO Model')
plt.legend()
plt.grid(True)
plt.tight_layout()

# savefig does not create missing parent directories, so make sure it exists.
os.makedirs("utils/results", exist_ok=True)
plt.savefig("utils/results/all_PPO_cumulative_returns.png")
plt.show()

#djia.head()



Training AMZN PPO model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 2942181.68
total_reward: 1942181.68
total_cost: 4499.91
total_trades: 2547
Sharpe: 0.838
AMZN PPO training completed and saved.

Ensemble strategy total reward: [32.942696]

Training GOOGL PPO model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 996759.16
total_reward: -3240.84
total_cost: 3867.01
total_trades: 2153
Sharpe: -0.149
GOOGL PPO training completed and saved.

Ensemble strategy total reward: [42.879]

Training AAPL PPO model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 1267712.59
total_reward: 267712.59
total_cost: 3639.10
total_trades: 2544
Sharpe: 0.434
AAPL PPO training completed and saved.

Ensemble strategy total reward: [26.205881]

Training MSFT PPO model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 3551202.12
total_reward: 2551202.12
total_cost: 6949.71
total_trades: 2544
Sharpe: 0.967
MSFT PPO tra

  plt.show()


In [8]:
# Train and test several model configurations (PPO, A2C, DDPG, and the
# three-model ensemble) on the TICKER data, then plot their cumulative-return
# curves on one figure and save it to disk.
#
# Outputs of this cell:
#   trained_models   -- configuration name -> object returned by run_ensemble_strategy
#   portfolio_values -- configuration name -> object returned by analyze_performance

data = load_viz_data(TICKER, START_DATE, END_DATE)
data_dow = stock_env_technical_indicators(data, ticker=TICKER, tech_ind=INDICATORS)

trained_models = {}
portfolio_values = {}
# Configuration name -> list of model names handed to run_ensemble_strategy
ensembles = {'PPO': ['PPO'], 'A2C': ['A2C'], 'DDPG': ['DDPG'], 'Ensemble': ['PPO', 'A2C', 'DDPG']}
stock = TICKER

for model_name, model_list in ensembles.items():

    # Train-test split
    train_data, test_data = train_test_split(data_dow, train_size=0.8)

    # Train
    trained = run_ensemble_strategy(train_data, models_list=model_list, stock=stock, total_timesteps=50000)

    # Key by configuration name: `stock` is the same for every iteration, so
    # using it as the key would leave only the last configuration in the dict.
    trained_models[model_name] = trained

    # Test: re-index from 0 so the first test row is at position/label 0
    test_data = test_data.reset_index(drop=True)
    stock_dimension = test_data['tic'].nunique()
    num_stock_shares = [0] * stock_dimension  # start with no holdings

    test_env_kwargs = {
        "df": test_data,
        "stock_dim": stock_dimension,
        "hmax": 100,
        "initial_amount": 1e6,
        "num_stock_shares": num_stock_shares,
        'buy_cost_pct': [0.001] * stock_dimension,
        'sell_cost_pct': [0.001] * stock_dimension,
        "reward_scaling": 1e-4,
        # 1 + 2 entries per stock + one entry per indicator per stock
        "state_space": 1 + 2 * stock_dimension + len(INDICATORS) * stock_dimension,
        "action_space": stock_dimension,
        "tech_indicator_list": INDICATORS,
        "turbulence_threshold": 250,
    }

    test_env = DummyVecEnv([make_env(test_env_kwargs)])
    portfolio = run_ensemble_trading(trained, test_env, n_steps=len(test_data))

    df_plot = analyze_performance(
        portfolio,
        stock=stock,
        start_date=str(test_data['date'].iloc[0]),
        plotting=False,
    )
    portfolio_values[model_name] = df_plot

# Optional DJIA baseline, for debugging (disabled; compute_cum_returns is not imported above):
# djia = compute_cum_returns(data_pdframes[4], 'DOW', test_data)
# portfolio_values['DJIA'] = djia

# Plotting results
plt.figure(figsize=(14, 7))

# Use a dedicated loop variable so the `stock` ticker above is not overwritten.
for label, returns in portfolio_values.items():
    plt.plot(returns.index, returns, label=label)

# Visualize
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.title('Cumulative Returns For a Portfolio for Different Models')
plt.legend()
plt.grid(True)
plt.tight_layout()

# savefig does not create missing parent directories, so make sure it exists.
os.makedirs("utils/results", exist_ok=True)
plt.savefig("utils/results/ensembles_DOW_portfolio_cumulative_returns.png")
plt.show()


Training ^DJI PPO model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 1832440.19
total_reward: 832440.19
total_cost: 609036.34
total_trades: 2504
Sharpe: 0.519
^DJI PPO training completed and saved.

Ensemble strategy total reward: [21.06396]

Training ^DJI A2C model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 3304816.80
total_reward: 2304816.80
total_cost: 50754.74
total_trades: 2568
Sharpe: 0.886
^DJI A2C training completed and saved.

Ensemble strategy total reward: [38.531567]

Training ^DJI DDPG model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 3447145.31
total_reward: 2447145.31
total_cost: 995.69
total_trades: 2570
Sharpe: 0.911
^DJI DDPG training completed and saved.

Ensemble strategy total reward: [38.531567]

Training ^DJI PPO model

day: 2570, episode: 10
begin_total_asset: 1000000.00
end_total_asset: 1108198.44
total_reward: 108198.44
total_cost: 640659.64
total_trades: 2305
Sharpe: 0.144
^DJI 

  plt.show()


#### LLM-based Sentiment Infusion

### Model Evaluation & Results