In [None]:
from stable_baselines3 import SAC
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv
import pandas as pd


from ThesisEnvironment  import PortfolioEnvironment as PorEnv
from sklearn.model_selection import train_test_split


In [68]:
# Load the raw price table from disk.
df = pd.read_csv(filepath_or_buffer="../../Data/StockPrices.csv")

# Chronological split: with shuffle=False the first 80% of the rows become the
# training set and the final 20% the test set, so no future rows leak into training.
stock_data_train, stock_data_test = train_test_split(df, shuffle=False, test_size=0.2)

# One ESG score per asset, in the order the environment expects them
# (presumably the same order as the stock columns in the CSV — TODO confirm).
esg_scores = [
    36.6, 35.3, 17.9, 18, 18,
    21.2, 18.7, 20, 19.8, 13.8,
    18.1, 19, 17.2, 14, 17.2,
    19.5, 19.7, 21.2, 26.8, 19.3,
]


In [86]:
# Training environment, wrapped in the vectorized interface that SB3 algorithms use.
# The environment is constructed *inside* the factory passed to DummyVecEnv instead of
# being bound to `train_env` first and then captured by a lambda: the previous form
# closed over a name that the very same statement rebinds to the wrapper, which only
# worked because DummyVecEnv calls its factories eagerly in the constructor.
train_env = DummyVecEnv(
    [
        lambda: PorEnv(
            stock_data_train,
            esg_scores,
            max_steps=100,
            window_size=10,
            esg_threshold=27,
        )
    ]
)


In [89]:
# Build the Soft Actor-Critic agent that will be trained on `train_env`.
model = SAC(
    "MlpPolicy",  # fully connected actor/critic networks
    train_env,    # vectorized training environment
    # --- network / optimisation ---
    policy_kwargs={"net_arch": [64, 64]},  # two hidden layers of 64 units each
    learning_rate=3e-4,  # optimizer learning rate
    batch_size=64,       # transitions sampled from the replay buffer per gradient update
    # --- replay buffer and update schedule ---
    buffer_size=1_000_000,  # maximum number of transitions kept in the replay buffer
    train_freq=1,           # update the model after every environment step
    gradient_steps=1,       # gradient steps performed per update
    # --- SAC-specific coefficients ---
    gamma=0.99,       # discount factor applied to future rewards
    tau=0.005,        # soft (Polyak) update coefficient for the target networks
    ent_coef="auto",  # entropy coefficient is learned automatically
    # --- bookkeeping ---
    verbose=1,  # print info messages and training logs
    seed=42,    # seed for the pseudo-random generators (reproducibility)
)

Using cpu device




In [90]:
# Total number of environment steps to collect during training.
N_TRAIN_TIMESTEPS = 5_000

# Run the training loop for the configured step budget.
model.learn(total_timesteps=N_TRAIN_TIMESTEPS)

# Persist the trained agent so it can be reloaded without retraining.
model.save("sac_portfolio_management")

---------------------------------
| time/              |          |
|    episodes        | 4        |
|    fps             | 51       |
|    time_elapsed    | 7        |
|    total_timesteps | 400      |
| train/             |          |
|    actor_loss      | -25.8    |
|    critic_loss     | 0.45     |
|    ent_coef        | 0.925    |
|    ent_coef_loss   | -2.6     |
|    learning_rate   | 0.0003   |
|    n_updates       | 299      |
---------------------------------
---------------------------------
| time/              |          |
|    episodes        | 8        |
|    fps             | 45       |
|    time_elapsed    | 17       |
|    total_timesteps | 800      |
| train/             |          |
|    actor_loss      | -43.3    |
|    critic_loss     | 0.267    |
|    ent_coef        | 0.82     |
|    ent_coef_loss   | -6.65    |
|    learning_rate   | 0.0003   |
|    n_updates       | 699      |
---------------------------------
---------------------------------
| time/       

In [91]:
# Held-out evaluation environment, built on the test split with the same settings as
# the training environment and wrapped in the vectorized interface.
# The environment is constructed *inside* the factory passed to DummyVecEnv instead of
# being bound to `test_env` first and then captured by a lambda: the previous form
# closed over a name that the very same statement rebinds to the wrapper, which only
# worked because DummyVecEnv calls its factories eagerly in the constructor.
test_env = DummyVecEnv(
    [
        lambda: PorEnv(
            stock_data_test,
            esg_scores,
            max_steps=100,
            window_size=10,
            esg_threshold=27,
        )
    ]
)


In [1]:
# NOTE(review): this whole evaluation cell is commented out, so nothing below runs.
# If it is re-enabled, it uses `np`, which the notebook's import cell does not bring
# into scope — `import numpy as np` would have to be added first.
# NOTE(review): `if dones:` tests the value returned by the vectorized env's step();
# presumably a length-1 array for this single-env wrapper — confirm before re-enabling.
# # Initialize the testing environment
# obs = test_env.reset()

# # Create a list to store the weights and portfolio values
# weights_history = []
# portfolio_values = []

# # Run the testing loop
# for _ in range(len(stock_data_test) - 1):  # Adjust for test data length
#     # Predict the action (portfolio weights) using the trained model
#     action, _states = model.predict(obs, deterministic=True)
    
#     # Normalize the action to ensure weights sum to 1
#     normalized_action = np.clip(action, 0, 1).astype("float64")  # Clip to [0, 1]
#     normalized_action /= np.sum(normalized_action)  # Normalize to sum to 1
    
#     # Execute the action in the testing environment
#     obs, rewards, dones, info = test_env.step(normalized_action)
    
#     # Store the normalized weights and portfolio value
#     weights_history.append(np.squeeze(normalized_action))  # Remove the extra dimension
#     portfolio_values.append(test_env.envs[0].cash)  # Access the cash value from the environment
    
#     # Render the environment (optional)
#     test_env.render()
    
#     # Reset the environment if the episode is done
#     if dones:
#         obs = test_env.reset()

# # Convert the weights history to a DataFrame
# weights_df = pd.DataFrame(weights_history, columns=[f"Stock_{i+1}" for i in range(test_env.envs[0].num_stocks)])

# # Add the portfolio values to the DataFrame
# # weights_df["Portfolio_Value"] = portfolio_values

# # Verify that the weights sum to 1
# # weights_df["Sum_of_Weights"] = weights_df[[f"Stock_{i+1}" for i in range(test_env.envs[0].num_stocks)]].sum(axis=1)
# # print(weights_df.head())

In [2]:
# NOTE(review): disabled export. In the visible notebook `weights_df` is only created
# by the commented-out evaluation cell, so that cell must run before this line can.
# weights_df.to_csv("../../Data/RL_weights.csv", index=False)
