## Portfolio construction & Optimization 

In [1]:
#Importing necessary librarie
import os
import sys
import numpy as np
import pandas as pd
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO

In [4]:
#Data Configuration
DATA_DIR = "SLTOP100/"
EQUITY_SCORES_FILE = 'SLTOP100 company_scores.csv'
TBILL_FILE = 'tbill.csv'
TBOND_FILE = 'tbonds.csv'
RISK_AVERSION = 1.0        # Risk penalty weight
TRAIN_STEPS = 100_000      # RL training timesteps

#Clean numeric data
def clean_numeric(series: pd.Series) -> pd.Series:
    num = pd.to_numeric(series.astype(str).str.replace(',', ''), errors='coerce')
    return num.ffill().bfill()

def find_date_column(columns) -> str:
    for col in columns:
        if any(keyword in col.lower() for keyword in ['date', 'week', 'time']):
            return col
    return list(columns)[0]

In [5]:
#Data Loading

#Equity
def load_equity_returns(data_dir: str, scores_file: str, top_n: int = 20) -> pd.DataFrame:
    scores = pd.read_csv(scores_file, index_col=0)
    tickers = scores['score'].sort_values(ascending=False).head(top_n).index.tolist()
    
    price_dfs = {}
    for ticker in tickers:
        path = os.path.join(data_dir, f"{ticker}.csv")
        df = pd.read_csv(path, dtype=str)
        df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
        df = df.set_index('Trade Date').sort_index()
        df = df[~df.index.duplicated(keep='last')]
        df['Close (Rs.)'] = clean_numeric(df['Close (Rs.)'])
        price_dfs[ticker] = df['Close (Rs.)']
    
    prices = pd.DataFrame(price_dfs)
    daily_returns = prices.pct_change(fill_method=None).dropna()
    weekly_returns = (1 + daily_returns).resample('W').apply(lambda x: (1 + x).prod() - 1).dropna()
    return weekly_returns

#Gov securities
def load_historical_bond_returns(tbill_file: str, tbond_file: str) -> pd.DataFrame:
    def process_file(file_path: str) -> pd.DataFrame:
        df = pd.read_csv(file_path)
        df.columns = df.columns.str.strip()
        date_col = find_date_column(df.columns)
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
        df = df.set_index(date_col).sort_index()
        df = df[~df.index.duplicated(keep='last')]
        for col in df.columns:
            df[col] = clean_numeric(df[col]) / 100.0  # convert yields to decimal
        return df.resample('W').ffill()
    
    tbill = process_file(tbill_file)
    tbond = process_file(tbond_file)
    rates = pd.concat([tbill, tbond], axis=1).dropna()
    bond_returns = rates.diff().dropna()
    return bond_returns

In [6]:
#Loading Data
eq_returns = load_equity_returns(DATA_DIR, EQUITY_SCORES_FILE)
bond_returns = load_historical_bond_returns(TBILL_FILE, TBOND_FILE)

#Cleaning and aligning
for df in [eq_returns, bond_returns]:
    df.dropna(how='all', inplace=True)

returns = eq_returns.join(bond_returns, how='inner').dropna(how='all')
if returns.empty:
    raise ValueError(
        f"No overlap found: equities={eq_returns.shape[0]} rows, bonds={bond_returns.shape[0]} rows."
    )

print(f"Using {returns.shape[0]} weeks and {returns.shape[1]} assets.")

  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], dayfirst=True, errors='coerce')
  df['Trade Date'] = pd.to_datetime(df['Trade Date'], 

Using 156 weeks and 36 assets.


In [7]:
#Portffolio Environment

class PortfolioEnv(gym.Env):
    metadata = {'render_modes': ['human']}

    def __init__(self, returns: pd.DataFrame, risk_aversion: float = 1.0):
        super().__init__()
        self.returns = returns.values
        self.n_assets = self.returns.shape[1]
        self.risk_aversion = risk_aversion
        self.max_steps = len(self.returns) - 1
        
        self.action_space = spaces.Box(low=0, high=1, shape=(self.n_assets,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.n_assets,), dtype=np.float32)

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.current_step = 0
        return self.returns[self.current_step], {}

    def step(self, action):
        w = np.clip(action, 0, None)
        w /= (w.sum() + 1e-8)  # normalize weights
        r = float(self.returns[self.current_step + 1] @ w)

        start_idx = max(0, self.current_step - 20)
        window = self.returns[start_idx:self.current_step + 1]
        risk = 0.0 if window.shape[0] < 2 else float(w @ np.cov(window.T) @ w)

        reward = r - self.risk_aversion * risk

        self.current_step += 1
        done = self.current_step >= self.max_steps
        obs = self.returns[self.current_step] if not done else np.zeros(self.n_assets)
        
        return obs, reward, done, False, {'ret': r, 'risk': risk}

    def render(self): pass

In [8]:
#Training PPO Agent
env = PortfolioEnv(returns, RISK_AVERSION)
model = PPO('MlpPolicy', env, verbose=1)
model.learn(total_timesteps=TRAIN_STEPS)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 155       |
|    ep_rew_mean     | -6.42e+04 |
| time/              |           |
|    fps             | 1791      |
|    iterations      | 1         |
|    time_elapsed    | 1         |
|    total_timesteps | 2048      |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 155          |
|    ep_rew_mean          | -6.57e+04    |
| time/                   |              |
|    fps                  | 1063         |
|    iterations           | 2            |
|    time_elapsed         | 3            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0060878387 |
|    clip_fraction        | 0.0538       |
|    clip_range           | 0.2         

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 155         |
|    ep_rew_mean          | -5.97e+04   |
| time/                   |             |
|    fps                  | 907         |
|    iterations           | 11          |
|    time_elapsed         | 24          |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.005421199 |
|    clip_fraction        | 0.0366      |
|    clip_range           | 0.2         |
|    entropy_loss         | -51.1       |
|    explained_variance   | 1.91e-06    |
|    learning_rate        | 0.0003      |
|    loss                 | 7.93e+07    |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.0129     |
|    std                  | 1           |
|    value_loss           | 1.74e+08    |
-----------------------------------------
------------------------------------------
| rollout/                |      

------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 155          |
|    ep_rew_mean          | -5.53e+04    |
| time/                   |              |
|    fps                  | 928          |
|    iterations           | 20           |
|    time_elapsed         | 44           |
|    total_timesteps      | 40960        |
| train/                  |              |
|    approx_kl            | 0.0054691676 |
|    clip_fraction        | 0.036        |
|    clip_range           | 0.2          |
|    entropy_loss         | -51.2        |
|    explained_variance   | 2.38e-07     |
|    learning_rate        | 0.0003       |
|    loss                 | 8.42e+07     |
|    n_updates            | 190          |
|    policy_gradient_loss | -0.014       |
|    std                  | 1            |
|    value_loss           | 1.25e+08     |
------------------------------------------
-----------------------------------------
| rollout/  

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 155       |
|    ep_rew_mean          | -5.21e+04 |
| time/                   |           |
|    fps                  | 799       |
|    iterations           | 29        |
|    time_elapsed         | 74        |
|    total_timesteps      | 59392     |
| train/                  |           |
|    approx_kl            | 0.0052784 |
|    clip_fraction        | 0.0337    |
|    clip_range           | 0.2       |
|    entropy_loss         | -51.2     |
|    explained_variance   | 0         |
|    learning_rate        | 0.0003    |
|    loss                 | 1.1e+07   |
|    n_updates            | 280       |
|    policy_gradient_loss | -0.0119   |
|    std                  | 1         |
|    value_loss           | 9.21e+07  |
---------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 155   

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 155         |
|    ep_rew_mean          | -4.93e+04   |
| time/                   |             |
|    fps                  | 785         |
|    iterations           | 38          |
|    time_elapsed         | 99          |
|    total_timesteps      | 77824       |
| train/                  |             |
|    approx_kl            | 0.006077987 |
|    clip_fraction        | 0.0454      |
|    clip_range           | 0.2         |
|    entropy_loss         | -51.2       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 2.51e+07    |
|    n_updates            | 370         |
|    policy_gradient_loss | -0.0138     |
|    std                  | 1           |
|    value_loss           | 7.53e+07    |
-----------------------------------------
------------------------------------------
| rollout/                |      

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 155         |
|    ep_rew_mean          | -4.65e+04   |
| time/                   |             |
|    fps                  | 807         |
|    iterations           | 47          |
|    time_elapsed         | 119         |
|    total_timesteps      | 96256       |
| train/                  |             |
|    approx_kl            | 0.006475144 |
|    clip_fraction        | 0.0515      |
|    clip_range           | 0.2         |
|    entropy_loss         | -51.2       |
|    explained_variance   | 0           |
|    learning_rate        | 0.0003      |
|    loss                 | 7.27e+07    |
|    n_updates            | 460         |
|    policy_gradient_loss | -0.0171     |
|    std                  | 1           |
|    value_loss           | 9.19e+07    |
-----------------------------------------
------------------------------------------
| rollout/                |      

<stable_baselines3.ppo.ppo.PPO at 0x2775824d060>

In [9]:
#Portfolio Evaluation
obs, _ = env.reset()
weights_hist, ret_hist = [], []

for _ in range(env.max_steps):
    action, _ = model.predict(obs, deterministic=True)
    obs, _, done, _, info = env.step(action)
    
    w = np.clip(action, 0, None)
    w /= (w.sum() + 1e-8)
    
    weights_hist.append(w)
    ret_hist.append(info['ret'])
    
    if done:
        break

In [10]:
#Save results
weights_df = pd.DataFrame(weights_hist, index=returns.index[1:], columns=returns.columns)
perf_df = pd.DataFrame({'portfolio_return': ret_hist}, index=returns.index[1:])

weights_df.to_csv('rl_portfolio_weights.csv')
perf_df.to_csv('rl_portfolio_performance.csv')

print("Saved portfolio weights & performance.")


Saved portfolio weights & performance.
