In [None]:
!pip install stable-baselines3[extra]
import numpy as np
import pandas as pd
import gymnasium
from gymnasium import spaces
from sklearn.preprocessing import StandardScaler
from stable_baselines3 import PPO
from stable_baselines3.common.env_checker import check_env



In [None]:
# Download the sector volatility data
!gdown 1eWKF-p-nBNiMsNAbGVeAYWAMgb7WbVEW

# 7-day sector volatility data
!gdown 1YscGkU7fVnUDF6yfHl7LwbTcx6e7Nf2S

# Download the sector volatility + geopolitical data (set 1)
!gdown 1kr6rRuli9JuUqRcBGnp2So_IeRVBk7Qq

# Download the sector volatility + geopolitical data (set 2)
!gdown 1P2LXh8gwsfggiA1TTzGU9mdYYhV3hEMB

# NOTE(review): this download had no description; the recorded cell output shows
# it being saved as 'tft_baseline+geo2_metrics_comparison.csv' — confirm its purpose.
!gdown 1uf70EYuQmeD5gkBAiPWTSEwyd3iRmvG-

Downloading...
From: https://drive.google.com/uc?id=1eWKF-p-nBNiMsNAbGVeAYWAMgb7WbVEW
To: /content/sector_volatility.csv
100% 1.65M/1.65M [00:00<00:00, 66.2MB/s]
Downloading...
From: https://drive.google.com/uc?id=1YscGkU7fVnUDF6yfHl7LwbTcx6e7Nf2S
To: /content/sector_volatility7d.csv
100% 1.66M/1.66M [00:00<00:00, 154MB/s]
Downloading...
From: https://drive.google.com/uc?id=1kr6rRuli9JuUqRcBGnp2So_IeRVBk7Qq
To: /content/sector_vol_with_geo.csv
100% 1.84M/1.84M [00:00<00:00, 195MB/s]
Downloading...
From: https://drive.google.com/uc?id=1P2LXh8gwsfggiA1TTzGU9mdYYhV3hEMB
To: /content/sector_vol_with_geo_2_7d.csv
100% 3.24M/3.24M [00:00<00:00, 192MB/s]
Downloading...
From: https://drive.google.com/uc?id=1uf70EYuQmeD5gkBAiPWTSEwyd3iRmvG-
To: /content/tft_baseline+geo2_metrics_comparison.csv
100% 6.28k/6.28k [00:00<00:00, 25.9MB/s]


In [None]:
# --- TAHAP 1: MEMUAT DAN TRANSFORMASI DATA (LOGIKA BARU SESUAI SARAN) ---
def preprocess_data_for_drl(file_path):
    """
    Load the long-format sector CSV and reshape it into wide state/return tables.

    The CSV is expected to hold one row per (Date, Sector). The columns read
    here are ``Date``, ``Sector``, ``SectorVolatility_7d``, ``SectorReturn_avg``
    and the news/geopolitical columns named in ``feature_mapping``
    (``GPR_Daily``, ``GPR_Threat_Daily``, ``ArticlesCount_Daily``).

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        tuple: ``(final_state, final_returns, sector_names)`` where

        * ``final_state`` is a DataFrame indexed by Date with one
          ``'<Sector>_BestNewsFeature'`` and one ``'<Sector>_SectorVolatility_7d'``
          column per sector,
        * ``final_returns`` is a DataFrame indexed by the same dates with one
          ``'<Sector>_SectorReturn_avg'`` column per sector,
        * ``sector_names`` lists the distinct ``Sector`` values in order of
          first appearance in the file.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist (from ``pd.read_csv``).
        KeyError: If one of the columns listed above is missing from the file.
    """
    df_long = pd.read_csv(file_path)
    df_long['Date'] = pd.to_datetime(df_long['Date'])

    print("Membaca data mentah (format panjang)...")

    # --- Feature engineering: pick one news feature per sector (assumption) ---
    # Rather than feeding every news feature to the agent, a single feature is
    # selected for each sector. This mapping is an ASSUMPTION and should be
    # adjusted to match the underlying research.
    feature_mapping = {
        'Basic Materials': 'GPR_Threat_Daily',
        'Consumer Cyclicals': 'ArticlesCount_Daily',
        'Consumer Non-Cyclicals': 'ArticlesCount_Daily',
        'Energy': 'ArticlesCount_Daily',
        'Financials': 'GPR_Daily',
        'Industrials': 'ArticlesCount_Daily',
        'Infrastuctures': 'GPR_Threat_Daily',
        # 'Kesehatan': 'ArticlesCount_Daily',
        'Properties & Real Estate': 'GPR_Threat_Daily',
        'Technology': 'ArticlesCount_Daily',
        'Transportation & Logistic': 'GPR_Daily',
    }
    # Sectors that are absent from the mapping fall back to this column.
    default_feature = 'GPR_Daily'

    # Resolve the feature column name for every row, then copy the values one
    # feature column at a time. This yields the same result as a row-wise
    # DataFrame.apply but avoids building a Series object per row.
    feature_per_row = df_long['Sector'].map(
        lambda sector: feature_mapping.get(sector, default_feature)
    )
    best_news_feature = pd.Series(np.nan, index=df_long.index, dtype='float64')
    for feature_name in feature_per_row.unique():
        uses_feature = feature_per_row == feature_name
        best_news_feature.loc[uses_feature] = df_long.loc[uses_feature, feature_name]
    df_long['BestNewsFeature'] = best_news_feature

    print("Melakukan feature engineering (memilih fitur berita terbaik)...")

    # --- Split the data ---
    # 1. Wide table for the STATE (volatility and news feature only).
    #    pivot_table aggregates duplicate (Date, Sector) rows with its default mean.
    df_state_wide = df_long.pivot_table(
        index='Date',
        columns='Sector',
        values=['SectorVolatility_7d', 'BestNewsFeature']
    )
    df_state_wide.columns = [f'{col[1]}_{col[0]}' for col in df_state_wide.columns]

    # 2. Wide table for the RETURNS (returns only).
    df_returns_wide = df_long.pivot_table(
        index='Date',
        columns='Sector',
        values=['SectorReturn_avg']
    )
    df_returns_wide.columns = [f'{col[1]}_{col[0]}' for col in df_returns_wide.columns]

    # Keep only the dates present in both tables.
    aligned_state, aligned_returns = df_state_wide.align(df_returns_wide, join='inner', axis=0)

    # Forward-fill gaps, then drop the rows that still contain NaN (leading
    # rows with nothing to fill from). DataFrame.ffill() replaces the deprecated
    # fillna(method='ffill') call.
    aligned_state = aligned_state.ffill().dropna()
    aligned_returns = aligned_returns.ffill().dropna()

    # Re-align because dropna may have removed different dates from each table.
    final_state, final_returns = aligned_state.align(aligned_returns, join='inner', axis=0)

    print("\nData berhasil ditransformasi dan dipisahkan untuk State dan Reward.")
    print(f"Bentuk data State (baris, kolom): {final_state.shape}")
    print(f"Bentuk data Return (baris, kolom): {final_returns.shape}")

    sector_names = df_long['Sector'].unique().tolist()

    return final_state, final_returns, sector_names

In [None]:
# --- STAGE 3: MAIN SCRIPT — LOAD THE PREPROCESSED DATA AND INSPECT IT ---
if __name__ == '__main__':
    FILE_NAME = 'sector_vol_with_geo_2_7d.csv'
    try:
        # Receives 3 values: the state dataframe, the return dataframe and the sector names.
        df_proc_state, df_proc_returns, sectors = preprocess_data_for_drl(FILE_NAME)
    except FileNotFoundError:
        print(f"ERROR: File '{FILE_NAME}' tidak ditemukan. Pastikan file berada di folder yang sama.")
        # Re-raise instead of calling exit(): inside a notebook exit() shuts the
        # kernel down, whereas re-raising stops this cell (and a Run-All) while
        # keeping the traceback and the kernel state.
        raise

    # Kept inside the guard so it only runs when df_proc_state was actually created.
    df_proc_state.info()

Membaca data mentah (format panjang)...
Melakukan feature engineering (memilih fitur berita terbaik)...

Data berhasil ditransformasi dan dipisahkan untuk State dan Reward.
Bentuk data State (baris, kolom): (2575, 22)
Bentuk data Return (baris, kolom): (2575, 11)
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2575 entries, 2015-01-13 to 2025-06-25
Data columns (total 22 columns):
 #   Column                                         Non-Null Count  Dtype  
---  ------                                         --------------  -----  
 0   Basic Materials_BestNewsFeature                2575 non-null   float64
 1   Consumer Cyclicals_BestNewsFeature             2575 non-null   float64
 2   Consumer Non-Cyclicals_BestNewsFeature         2575 non-null   float64
 3   Energy_BestNewsFeature                         2575 non-null   float64
 4   Financials_BestNewsFeature                     2575 non-null   float64
 5   Industrials_BestNewsFeature                    2575 non-null   float64
 6 

  aligned_state.fillna(method='ffill', inplace=True)
  aligned_returns.fillna(method='ffill', inplace=True)


In [None]:
# --- STAGE 2: CUSTOM DRL ENVIRONMENT ---
class PortfolioEnv(gymnasium.Env):
    """
    Gymnasium environment for sector allocation that keeps the observation
    data (state) separate from the return data used to compute the reward.

    On every step the action is turned into portfolio weights with a softmax,
    the weights are held over the next ``episode_length`` rows of
    ``df_returns`` and the reward is the clipped Sharpe ratio (risk-free rate
    assumed to be 0) of the simulated daily portfolio returns.

    Args:
        df_state: DataFrame of observation features; it is standardised with
            a ``StandardScaler`` fitted on the whole frame.
        df_returns: DataFrame with one ``'<sector>_SectorReturn_avg'`` column
            per entry of ``sector_names``; rows must correspond positionally
            to the rows of ``df_state``.
        sector_names: Sector names; their order defines the order of the
            action / weight vector.
        episode_length: Number of return rows simulated for each step.

    Raises:
        ValueError: If ``df_state`` and ``df_returns`` have different lengths.
        KeyError: If a sector's return column is missing from ``df_returns``.
    """
    def __init__(self, df_state, df_returns, sector_names, episode_length=7):
        super().__init__()

        # step() pairs both frames by row position, so they must be the same length.
        if len(df_state) != len(df_returns):
            raise ValueError(
                "df_state dan df_returns harus memiliki jumlah baris yang sama "
                f"({len(df_state)} != {len(df_returns)})."
            )

        self.df_returns = df_returns
        self.sector_names = sector_names
        self.num_sectors = len(sector_names)
        self.episode_length = episode_length
        self.initial_capital = 1_000_000

        # Resolve the return columns once (instead of on every simulated day)
        # and fail early if any of them is missing.
        self._return_columns = [f'{sector}_SectorReturn_avg' for sector in self.sector_names]
        missing_columns = [col for col in self._return_columns if col not in df_returns.columns]
        if missing_columns:
            raise KeyError(f"Kolom return tidak ditemukan di df_returns: {missing_columns}")

        # Normalise the observations; this matters for training stability.
        # The scaler is fitted on the full frame passed in.
        self.scaler = StandardScaler()
        self.df_state = pd.DataFrame(self.scaler.fit_transform(df_state), columns=df_state.columns)

        # Action space and observation space
        self.action_space = spaces.Box(low=0, high=1, shape=(self.num_sectors,), dtype=np.float32)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(len(self.df_state.columns),), dtype=np.float32)

        self.current_step = 0
        self.max_sharpe_reward = 10.0  # symmetric upper/lower bound applied to the reward

    def _get_state(self):
        """Return the already-normalised observation row at ``current_step`` as float32."""
        return self.df_state.iloc[self.current_step].values.astype(np.float32)

    def reset(self, seed=None, options=None):
        """
        Start a new episode at a random row.

        Args:
            seed: Optional seed forwarded to ``gymnasium.Env.reset``; it seeds
                ``self.np_random``, which is used to draw the start row.
            options: Unused.

        Returns:
            tuple: ``(observation, info)`` with an empty info dict.

        Raises:
            ValueError: If the data is too short for a single episode.
        """
        super().reset(seed=seed)
        # Guard against data that is too short for one episode.
        if len(self.df_state) <= self.episode_length + 1:
            raise ValueError("Dataframe tidak cukup panjang untuk satu episode.")

        # Draw from the environment's own seeded generator so reset(seed=...)
        # is reproducible; the upper bound is exclusive, as with np.random.randint.
        upper_bound = len(self.df_state) - self.episode_length - 1
        self.current_step = int(self.np_random.integers(0, upper_bound))
        return self._get_state(), {}

    def step(self, action):
        """
        Simulate holding the chosen allocation over the next ``episode_length`` rows.

        Args:
            action: Array with one raw score per sector; converted to weights
                with a softmax so the weights are positive and sum to 1.

        Returns:
            tuple: ``(observation, reward, terminated, truncated, info)``.
            ``reward`` is the Sharpe ratio of the simulated period clipped to
            ``[-max_sharpe_reward, max_sharpe_reward]``; ``info`` holds the
            PnL, final portfolio value, unclipped Sharpe ratio and weights.
        """
        # Softmax with the maximum subtracted first: same result, no overflow in exp.
        scores = np.asarray(action, dtype=np.float64)
        exp_scores = np.exp(scores - np.max(scores))
        weights = exp_scores / np.sum(exp_scores)

        start_sim_step = self.current_step + 1
        end_sim_step = start_sim_step + self.episode_length
        simulation_period_returns = (
            self.df_returns.iloc[start_sim_step:end_sim_step][self._return_columns]
            .to_numpy(dtype=np.float64)
        )

        # Capital per sector compounds with that sector's daily return; the
        # cumulative product gives the growth factor at the end of each day.
        growth_factors = np.cumprod(1.0 + simulation_period_returns, axis=0)
        capital_per_sector = self.initial_capital * weights * growth_factors
        portfolio_values = [float(self.initial_capital), *capital_per_sector.sum(axis=1).tolist()]

        # Daily returns of the total portfolio value.
        portfolio_daily_returns = pd.Series(portfolio_values).pct_change().dropna()

        # Sharpe ratio as reward (risk-free rate assumed 0, not annualised).
        # The tolerance avoids dividing by ~0; a NaN std (fewer than two
        # returns) also falls through to 0.0.
        daily_std = portfolio_daily_returns.std()
        if daily_std > 1e-6:
            sharpe_ratio = portfolio_daily_returns.mean() / daily_std
        else:
            sharpe_ratio = 0.0

        # Clip the reward so extreme values cannot destabilise training, and
        # return it as a plain Python float.
        reward = float(np.clip(sharpe_ratio, -self.max_sharpe_reward, self.max_sharpe_reward))

        self.current_step += 1
        terminated = self.current_step >= len(self.df_state) - self.episode_length - 2
        truncated = False

        # Extra data for analysis and debugging.
        info = {
            'portfolio_pnl': portfolio_values[-1] - portfolio_values[0],
            'final_portfolio_value': portfolio_values[-1],
            'sharpe_ratio': sharpe_ratio,
            'weights': weights
        }

        return self._get_state(), reward, terminated, truncated, info

In [None]:
# --- STAGE 3: MAIN SCRIPT — TRAIN THE AGENT AND PRODUCE AN ALLOCATION ---
if __name__ == '__main__':
    FILE_NAME = 'sector_vol_with_geo_2_7d.csv'
    try:
        # Receives 3 values: the state dataframe, the return dataframe and the sector names.
        df_proc_state, df_proc_returns, sectors = preprocess_data_for_drl(FILE_NAME)
    except FileNotFoundError:
        print(f"ERROR: File '{FILE_NAME}' tidak ditemukan. Pastikan file berada di folder yang sama.")
        # Re-raise instead of calling exit(): inside a notebook exit() shuts the
        # kernel down, whereas re-raising stops this cell and keeps the traceback.
        raise

    # NOTE(review): 'Kesehatan' has no entry in feature_mapping (it is commented
    # out there), so this column presumably only holds the default fallback
    # feature — confirm that this is why it is removed from the state.
    df_proc_state = df_proc_state.drop(columns=['Kesehatan_BestNewsFeature'])

    # Pass both dataframes to the environment.
    env = PortfolioEnv(df_proc_state, df_proc_returns, sectors)
    check_env(env)

    model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./ppo_portfolio_tensorboard/")

    print("\n--- MEMULAI PELATIHAN AGEN DRL ---")
    model.learn(total_timesteps=20000)
    print("--- PELATIHAN SELESAI ---")
    # Saved once; the model is not modified after this point.
    model.save("drl_portfolio_final_model")

    print("\n--- MEMBUAT REKOMENDASI ALOKASI PORTFOLIO ---")

    # The recommendation is for the days ahead, so predict from the most recent
    # normalised state row rather than from the random historical row that
    # env.reset() would select.
    obs = env.df_state.iloc[-1].to_numpy(dtype=np.float32)
    action, _states = model.predict(obs, deterministic=True)

    # Same softmax mapping as PortfolioEnv.step (max subtracted for numerical stability).
    exp_scores = np.exp(action - np.max(action))
    predicted_weights = exp_scores / np.sum(exp_scores)

    uang_pengguna = 5_000_000
    print(f"\nREKOMENDASI ALOKASI DANA (Total: Rp {uang_pengguna:,.2f}) UNTUK 7 HARI KE DEPAN:")
    print("-" * 70)
    for i, sector in enumerate(sectors):
        persentase = predicted_weights[i] * 100
        alokasi_dana = predicted_weights[i] * uang_pengguna
        print(f"{sector:<25}: {persentase:>6.2f}%  =>  Rp {alokasi_dana:,.2f}")
    print("-" * 70)


Membaca data mentah (format panjang)...
Melakukan feature engineering (memilih fitur berita terbaik)...

Data berhasil ditransformasi dan dipisahkan untuk State dan Reward.
Bentuk data State (baris, kolom): (2575, 22)
Bentuk data Return (baris, kolom): (2575, 11)
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.

--- MEMULAI PELATIHAN AGEN DRL ---
Logging to ./ppo_portfolio_tensorboard/PPO_2
-----------------------------
| time/              |      |
|    fps             | 219  |
|    iterations      | 1    |
|    time_elapsed    | 9    |
|    total_timesteps | 2048 |
-----------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.21e+03     |
|    ep_rew_mean          | -242         |
| time/                   |              |
|    fps                  | 202          |
|    iterations           | 2            |
|    time_elapsed         | 20           |
|    t