In [18]:
import gym
from gym import spaces
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
class CardioEnv(gym.Env):
    """Toy dosing environment that walks once through a fixed patient cohort.

    One episode visits the patients in row order, one patient per step.
    The observation is the feature vector of the patient to be treated next,
    the action is a discrete dose level, and the reward is a simple proxy:
    ``-risk + DOSE_REWARD * action``.

    The class follows the classic gym API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.

    Parameters
    ----------
    features : array-like, shape (n_patients, n_features)
        Per-patient feature vectors. Stored as ``float32`` so that returned
        observations match the dtype declared by ``observation_space``.
    risks : array-like, shape (n_patients,)
        Simulated risk score of each patient.

    Raises
    ------
    ValueError
        If ``features`` is not a non-empty 2-D array, or if ``risks`` is not
        1-D with one entry per patient.
    """

    # Number of discrete dose levels (actions 0 .. N_DOSE_LEVELS - 1).
    N_DOSE_LEVELS = 4
    # Reward added per dose unit in the proxy reward.
    DOSE_REWARD = 0.05

    def __init__(self, features, risks):
        super().__init__()

        features = np.asarray(features, dtype=np.float32)
        risks = np.asarray(risks)
        if features.ndim != 2 or features.shape[0] == 0:
            raise ValueError(
                "features must be a non-empty 2-D array (patients x features), "
                f"got shape {features.shape}"
            )
        if risks.ndim != 1 or risks.shape[0] != features.shape[0]:
            raise ValueError(
                f"risks must have shape ({features.shape[0]},) to match features, "
                f"got shape {risks.shape}"
            )

        # Patient features -> observations; risks -> reward signal.
        self.features = features
        self.risks = risks
        self.n_patients, self.n_features = features.shape

        # Action: drug dosage level.
        self.action_space = spaces.Discrete(self.N_DOSE_LEVELS)

        # Observation: the (unbounded) feature vector of one patient.
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.n_features,), dtype=np.float32
        )

        # Index of the patient whose features were last returned as observation.
        self.current_patient = 0

    def reset(self):
        """Restart the episode at the first patient and return its features."""
        self.current_patient = 0
        return self.features[self.current_patient]

    def step(self, action):
        """Apply ``action`` to the current patient and move on to the next one.

        Returns
        -------
        obs : np.ndarray, shape (n_features,), dtype float32
            Features of the next patient, or all zeros once the cohort is
            exhausted.
        reward : float
            ``-risk + DOSE_REWARD * action`` for the patient just treated.
        done : bool
            True once every patient has been visited.
        info : dict
            Always empty.

        Raises
        ------
        IndexError
            If called again after ``done`` was returned without a ``reset()``.
        """
        # The reward belongs to the patient the agent has just observed.
        reward = float(-self.risks[self.current_patient] + action * self.DOSE_REWARD)
        self.current_patient += 1

        done = self.current_patient >= self.n_patients
        if done:
            # No next patient: return a placeholder of the declared dtype.
            obs = np.zeros(self.n_features, dtype=np.float32)
        else:
            # Next patient's features. Indexing with current_patient - 1 would
            # repeat the patient just treated and never expose the last one.
            obs = self.features[self.current_patient]

        return obs, reward, done, {}

In [14]:
from sklearn.preprocessing import StandardScaler

# Size of the simulated cohort.
N_PATIENTS = 100
N_FEATURES = 10

# Seeded generator so that the simulated cohort (and every number derived
# from it further down) is identical on each Restart & Run All.
DATA_SEED = 42
rng = np.random.default_rng(DATA_SEED)

# Placeholder data: in a real run these would be features from the CNN+LSTM
# fusion model (or the raw signals) instead of random draws.
features = rng.standard_normal((N_PATIENTS, N_FEATURES))  # standard-normal features
risks = rng.random(N_PATIENTS)                            # simulated risk in [0, 1)

# Standardise each feature column to zero mean and unit variance.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

In [15]:
from stable_baselines3 import PPO

# Fixed seed so that policy initialisation and action sampling are repeatable.
PPO_SEED = 42
# Requested training budget in environment steps.
TOTAL_TIMESTEPS = 10000

# Build the environment from the scaled features and simulated risks.
env = CardioEnv(features_scaled, risks)

# Define the PPO agent with the default MLP policy.
model = PPO("MlpPolicy", env, verbose=1, seed=PPO_SEED)

# Train the agent; the trailing semicolon hides the returned model's repr.
model.learn(total_timesteps=TOTAL_TIMESTEPS);


Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 100      |
|    ep_rew_mean     | -42.3    |
| time/              |          |
|    fps             | 1225     |
|    iterations      | 1        |
|    time_elapsed    | 1        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 100         |
|    ep_rew_mean          | -42.3       |
| time/                   |             |
|    fps                  | 987         |
|    iterations           | 2           |
|    time_elapsed         | 4           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.008139156 |
|    clip_fraction        | 0.0369      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.38       |
|    explained_variance   | -0.122      |
|    learning_rate        | 0.

<stable_baselines3.ppo.ppo.PPO at 0x7d47dd47ec60>

In [16]:
# Baseline policy: never treat (dose 0 for every patient).
# Roll out exactly one episode and accumulate the total reward.
action = 0
baseline_reward_no_action = 0
obs = env.reset()

while True:
    obs, reward, done, info = env.step(action)
    baseline_reward_no_action = baseline_reward_no_action + reward
    if done:
        break

print("Baseline reward (no treatment):", baseline_reward_no_action)


Baseline reward (no treatment): -49.90277189621755


In [17]:
# Baseline policy: always treat, using dose level 1 for every patient.
# Roll out exactly one episode and accumulate the total reward.
action = 1
baseline_reward_full_action = 0
obs = env.reset()

while True:
    obs, reward, done, info = env.step(action)
    baseline_reward_full_action = baseline_reward_full_action + reward
    if done:
        break

print("Baseline reward (always treatment):", baseline_reward_full_action)


Baseline reward (always treatment): -44.902771896217516


In [None]:
# Per-episode reward during PPO training, with a rolling mean.
#
# The episode returns are read back from the Monitor wrapper that
# stable-baselines3 puts around the environment (see the
# "Wrapping the env with a `Monitor` wrapper" line in the training log).
# Without this assignment `ppo_rewards` is undefined on a fresh kernel.
ppo_rewards = model.get_env().env_method("get_episode_rewards")[0]

window = 10
rolling_mean = pd.Series(ppo_rewards).rolling(window).mean()

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(ppo_rewards, label="PPO Reward", color="blue", alpha=0.6)
ax.plot(rolling_mean, label=f"Rolling Mean ({window})", color="red")
# Horizontal reference lines: total reward of the two fixed-action baselines.
ax.axhline(baseline_reward_no_action, color="black", linestyle="--", label="Baseline: no treatment")
ax.axhline(baseline_reward_full_action, color="green", linestyle="--", label="Baseline: always treatment")
ax.set_xlabel("Episode")
ax.set_ylabel("Reward")
ax.set_title("Reward per Episode")
ax.legend()
plt.show()
