# In [1]:
import gym
from gym import spaces
import numpy as np

# In [2]:
class AdEnv(gym.Env):
    """Single-step ad-selection environment over simulated users.

    Each episode consists of exactly one interaction: a random user is
    drawn, the agent picks one ad, and the reward is a Bernoulli draw
    (1 = click, 0 = no click) using that user's click-through rate for
    the chosen ad.

    The observation is the current user's feature vector; the action is
    the index of the ad to display.

    Args:
        n_users: Number of simulated users.
        n_ads: Number of ads the agent can choose from.
        n_features: Length of each user's feature vector (observation size).

    Raises:
        ValueError: If any of the size arguments is not strictly positive.
    """

    def __init__(self, n_users=1000, n_ads=10, n_features=10):
        super(AdEnv, self).__init__()
        if n_users <= 0 or n_ads <= 0 or n_features <= 0:
            raise ValueError(
                "n_users, n_ads and n_features must all be positive"
            )

        self.n_ads = n_ads
        self.n_users = n_users
        self.n_features = n_features
        self.action_space = spaces.Discrete(n_ads)
        self.observation_space = spaces.Box(
            low=0, high=1, shape=(n_features,), dtype=np.float32
        )

        # Simulate users: one feature vector per user and one CTR
        # (click-through rate) per (user, ad) pair. Profiles are stored as
        # float32 so observations match the dtype declared in
        # ``observation_space``.
        self.user_profiles = np.random.rand(n_users, n_features).astype(
            np.float32
        )
        self.ctr_matrix = np.random.rand(n_users, n_ads)
        self.current_user = None

        # Draw an initial user so ``step`` is usable right after construction.
        self.reset()

    def reset(self):
        """Draw a new random user and return its feature vector.

        Returns:
            A float32 array of shape ``(n_features,)``. It is a copy, so the
            caller may modify it without altering the stored profile.
        """
        self.current_user = np.random.randint(self.n_users)
        return self.user_profiles[self.current_user].copy()

    def step(self, action):
        """Show ad ``action`` to the current user and sample a click.

        Args:
            action: Index of the ad to display; must belong to
                ``action_space``.

        Returns:
            A tuple ``(obs, reward, done, info)`` where ``obs`` is the feature
            vector of the *next* randomly drawn user, ``reward`` is 1 on a
            click and 0 otherwise, ``done`` is always ``True`` (one ad = one
            interaction) and ``info["ctr"]`` is the click probability that
            was used for the draw.

        Raises:
            ValueError: If ``action`` is not a valid ad index.
        """
        # Explicit check instead of ``assert`` so validation survives ``-O``.
        if not self.action_space.contains(action):
            raise ValueError(
                "Invalid action {!r}: expected an integer in [0, {})".format(
                    action, self.n_ads
                )
            )

        ctr = self.ctr_matrix[self.current_user, action]
        reward = int(np.random.binomial(1, ctr))  # 1 if click, 0 otherwise

        done = True  # one ad = one interaction
        info = {"ctr": ctr}

        # The episode is over; the returned observation already belongs to
        # the next user so callers can keep stepping without calling reset().
        obs = self.reset()
        return obs, reward, done, info
