In [134]:
from easypip import easyimport, easyinstall, is_notebook

# Requirements are installed one by one, in the order listed. "swig" comes
# before the box2d extras (presumably because the box2d build needs it --
# TODO confirm).
for _requirement in (
    "bbrl>=0.2.2",
    "swig",
    "bbrl_gymnasium>=0.2.0",
    "bbrl_gymnasium[box2d]",
    "bbrl_gymnasium[classic_control]",
    "tensorboard",
    "moviepy",
    "box2d-kengz",
):
    easyinstall(_requirement)
del _requirement  # keep the notebook namespace free of the loop variable

[easypip] Installing bbrl_gymnasium>=0.2.0
[easypip] Installing bbrl_gymnasium[box2d]
[easypip] Installing bbrl_gymnasium[classic_control]


In [135]:
import os
import sys
from pathlib import Path
import math
import numpy as np

from moviepy.editor import ipython_display as video_display
import time
from tqdm.auto import tqdm
from typing import Tuple, Optional
from functools import partial

from omegaconf import OmegaConf
import torch
import bbrl_gymnasium

import copy
from abc import abstractmethod, ABC
import torch.nn as nn
import torch.nn.functional as F
from time import strftime

from gymnasium import Env, Space, Wrapper, make

# Imports all the necessary classes and functions from BBRL
from bbrl.agents.agent import Agent
from bbrl import get_arguments, get_class, instantiate_class
# The workspace is the main class in BBRL, this is where all data is collected and stored
from bbrl.workspace import Workspace

# Agents(agent1, agent2, agent3, ...) executes the given agents one after the other
# TemporalAgent(agent) executes an agent over multiple timesteps in the workspace,
# or until a given condition is reached
from bbrl.agents import Agents, TemporalAgent

# ParallelGymAgent is an agent able to execute a batch of gymnasium environments
# with auto-resetting. These agents produce multiple variables in the workspace:
# 'env/env_obs', 'env/reward', 'env/timestep', 'env/terminated',
# 'env/truncated', 'env/done', 'env/cumulated_reward', ...
#
# When called at timestep t=0, the environments are automatically reset. At
# timestep t>0, these agents will read the 'action' variable in the workspace at
# time t - 1
from bbrl.agents.gymnasium import GymAgent, ParallelGymAgent, make_env, record_video

# Replay buffers are useful to store past transitions when training
from bbrl.utils.replay_buffer import ReplayBuffer

import torch.nn as nn

from typing import Tuple
from bbrl.agents.gymnasium import make_env, GymAgent, ParallelGymAgent
from functools import partial

from bbrl import instantiate_class

import matplotlib.pyplot as plt

from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

In [136]:
# Utility function for launching tensorboard
# For Colab - otherwise, it is easier and better to launch tensorboard from
# the terminal
def setup_tensorboard(path):
    """Start TensorBoard inside the notebook, or print the shell command for it.

    Args:
        path: Log directory to point TensorBoard at (anything accepted by
            ``pathlib.Path``).

    Behaviour:
        * Outside a notebook (``is_notebook()`` is false) nothing is asked and
          the shell command is printed.
        * Inside a notebook whose shell class lives in ``google.colab._shell``
          TensorBoard is launched inline without asking.
        * Inside any other notebook the user is prompted until they answer
          ``y`` or ``n``.
    """
    log_dir = Path(path)

    choice = ""
    if is_notebook():
        on_colab = get_ipython().__class__.__module__ == "google.colab._shell"
        if on_colab:
            choice = "y"
        # Keep asking until the answer is exactly "y" or "n" (case-insensitive).
        while choice not in ("y", "n"):
            choice = input("Do you want to launch tensorboard in this notebook [y/n] ").lower()

    if choice != "y":
        import os
        import sys

        # Point at the tensorboard binary that sits next to the running interpreter.
        bin_dir = os.path.dirname(sys.executable)
        print(f"Launch tensorboard from the shell:\n{bin_dir}/tensorboard --logdir={log_dir.absolute()}")
        return

    shell = get_ipython()
    shell.run_line_magic("load_ext", "tensorboard")
    shell.run_line_magic("tensorboard", f"--logdir {log_dir.absolute()}")

In [137]:
# NOTE(review): `Sequential` and `Adam` are already imported from torch.nn and
# torch.optim in the main import cell. The keras imports below rebind both
# names, so from this cell on a bare `Sequential` / `Adam` is the keras class.
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.optimizers import Adam

# NOTE(review): the next two lines repeat imports already made just above.
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten
import numpy as np
import random as python_random
import tensorflow as tf

# Seed used when building the CNN feature extractor and when seeding the
# gym environments.
SEED = 42


def set_seeds(seed):
    """Seed NumPy, Python's ``random`` module and TensorFlow with ``seed``.

    The generators are seeded in that order, all with the same value.
    """
    for seed_generator in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_generator(seed)

class cnnAgent:
    """Convolutional feature extractor for single-channel (grayscale) frames.

    The network is never compiled or fitted in this class: features come from
    the freshly initialised weights. The keras model is created lazily on the
    first call to ``extract_features`` and then reused, so later images are
    expected to have the same shape as the first one.
    """

    def __init__(self):
        # Seed the RNGs at construction time so weight initialisation is
        # repeatable when the model is built right after.
        set_seeds(SEED)
        # Built on first use, once the real image shape is known.
        self.model = None

    def build_feature_extractor_model(self, input_shape):
        """Build the CNN that maps one frame to a 100-dimensional feature vector.

        Args:
            input_shape: ``(height, width, channels)`` of a single frame,
                without the batch axis. It is used as the input shape of the
                first convolution (previously it was ignored in favour of a
                hard-coded ``(151, 562, 1)``, which did not match the frames
                actually passed in by ``extract_features``).

        Returns:
            An uncompiled keras ``Sequential`` model: three blocks of
            7x7 stride-2 convolution + 2x2 max-pooling, then ``Flatten`` and a
            ``Dense(100)`` layer with ReLU activation.
        """
        model = Sequential()
        model.add(Conv2D(32, (7, 7), strides=(2, 2), activation='relu', padding="same", input_shape=tuple(input_shape)))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Conv2D(64, (7, 7), strides=(2, 2), padding="same", activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Conv2D(64, (7, 7), strides=(2, 2), padding="same", activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Flatten())
        # Change the unit count here to change the size of the output feature vector.
        model.add(Dense(100, activation='relu'))
        return model

    def extract_features(self, image):
        """Run one 2-D image through the CNN and return its features.

        Args:
            image: 2-D array-like ``(height, width)``; a channel axis and a
                batch axis are added before prediction.

        Returns:
            The output of ``model.predict`` for a batch of one image, i.e. an
            array of shape ``(1, 100)`` with the layers defined above.
        """
        # (H, W) -> (H, W, 1) -> (1, H, W, 1): add channel axis, then batch axis.
        batch = np.expand_dims(np.expand_dims(image, axis=-1), axis=0)
        if self.model is None:
            self.model = self.build_feature_extractor_model(input_shape=batch.shape[1:])
        return self.model.predict(batch)


In [138]:
# Run configuration, written as a plain dict and wrapped by OmegaConf below.
params = {
    "save_best": False,
    # The log directory embeds the current timestamp, so each execution of this
    # cell points at a new directory.
    "logger": {
        "classname": "bbrl.utils.logger.TFLogger",
        "log_dir": f"./tblogs/dqn-buffer-{time.time()}",
        "cache_size": 10000,
        "every_n_seconds": 10,
        "verbose": False,
    },
    "algorithm": {
        # NOTE(review): differs from the module-level SEED (42) used for the
        # environments and the CNN -- confirm which one is intended.
        "seed": 4,
        "max_grad_norm": 0.5,
        "epsilon": 0.02,
        "n_envs": 8,
        "n_steps": 32,
        "n_updates": 32,
        "eval_interval": 2000,
        "learning_starts": 2000,
        "nb_evals": 10,
        # NOTE(review): this is a float (1e6), not an int -- confirm the
        # consumer converts it before using it as a size.
        "buffer_size": 1e6,
        "batch_size": 256,
        "target_critic_update": 5000,
        "max_epochs": 3500,
        "discount_factor": 0.99,
        "architecture": {"hidden_size": [64, 64]},
    },
    "gym_env": {
        "env_name": "CartPole-v1",
    },
    "optimizer": {
        "classname": "torch.optim.Adam",
        "lr": 1e-3,
    },
}

# OmegaConf gives attribute-style access, e.g. `config.gym_env.env_name`.
config = OmegaConf.create(params)

In [139]:
from PIL import Image

class preProcessingAgent():
    """Turns rendered environment frames into cropped, grayscale numpy arrays."""

    def __init__(self, parallel_gym_agent):
        # Stored for later use; none of the methods below reads it.
        self.parallel_gym_agent = parallel_gym_agent

    def preProcess(self, im):
        """Convert a frame to 8-bit grayscale and resize it.

        Args:
            im: Image array; it is cast to ``uint8`` before conversion.

        Returns:
            A 2-D array of the grayscale image resized with PIL to
            ``(564, 152)``. PIL sizes are ``(width, height)``, so the array
            has 152 rows and 564 columns.
        """
        # Debug aid: plt.imshow(im); plt.show() displays the original frame.
        grayscale = Image.fromarray(im.astype(np.uint8)).convert('L')
        # TODO: maybe resize to (im.shape[1], im.shape[0]) instead.
        resized = grayscale.resize((564, 152))
        return np.array(resized)

    def crop(self, im):
        """Crop a frame to a box derived from its size and a 300x300 reference.

        Args:
            im: Image array; it is cast to ``uint8`` before cropping.

        Returns:
            The cropped region as a numpy array.
        """
        frame = Image.fromarray(im.astype(np.uint8))
        width, height = frame.size

        # Reference dimensions the box edges are computed from.
        ref_width, ref_height = 300, 300
        # NOTE(review): the divisors look hand-tuned for one render size --
        # confirm before using frames of another size.
        box = (
            (width - ref_width)/8,        # left
            (height - ref_height)/0.6,    # top
            (width + ref_width)/1.5,      # right
            (height + ref_height)/2.2,    # bottom
        )
        return np.array(frame.crop(box))

    def getFrame(self, env_agent):
        """Render the first environment of ``env_agent`` and preprocess the frame.

        The environment is reset on every call before rendering, so calling
        this in the middle of an episode restarts that environment.

        Args:
            env_agent: Object exposing an ``envs`` sequence; only ``envs[0]``
                is used.

        Returns:
            The rendered frame after ``crop`` followed by ``preProcess``.
        """
        first_env = env_agent.envs[0]
        first_env.reset()
        rendered = first_env.render()
        return self.preProcess(self.crop(rendered))


In [208]:
# Smoke test of the image pipeline: render one frame of the configured
# environment, preprocess it and run it through the CNN feature extractor.
env_agent = ParallelGymAgent(partial(make_env, config.gym_env.env_name, render_mode="rgb_array"), 1).seed(SEED) # the trailing int is the number of environments
obs_size, action_dim = env_agent.get_obs_and_actions_sizes()
print(f"Environment: observation space in R^{obs_size} and action space R^{action_dim}")

preProc = preProcessingAgent(env_agent) # agent that does the pre-processing


im = preProc.getFrame(env_agent) # grab the (cropped, grayscale) frame


feature_extractor = cnnAgent()
features = feature_extractor.extract_features(im) # open question: should the zeros be filtered out for simplicity?
print("Extracted features shape:", features.shape)
print("Extracted features array:", features)

# Filtering out the zeros does not give an array of constant size: it generally
# varies between (1,50) and (1,52) (to be checked...)
#non_zero_mask = features != 0
#filtered_features = features[non_zero_mask]
#print("Filtered features shape:", filtered_features.shape)
#print("Filtered features array:", filtered_features)

Environment: observation space in R^4 and action space R^2
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
Extracted features shape: (1, 100)
Extracted features array: [[13.333378   7.3049126 15.397407   0.         0.        13.111646
  30.365862   0.        13.297258   0.        14.468754   0.
   6.0612283  6.068844   0.        39.76304   30.016796   0.
   0.        17.8886    53.121674   7.5338078  0.         0.
   0.         0.         0.         0.         0.7277671  4.0468173
  23.475471   0.        13.946899   7.6253715 34.12345    0.
  16.004932   0.        36.293198   0.         0.         0.
   0.        11.083226   0.         0.         0.         7.071604
  25.153004   0.         9.039755   0.         0.         9.877197
   0.         0.         0.         0.         0.         0.
  22.020353   0.         0.         0.        22.48362   21.370718
  14.048566   4.2527947  0.        15.578289   0.        20.95405
   0.         0.         6.7438984  0.  