In [None]:
!pip install gym-super-mario-bros==7.3.0 nes_py

In [1]:
!pip install stable-baselines3[extra]



In [2]:
import gym
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from gym.wrappers import GrayScaleObservation
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from nes_py.wrappers import JoypadSpace
# Monkey-patch JoypadSpace.reset so that it forwards every keyword argument straight to
# the wrapped environment's reset() (workaround taken from StackOverflow).
# NOTE(review): presumably needed because the stock JoypadSpace.reset does not accept the
# keyword arguments that env.reset() is called with here — confirm against nes_py.
JoypadSpace.reset = lambda self, **kwargs: self.env.reset(**kwargs)

In [3]:
# Display the button combinations that make up the reduced action set.
SIMPLE_MOVEMENT

[['NOOP'],
 ['right'],
 ['right', 'A'],
 ['right', 'B'],
 ['right', 'A', 'B'],
 ['A'],
 ['left']]

# Random Agent

In [3]:
# Build the game with the new-style gym API; render_mode="human" is requested so the
# game is shown while the random agent plays.
env = gym.make("SuperMarioBros-v0", apply_api_compatibility=True, render_mode="human")
# Wrap the environment to reduce the action space: 7 actions instead of 256
env = JoypadSpace(env, SIMPLE_MOVEMENT)

try:
    done = True  # forces a reset before the very first step
    for step in range(1000):
        if done:
            env.reset()
        action = env.action_space.sample()
        # step() returns (obs, reward, terminated, truncated, info); the episode is
        # over when EITHER flag is set, so both must trigger the next reset.
        state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        env.render()
finally:
    # Always release the emulator, even if the loop raised or was interrupted.
    env.close()

  logger.warn(
  logger.warn(
  logger.warn(
  return (self.ram[0x86] - self.ram[0x071c]) % 256


# Preprocessing the environment

In [4]:
# Base game with the joypad limited to the SIMPLE_MOVEMENT combos (7 actions instead of 256).
base_env = JoypadSpace(
    gym.make("SuperMarioBros-v0", apply_api_compatibility=True),
    SIMPLE_MOVEMENT,
)
# Grayscale the observation space.
gray_env = GrayScaleObservation(base_env, keep_dim=True)
# To eyeball a frame: plt.imshow(gray_env.reset()[0], cmap="Greys")
# Wrap the single environment into a DummyVecEnv.
vec_env = DummyVecEnv([lambda: gray_env])
# Stack 4 frames so the agent can predict the movements of enemies.
env = VecFrameStack(vec_env, 4)

print("OBSERVATION SPACE", env.observation_space)
print("ACTION SPACE :", env.action_space)
print("RENDER :", env.render_mode)

  logger.warn(


OBSERVATION SPACE Box(0, 255, (240, 256, 4), uint8)
ACTION SPACE : Discrete(7)
RENDER : None




# RL Model

In [5]:
import os
from stable_baselines3 import PPO 
from stable_baselines3.common.callbacks import BaseCallback # Saving models

In [6]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model every ``check_freq`` calls to ``_on_step``.

    Don't save too often: a trained model is still quite big on disk.

    Args:
        check_freq: Number of callback steps between two checkpoints; must be > 0.
        save_path: Directory the checkpoints are written to. ``None`` disables
            checkpointing altogether.
        verbose: Verbosity level forwarded to ``BaseCallback``.

    Raises:
        ValueError: If ``check_freq`` is not strictly positive.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        # Fail early with a clear message instead of a ZeroDivisionError on the first step.
        if check_freq <= 0:
            raise ValueError("check_freq must be a positive number, got {!r}".format(check_freq))
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory (if one was given) before training starts."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint on every ``check_freq``-th call.

        Returns:
            Always ``True``, so this callback never asks for training to stop.
        """
        # Same None guard as _init_callback: without it os.path.join(None, ...) raises TypeError.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            # The file name carries the call count so earlier checkpoints are not overwritten.
            model_path = os.path.join(self.save_path, 'best_model{}'.format(self.n_calls))
            self.model.save(model_path)
        return True

In [7]:
# Directory handed to TrainAndLoggingCallback as save_path (model checkpoints).
CHECKPOINT_DIR = './train/'
# Directory handed to PPO as tensorboard_log.
LOG_DIR = './logs/'

In [8]:
# Checkpoint the model into CHECKPOINT_DIR every 100,000 callback steps.
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=100_000,
)

In [9]:
# PPO agent with a CNN policy over the stacked-frame environment;
# training metrics are written to LOG_DIR for TensorBoard.
model = PPO(
    "CnnPolicy",
    env,
    learning_rate=1e-6,
    n_steps=512,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env in a VecTransposeImage.


In [10]:
# Train for one million timesteps; the callback writes periodic checkpoints.
model.learn(
    total_timesteps=1_000_000,
    callback=callback,
)

Logging to ./logs/PPO_1


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------
| time/              |     |
|    fps             | 75  |
|    iterations      | 1   |
|    time_elapsed    | 6   |
|    total_timesteps | 512 |
----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 74           |
|    iterations           | 2            |
|    time_elapsed         | 13           |
|    total_timesteps      | 1024         |
| train/                  |              |
|    approx_kl            | 8.570496e-06 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.95        |
|    explained_variance   | 0.000873     |
|    learning_rate        | 1e-06        |
|    loss                 | 170          |
|    n_updates            | 10           |
|    policy_gradient_loss | 2.95e-05     |
|    value_loss           | 383          |
------------------------------------------
-----------------------

-------------------------------------------
| time/                   |               |
|    fps                  | 86            |
|    iterations           | 13            |
|    time_elapsed         | 77            |
|    total_timesteps      | 6656          |
| train/                  |               |
|    approx_kl            | 2.7403352e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.94         |
|    explained_variance   | -0.00422      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.105         |
|    n_updates            | 120           |
|    policy_gradient_loss | -0.000374     |
|    value_loss           | 0.226         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 86            |
|    iterations           | 14            |
|    time_elapsed         | 82  

------------------------------------------
| time/                   |              |
|    fps                  | 87           |
|    iterations           | 24           |
|    time_elapsed         | 140          |
|    total_timesteps      | 12288        |
| train/                  |              |
|    approx_kl            | 3.379234e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.00193      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.174        |
|    n_updates            | 230          |
|    policy_gradient_loss | -0.000356    |
|    value_loss           | 0.358        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 87            |
|    iterations           | 25            |
|    time_elapsed         | 145           |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 89           |
|    iterations           | 35           |
|    time_elapsed         | 199          |
|    total_timesteps      | 17920        |
| train/                  |              |
|    approx_kl            | 5.378842e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.94        |
|    explained_variance   | 0.0848       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.212        |
|    n_updates            | 340          |
|    policy_gradient_loss | -0.000287    |
|    value_loss           | 0.983        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 89            |
|    iterations           | 36            |
|    time_elapsed         | 205           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 90            |
|    iterations           | 46            |
|    time_elapsed         | 260           |
|    total_timesteps      | 23552         |
| train/                  |               |
|    approx_kl            | 2.6771566e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.93         |
|    explained_variance   | -0.0183       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0607        |
|    n_updates            | 450           |
|    policy_gradient_loss | -0.000197     |
|    value_loss           | 0.128         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 90            |
|    iterations           | 47            |
|    time_elapsed         | 265 

------------------------------------------
| time/                   |              |
|    fps                  | 90           |
|    iterations           | 57           |
|    time_elapsed         | 320          |
|    total_timesteps      | 29184        |
| train/                  |              |
|    approx_kl            | 3.879273e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.93        |
|    explained_variance   | 0.000344     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0728       |
|    n_updates            | 560          |
|    policy_gradient_loss | -0.00037     |
|    value_loss           | 0.249        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 91           |
|    iterations           | 58           |
|    time_elapsed         | 326          |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 91            |
|    iterations           | 68            |
|    time_elapsed         | 380           |
|    total_timesteps      | 34816         |
| train/                  |               |
|    approx_kl            | 8.2087936e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.92         |
|    explained_variance   | -0.00152      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.181         |
|    n_updates            | 670           |
|    policy_gradient_loss | -0.000911     |
|    value_loss           | 0.637         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 91           |
|    iterations           | 69           |
|    time_elapsed         | 386     

-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 79            |
|    time_elapsed         | 439           |
|    total_timesteps      | 40448         |
| train/                  |               |
|    approx_kl            | 4.0631625e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.91         |
|    explained_variance   | 0.00611       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0849        |
|    n_updates            | 780           |
|    policy_gradient_loss | -0.000305     |
|    value_loss           | 0.157         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 80            |
|    time_elapsed         | 444 

-------------------------------------------
| time/                   |               |
|    fps                  | 91            |
|    iterations           | 90            |
|    time_elapsed         | 501           |
|    total_timesteps      | 46080         |
| train/                  |               |
|    approx_kl            | 0.00011941686 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.9          |
|    explained_variance   | 0.143         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0766        |
|    n_updates            | 890           |
|    policy_gradient_loss | -0.000772     |
|    value_loss           | 0.189         |
-------------------------------------------
--------------------------------------------
| time/                   |                |
|    fps                  | 91             |
|    iterations           | 91             |
|    time_elapsed         | 

-------------------------------------------
| time/                   |               |
|    fps                  | 91            |
|    iterations           | 101           |
|    time_elapsed         | 564           |
|    total_timesteps      | 51712         |
| train/                  |               |
|    approx_kl            | 2.6406604e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.89         |
|    explained_variance   | -0.0151       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0727        |
|    n_updates            | 1000          |
|    policy_gradient_loss | -0.000118     |
|    value_loss           | 0.192         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 91           |
|    iterations           | 102          |
|    time_elapsed         | 570     

-------------------------------------------
| time/                   |               |
|    fps                  | 91            |
|    iterations           | 112           |
|    time_elapsed         | 624           |
|    total_timesteps      | 57344         |
| train/                  |               |
|    approx_kl            | 0.00046956877 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.183         |
|    learning_rate        | 1e-06         |
|    loss                 | 88.8          |
|    n_updates            | 1110          |
|    policy_gradient_loss | 0.000967      |
|    value_loss           | 209           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 91            |
|    iterations           | 113           |
|    time_elapsed         | 630 

------------------------------------------
| time/                   |              |
|    fps                  | 92           |
|    iterations           | 123          |
|    time_elapsed         | 683          |
|    total_timesteps      | 62976        |
| train/                  |              |
|    approx_kl            | 8.330669e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.88        |
|    explained_variance   | -0.0242      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0444       |
|    n_updates            | 1220         |
|    policy_gradient_loss | -0.00051     |
|    value_loss           | 0.14         |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 124           |
|    time_elapsed         | 689           |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 134           |
|    time_elapsed         | 744           |
|    total_timesteps      | 68608         |
| train/                  |               |
|    approx_kl            | 0.00018391234 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.138         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0521        |
|    n_updates            | 1330          |
|    policy_gradient_loss | -0.000716     |
|    value_loss           | 0.154         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 92           |
|    iterations           | 135          |
|    time_elapsed         | 749     

-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 145           |
|    time_elapsed         | 801           |
|    total_timesteps      | 74240         |
| train/                  |               |
|    approx_kl            | 3.4556957e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.0626        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0812        |
|    n_updates            | 1440          |
|    policy_gradient_loss | -0.000209     |
|    value_loss           | 0.325         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 146           |
|    time_elapsed         | 807 

-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 156           |
|    time_elapsed         | 859           |
|    total_timesteps      | 79872         |
| train/                  |               |
|    approx_kl            | 0.00011228945 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.86         |
|    explained_variance   | 0.0152        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0542        |
|    n_updates            | 1550          |
|    policy_gradient_loss | -0.000608     |
|    value_loss           | 0.128         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 92            |
|    iterations           | 157           |
|    time_elapsed         | 865 

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 167           |
|    time_elapsed         | 918           |
|    total_timesteps      | 85504         |
| train/                  |               |
|    approx_kl            | 0.00014418049 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.84         |
|    explained_variance   | -0.0149       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.136         |
|    n_updates            | 1660          |
|    policy_gradient_loss | -0.000638     |
|    value_loss           | 0.345         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 168           |
|    time_elapsed         | 923 

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 178           |
|    time_elapsed         | 976           |
|    total_timesteps      | 91136         |
| train/                  |               |
|    approx_kl            | 7.3359115e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.81         |
|    explained_variance   | 0.0486        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0896        |
|    n_updates            | 1770          |
|    policy_gradient_loss | -0.000277     |
|    value_loss           | 0.972         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 179          |
|    time_elapsed         | 981     

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 189           |
|    time_elapsed         | 1037          |
|    total_timesteps      | 96768         |
| train/                  |               |
|    approx_kl            | 0.00041203364 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.82         |
|    explained_variance   | -0.00323      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0553        |
|    n_updates            | 1880          |
|    policy_gradient_loss | -0.00146      |
|    value_loss           | 0.111         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 190          |
|    time_elapsed         | 1042    

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 200          |
|    time_elapsed         | 1094         |
|    total_timesteps      | 102400       |
| train/                  |              |
|    approx_kl            | 0.0001243999 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.82        |
|    explained_variance   | -0.00852     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0695       |
|    n_updates            | 1990         |
|    policy_gradient_loss | -0.000634    |
|    value_loss           | 0.208        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 201          |
|    time_elapsed         | 1100         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 211           |
|    time_elapsed         | 1153          |
|    total_timesteps      | 108032        |
| train/                  |               |
|    approx_kl            | 1.8698978e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.78         |
|    explained_variance   | 0.00697       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0493        |
|    n_updates            | 2100          |
|    policy_gradient_loss | -2.51e-05     |
|    value_loss           | 0.182         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 212           |
|    time_elapsed         | 1159

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 222          |
|    time_elapsed         | 1211         |
|    total_timesteps      | 113664       |
| train/                  |              |
|    approx_kl            | 0.0027154367 |
|    clip_fraction        | 0.0043       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.74        |
|    explained_variance   | 0.457        |
|    learning_rate        | 1e-06        |
|    loss                 | 104          |
|    n_updates            | 2210         |
|    policy_gradient_loss | -0.000503    |
|    value_loss           | 310          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 223          |
|    time_elapsed         | 1216         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 233           |
|    time_elapsed         | 1274          |
|    total_timesteps      | 119296        |
| train/                  |               |
|    approx_kl            | 9.4423885e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | 0.0481        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0526        |
|    n_updates            | 2320          |
|    policy_gradient_loss | -0.00038      |
|    value_loss           | 0.131         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 234           |
|    time_elapsed         | 1280

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 244           |
|    time_elapsed         | 1335          |
|    total_timesteps      | 124928        |
| train/                  |               |
|    approx_kl            | 0.00029527757 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.72         |
|    explained_variance   | 0.12          |
|    learning_rate        | 1e-06         |
|    loss                 | 0.107         |
|    n_updates            | 2430          |
|    policy_gradient_loss | -0.000864     |
|    value_loss           | 1.01          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 245          |
|    time_elapsed         | 1341    

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 255           |
|    time_elapsed         | 1393          |
|    total_timesteps      | 130560        |
| train/                  |               |
|    approx_kl            | 0.00028123986 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.73         |
|    explained_variance   | -0.00246      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.11          |
|    n_updates            | 2540          |
|    policy_gradient_loss | -0.00135      |
|    value_loss           | 0.258         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 256          |
|    time_elapsed         | 1398    

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 266           |
|    time_elapsed         | 1451          |
|    total_timesteps      | 136192        |
| train/                  |               |
|    approx_kl            | 0.00015090429 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.74         |
|    explained_variance   | -0.0705       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0612        |
|    n_updates            | 2650          |
|    policy_gradient_loss | -0.000387     |
|    value_loss           | 0.159         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 267           |
|    time_elapsed         | 1456

-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 277           |
|    time_elapsed         | 1510          |
|    total_timesteps      | 141824        |
| train/                  |               |
|    approx_kl            | 3.9703562e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.76         |
|    explained_variance   | 0.0142        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0472        |
|    n_updates            | 2760          |
|    policy_gradient_loss | 7.69e-06      |
|    value_loss           | 0.2           |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 278           |
|    time_elapsed         | 1515

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 288          |
|    time_elapsed         | 1569         |
|    total_timesteps      | 147456       |
| train/                  |              |
|    approx_kl            | 0.0010468464 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.73        |
|    explained_variance   | 0.276        |
|    learning_rate        | 1e-06        |
|    loss                 | 101          |
|    n_updates            | 2870         |
|    policy_gradient_loss | -0.000988    |
|    value_loss           | 260          |
------------------------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 289         |
|    time_elapsed         | 1574        |
|    total_times

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 299           |
|    time_elapsed         | 1627          |
|    total_timesteps      | 153088        |
| train/                  |               |
|    approx_kl            | 5.6334422e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.75         |
|    explained_variance   | -0.0286       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0535        |
|    n_updates            | 2980          |
|    policy_gradient_loss | -0.000101     |
|    value_loss           | 0.108         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 300           |
|    time_elapsed         | 1632

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 310           |
|    time_elapsed         | 1686          |
|    total_timesteps      | 158720        |
| train/                  |               |
|    approx_kl            | 0.00022149913 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.75         |
|    explained_variance   | -0.00914      |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0775        |
|    n_updates            | 3090          |
|    policy_gradient_loss | -0.000557     |
|    value_loss           | 0.238         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 311          |
|    time_elapsed         | 1691    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 321           |
|    time_elapsed         | 1745          |
|    total_timesteps      | 164352        |
| train/                  |               |
|    approx_kl            | 0.00013867684 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.77         |
|    explained_variance   | -0.0456       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0546        |
|    n_updates            | 3200          |
|    policy_gradient_loss | -0.000374     |
|    value_loss           | 0.641         |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 322          |
|    time_elapsed         | 1750    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 332           |
|    time_elapsed         | 1803          |
|    total_timesteps      | 169984        |
| train/                  |               |
|    approx_kl            | 0.00012195797 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.0439       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0467        |
|    n_updates            | 3310          |
|    policy_gradient_loss | -0.000339     |
|    value_loss           | 0.0975        |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 333           |
|    time_elapsed         | 1809

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 343           |
|    time_elapsed         | 1862          |
|    total_timesteps      | 175616        |
| train/                  |               |
|    approx_kl            | 3.0747033e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.77         |
|    explained_variance   | 0.0598        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.151         |
|    n_updates            | 3420          |
|    policy_gradient_loss | -5.42e-05     |
|    value_loss           | 1.01          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 344           |
|    time_elapsed         | 1867

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 354           |
|    time_elapsed         | 1922          |
|    total_timesteps      | 181248        |
| train/                  |               |
|    approx_kl            | 0.00034493068 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.79         |
|    explained_variance   | -0.142        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0634        |
|    n_updates            | 3530          |
|    policy_gradient_loss | -0.000979     |
|    value_loss           | 0.203         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 355           |
|    time_elapsed         | 1928

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 365           |
|    time_elapsed         | 1983          |
|    total_timesteps      | 186880        |
| train/                  |               |
|    approx_kl            | 8.8293804e-05 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.8          |
|    explained_variance   | -0.167        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0718        |
|    n_updates            | 3640          |
|    policy_gradient_loss | -8.67e-05     |
|    value_loss           | 0.158         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 366           |
|    time_elapsed         | 1989

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 376          |
|    time_elapsed         | 2046         |
|    total_timesteps      | 192512       |
| train/                  |              |
|    approx_kl            | 0.0006819611 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.78        |
|    explained_variance   | 0.000292     |
|    learning_rate        | 1e-06        |
|    loss                 | 0.134        |
|    n_updates            | 3750         |
|    policy_gradient_loss | -0.00191     |
|    value_loss           | 0.234        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 377           |
|    time_elapsed         | 2052          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 387          |
|    time_elapsed         | 2106         |
|    total_timesteps      | 198144       |
| train/                  |              |
|    approx_kl            | 0.0006492045 |
|    clip_fraction        | 0.00254      |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.74        |
|    explained_variance   | 0.719        |
|    learning_rate        | 1e-06        |
|    loss                 | 152          |
|    n_updates            | 3860         |
|    policy_gradient_loss | 0.000477     |
|    value_loss           | 358          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 388          |
|    time_elapsed         | 2111         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 398          |
|    time_elapsed         | 2167         |
|    total_timesteps      | 203776       |
| train/                  |              |
|    approx_kl            | 0.0003492724 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.75        |
|    explained_variance   | -0.0114      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0731       |
|    n_updates            | 3970         |
|    policy_gradient_loss | -0.000671    |
|    value_loss           | 0.296        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 399           |
|    time_elapsed         | 2173          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 409          |
|    time_elapsed         | 2230         |
|    total_timesteps      | 209408       |
| train/                  |              |
|    approx_kl            | 0.0008189137 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.75        |
|    explained_variance   | -0.0951      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0517       |
|    n_updates            | 4080         |
|    policy_gradient_loss | -0.00197     |
|    value_loss           | 0.108        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 410          |
|    time_elapsed         | 2236         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 93           |
|    iterations           | 420          |
|    time_elapsed         | 2290         |
|    total_timesteps      | 215040       |
| train/                  |              |
|    approx_kl            | 0.0003929612 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.72        |
|    explained_variance   | -0.019       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0964       |
|    n_updates            | 4190         |
|    policy_gradient_loss | -0.00111     |
|    value_loss           | 0.303        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 93            |
|    iterations           | 421           |
|    time_elapsed         | 2295          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 431           |
|    time_elapsed         | 2345          |
|    total_timesteps      | 220672        |
| train/                  |               |
|    approx_kl            | 0.00011333183 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.71         |
|    explained_variance   | -0.0902       |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0733        |
|    n_updates            | 4300          |
|    policy_gradient_loss | -0.000418     |
|    value_loss           | 1.16          |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 432          |
|    time_elapsed         | 2350    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 442           |
|    time_elapsed         | 2398          |
|    total_timesteps      | 226304        |
| train/                  |               |
|    approx_kl            | 0.00013344886 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.68         |
|    explained_variance   | -0.274        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0977        |
|    n_updates            | 4410          |
|    policy_gradient_loss | -0.000102     |
|    value_loss           | 1.44          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 443           |
|    time_elapsed         | 2403

-----------------------------------------
| time/                   |             |
|    fps                  | 94          |
|    iterations           | 453         |
|    time_elapsed         | 2453        |
|    total_timesteps      | 231936      |
| train/                  |             |
|    approx_kl            | 0.000515918 |
|    clip_fraction        | 0           |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.66       |
|    explained_variance   | -0.0537     |
|    learning_rate        | 1e-06       |
|    loss                 | 0.0539      |
|    n_updates            | 4520        |
|    policy_gradient_loss | -0.00114    |
|    value_loss           | 0.119       |
-----------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 454           |
|    time_elapsed         | 2458          |
|    total_timesteps    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 464           |
|    time_elapsed         | 2511          |
|    total_timesteps      | 237568        |
| train/                  |               |
|    approx_kl            | 0.00079756626 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.68         |
|    explained_variance   | 0.0622        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0409        |
|    n_updates            | 4630          |
|    policy_gradient_loss | -0.00157      |
|    value_loss           | 0.4           |
-------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 465          |
|    time_elapsed         | 2516    

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 475           |
|    time_elapsed         | 2566          |
|    total_timesteps      | 243200        |
| train/                  |               |
|    approx_kl            | 0.00026532984 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.68         |
|    explained_variance   | -0.162        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.157         |
|    n_updates            | 4740          |
|    policy_gradient_loss | -0.000871     |
|    value_loss           | 0.549         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 476           |
|    time_elapsed         | 2571

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 486          |
|    time_elapsed         | 2621         |
|    total_timesteps      | 248832       |
| train/                  |              |
|    approx_kl            | 0.0009025051 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.69        |
|    explained_variance   | -0.0687      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0497       |
|    n_updates            | 4850         |
|    policy_gradient_loss | -0.00192     |
|    value_loss           | 0.127        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 487          |
|    time_elapsed         | 2626         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 497          |
|    time_elapsed         | 2680         |
|    total_timesteps      | 254464       |
| train/                  |              |
|    approx_kl            | 9.802636e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.68        |
|    explained_variance   | 0.624        |
|    learning_rate        | 1e-06        |
|    loss                 | 20.6         |
|    n_updates            | 4960         |
|    policy_gradient_loss | -0.000653    |
|    value_loss           | 112          |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 498           |
|    time_elapsed         | 2687          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 508          |
|    time_elapsed         | 2740         |
|    total_timesteps      | 260096       |
| train/                  |              |
|    approx_kl            | 0.0002733596 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.7         |
|    explained_variance   | 0.0412       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0596       |
|    n_updates            | 5070         |
|    policy_gradient_loss | -0.00108     |
|    value_loss           | 0.152        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 509           |
|    time_elapsed         | 2746          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 519          |
|    time_elapsed         | 2800         |
|    total_timesteps      | 265728       |
| train/                  |              |
|    approx_kl            | 0.0005483752 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.7         |
|    explained_variance   | 0.00126      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.114        |
|    n_updates            | 5180         |
|    policy_gradient_loss | -0.000666    |
|    value_loss           | 0.356        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 520           |
|    time_elapsed         | 2805          |
|    t

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 530          |
|    time_elapsed         | 2854         |
|    total_timesteps      | 271360       |
| train/                  |              |
|    approx_kl            | 6.839819e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.61        |
|    explained_variance   | 0.707        |
|    learning_rate        | 1e-06        |
|    loss                 | 88.4         |
|    n_updates            | 5290         |
|    policy_gradient_loss | -0.000716    |
|    value_loss           | 117          |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 531          |
|    time_elapsed         | 2859         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 541          |
|    time_elapsed         | 2909         |
|    total_timesteps      | 276992       |
| train/                  |              |
|    approx_kl            | 0.0007167058 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.56        |
|    explained_variance   | 0.0748       |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0581       |
|    n_updates            | 5400         |
|    policy_gradient_loss | -0.00144     |
|    value_loss           | 0.0962       |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 542          |
|    time_elapsed         | 2914         |
|    total_

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 552           |
|    time_elapsed         | 2965          |
|    total_timesteps      | 282624        |
| train/                  |               |
|    approx_kl            | 0.00068441103 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.48         |
|    explained_variance   | -0.157        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.054         |
|    n_updates            | 5510          |
|    policy_gradient_loss | -0.00273      |
|    value_loss           | 0.155         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 553           |
|    time_elapsed         | 2970

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 563          |
|    time_elapsed         | 3025         |
|    total_timesteps      | 288256       |
| train/                  |              |
|    approx_kl            | 7.371511e-05 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.44        |
|    explained_variance   | -0.0796      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.183        |
|    n_updates            | 5620         |
|    policy_gradient_loss | -0.000125    |
|    value_loss           | 0.322        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 564          |
|    time_elapsed         | 3030         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 574          |
|    time_elapsed         | 3083         |
|    total_timesteps      | 293888       |
| train/                  |              |
|    approx_kl            | 0.0004434795 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.9          |
|    learning_rate        | 1e-06        |
|    loss                 | 24.5         |
|    n_updates            | 5730         |
|    policy_gradient_loss | -7.72e-05    |
|    value_loss           | 87.6         |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 575          |
|    time_elapsed         | 3088         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 585          |
|    time_elapsed         | 3140         |
|    total_timesteps      | 299520       |
| train/                  |              |
|    approx_kl            | 0.0003371589 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.31        |
|    explained_variance   | 0.195        |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0464       |
|    n_updates            | 5840         |
|    policy_gradient_loss | -0.00069     |
|    value_loss           | 0.145        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 586          |
|    time_elapsed         | 3146         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 95           |
|    iterations           | 596          |
|    time_elapsed         | 3204         |
|    total_timesteps      | 305152       |
| train/                  |              |
|    approx_kl            | 0.0006581566 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.19        |
|    explained_variance   | 0.00818      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.114        |
|    n_updates            | 5950         |
|    policy_gradient_loss | -0.00143     |
|    value_loss           | 0.429        |
------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 597           |
|    time_elapsed         | 3210          |
|    t

-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 607           |
|    time_elapsed         | 3267          |
|    total_timesteps      | 310784        |
| train/                  |               |
|    approx_kl            | 0.00010757567 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.2          |
|    explained_variance   | -0.12         |
|    learning_rate        | 1e-06         |
|    loss                 | 0.125         |
|    n_updates            | 6060          |
|    policy_gradient_loss | -0.000336     |
|    value_loss           | 1.02          |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 95            |
|    iterations           | 608           |
|    time_elapsed         | 3273

-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 618           |
|    time_elapsed         | 3331          |
|    total_timesteps      | 316416        |
| train/                  |               |
|    approx_kl            | 0.00043636176 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.33         |
|    explained_variance   | -0.197        |
|    learning_rate        | 1e-06         |
|    loss                 | 0.0642        |
|    n_updates            | 6170          |
|    policy_gradient_loss | -0.000277     |
|    value_loss           | 0.141         |
-------------------------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 94            |
|    iterations           | 619           |
|    time_elapsed         | 3337

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 629          |
|    time_elapsed         | 3394         |
|    total_timesteps      | 322048       |
| train/                  |              |
|    approx_kl            | 0.0006599921 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.34        |
|    explained_variance   | -0.0548      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.0602       |
|    n_updates            | 6280         |
|    policy_gradient_loss | -0.00155     |
|    value_loss           | 0.119        |
------------------------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 630          |
|    time_elapsed         | 3400         |
|    total_

------------------------------------------
| time/                   |              |
|    fps                  | 94           |
|    iterations           | 640          |
|    time_elapsed         | 3455         |
|    total_timesteps      | 327680       |
| train/                  |              |
|    approx_kl            | 0.0013715683 |
|    clip_fraction        | 0            |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.33        |
|    explained_variance   | -0.0675      |
|    learning_rate        | 1e-06        |
|    loss                 | 0.151        |
|    n_updates            | 6390         |
|    policy_gradient_loss | -0.00164     |
|    value_loss           | 0.647        |
------------------------------------------



KeyboardInterrupt



# Testing trained model

In [11]:
# Restore a saved PPO checkpoint for evaluation (the file name suggests the
# snapshot taken around 300,000 timesteps — confirm against the save callback).
checkpoint_path = "./train/best_model300000.zip"
model = PPO.load(checkpoint_path)

In [13]:
# Build the evaluation environment with the same wrapper stack as the
# preprocessing cell (reduced action set -> grayscale -> vectorised -> 4 stacked
# frames), but with render_mode="human" so the game is shown while the agent plays.
# The unwrapped env gets its own name so the lambda below does not close over
# `env`, which is rebound to the vectorised wrapper on the very same statement.
mario_env = gym.make("SuperMarioBros-v0", apply_api_compatibility=True, render_mode="human")
mario_env = JoypadSpace(mario_env, SIMPLE_MOVEMENT)
mario_env = GrayScaleObservation(mario_env, keep_dim=True)
env = DummyVecEnv([lambda: mario_env])
env = VecFrameStack(env, 4)

# Values returned by render() are kept in `frames`; in "human" mode they may be
# None (TODO confirm) — the recording cell further down uses "rgb_array" instead.
frames = []
try:
    state = env.reset()
    for steps in range(1000):
        # Let the loaded policy pick the action for the current stacked observation.
        action, _ = model.predict(state)
        state, reward, done, info = env.step(action)
        frame = env.render()
        frames.append(frame)
finally:
    # Always release the emulator/window, even if the cell is interrupted or a
    # step raises; otherwise the render window is left open.
    env.close()

# To improve the model :

- Reduce the learning_rate
- Train for longer: 1 million timesteps should be a minimum


# Optional: Create an MP4 video of the agent

In [14]:
import cv2
# Build the environment with the same wrapper stack as the preprocessing cell,
# but with render_mode="rgb_array" so render() can hand back image arrays that
# are collected for the video export.
# The unwrapped env gets its own name so the lambda below does not close over
# `env`, which is rebound to the vectorised wrapper on the very same statement.
mario_env = gym.make("SuperMarioBros-v0", apply_api_compatibility=True, render_mode="rgb_array")
mario_env = JoypadSpace(mario_env, SIMPLE_MOVEMENT)
mario_env = GrayScaleObservation(mario_env, keep_dim=True)
env = DummyVecEnv([lambda: mario_env])
env = VecFrameStack(env, 4)

# One rendered frame is appended per environment step.
frames = []
try:
    state = env.reset()
    for steps in range(1000):
        # Let the loaded policy pick the action for the current stacked observation.
        action, _ = model.predict(state)
        state, reward, done, info = env.step(action)
        frame = env.render()
        frames.append(frame)
finally:
    # Always release the emulator, even if the cell is interrupted or a step raises.
    env.close()

# Encode the frames collected during the rollout into an MP4 file.
output_video_file = 'agent_demo.mp4'
fps = 60

# Fail early with a clear message instead of an IndexError/AttributeError below.
if not frames or frames[0] is None:
    raise ValueError(
        "No image frames were recorded; run the rollout with render_mode='rgb_array' first."
    )

# Array shape is (height, width, ...), while VideoWriter takes (width, height).
frame_height, frame_width = frames[0].shape[:2]
frame_size = (frame_width, frame_height)
codec = cv2.VideoWriter_fourcc(*'mp4v')
video_writer = cv2.VideoWriter(output_video_file, codec, fps, frame_size)
try:
    # VideoWriter does not raise when the file/codec cannot be opened; without
    # this check the cell would finish "successfully" and leave an empty file.
    if not video_writer.isOpened():
        raise RuntimeError(f"Could not open video writer for {output_video_file!r} with codec 'mp4v'.")
    for frame in frames:
        # Frames are treated as RGB here and converted to the BGR order OpenCV writes.
        bgr_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        video_writer.write(bgr_frame)
finally:
    # Release on every path so the output file is finalised and the handle is freed.
    video_writer.release()