In [1]:
!pip install pyarrow



In [2]:
import os
import sys
import random
import pandas as pd
import numpy as np
import pyarrow

import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

2024-03-04 17:42:53.813220: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
class DataCollector:
    """Roll out several policies in one vectorised environment and save each episode.

    For every model, one episode is played per environment seed and written to
    ``<env id>/model_<model index>/seed_<seed>.parquet``. Episodes whose output
    file already exists are skipped, so an interrupted collection can be resumed.
    """

    def __init__(self, env, models, num_episodes = 10, seed = 42):
        """Store the environment and models and draw the per-episode seeds.

        Args:
            env: Vectorised environment exposing ``seed``, ``reset``, ``step``
                and ``unwrapped.envs`` (the notebook passes a one-env ``DummyVecEnv``).
            models: Sequence of policies exposing ``predict(observation)``.
            num_episodes: Number of environment seeds, i.e. episodes per model.
            seed: Seed of the generator used to draw the environment seeds.
        """
        self.env = env
        self.models = models

        # A private generator produces the same seed sequence as seeding the
        # global `random` module would, but leaves the global RNG state alone.
        rng = random.Random(seed)

        # Generate random integers
        upper_bound = sys.maxsize
        self.env_seeds = [rng.randrange(upper_bound) for _ in range(num_episodes)]

    def collect(self):
        """Run every model on every environment seed and save the episodes.

        A model whose roll-out raises is reported on stdout and skipped; the
        remaining models are still processed.
        """
        # Create the folder for the env. Read the id from the collector's own
        # environment, not from a variable that happens to exist in the notebook.
        env_name = self.env.unwrapped.envs[0].spec.id
        os.makedirs(env_name, exist_ok=True)

        for index_model, model in enumerate(self.models):
            # Create a folder for each model
            print("Using the model number:", index_model + 1, "...")
            model_folder = os.path.join(env_name, "model_" + str(index_model))
            os.makedirs(model_folder, exist_ok=True)

            # Some models seem not to be compatible with the env
            try:
                for seed in self.env_seeds:
                    print("Seed :", seed)
                    output_file = os.path.join(model_folder, "seed_" + str(seed))
                    self.run_episode(model, seed, output_file)
            except Exception as e:
                print("An exception occurred:", e)
                print("Skipping model.")

    @staticmethod
    def _returns_to_go(step_rewards):
        """Return, for each step, the sum of the rewards from that step to the end."""
        rewards = np.asarray(step_rewards)
        if rewards.size == 0:
            return rewards.reshape(0)
        # One value per step (collapses the vec-env axis), then a single
        # reversed cumulative sum instead of re-summing the tail at every step.
        per_step = rewards.reshape(len(rewards), -1).sum(axis=1)
        return np.cumsum(per_step[::-1])[::-1].copy()

    def run_episode(self,model, seed, output_file):
        """Play one seeded episode with ``model`` and write it to ``output_file + ".parquet"``.

        The saved frame has one row per step with the flattened observation,
        the return-to-go (sum of rewards from that step onwards) and the
        flattened action. Nothing is done if the output file already exists.

        Args:
            model: Policy exposing ``predict(observation) -> (action, state)``.
            seed: Environment seed for this episode.
            output_file: Output path without the ``.parquet`` extension.
        """
        # Skip if the file exists
        output_file = output_file + ".parquet"
        if os.path.exists(output_file):
            return

        # Seed *before* resetting: stable-baselines3 vec-envs only hand a seed
        # to the environments at the next reset(), so seeding afterwards would
        # record this episode under a seed it did not actually use.
        self.env.seed(seed)
        observation = self.env.reset()

        episode_observations = []
        episode_rewards = []
        episode_actions = []

        done = False

        while not done:
            # NOTE(review): predict() is called with its defaults; if the policy
            # samples its actions the episode is not determined by the env seed
            # alone - confirm whether deterministic=True is wanted.
            action, _ = model.predict(observation)

            # Store the observation the action was chosen from, and the action
            episode_observations.append(observation)
            episode_actions.append(action)

            observation, reward, done, _ = self.env.step(action)
            episode_rewards.append(reward)

        # Putting reward in the original paper format (return-to-go)
        returns_to_go = self._returns_to_go(episode_rewards)

        # Multidimensional arrays are not compatible with parquet, so every
        # observation and action is stored flattened.
        episode_df = pd.DataFrame(
                {'observation': [obs.ravel() for obs in episode_observations],
                 'rewards': returns_to_go,
                 'action': [act.ravel() for act in episode_actions]
                })

        # Save the DataFrame to a file in an efficient format
        episode_df.to_parquet(output_file)

In [4]:
def find_zip_files(directory):
    """Return the paths of all ``.zip`` files found anywhere under ``directory``.

    The paths are built by joining onto ``directory``, so they are relative
    when ``directory`` is relative. The result is sorted because ``os.walk``
    yields entries in arbitrary order; sorting gives callers that use the list
    position (for example as a model index) the same order on every run.

    Args:
        directory: Root folder to search recursively.

    Returns:
        Sorted list of path strings; empty if nothing matches or the folder
        does not exist.
    """
    zip_files = [
        os.path.join(root, file)
        for root, _dirs, files in os.walk(directory)
        for file in files
        # Keep only files with a .zip extension
        if file.endswith('.zip')
    ]
    zip_files.sort()
    return zip_files

In [5]:
# Example usage: load a PPO model from every zip archive found under ../models
directory_path = '../models'
zip_files = find_zip_files(directory_path)
models = [PPO.load(zip_file) for zip_file in zip_files]

Number of learnable parameters: 2808512
Number of learnable parameters: 2768288
Number of learnable parameters: 2728064
Number of learnable parameters: 2848736
Number of learnable parameters: 2808512
Number of learnable parameters: 2768288
Number of learnable parameters: 2728064
Number of learnable parameters: 2848736
Number of learnable parameters: 2808512
Number of learnable parameters: 2768288
Number of learnable parameters: 2728064
Number of learnable parameters: 2848736


In [6]:
# Environment configuration
env_id = 'CarRacing-v2'
render_mode = "rgb_array"


def _make_env():
    """Create one instance of the configured Gymnasium environment."""
    return gym.make(env_id, render_mode=render_mode)


# Wrap the single environment in a vectorised env
env = DummyVecEnv([_make_env])

In [7]:
# Collect 20 seeded episodes for each loaded model
data_collector = DataCollector(env=env, models=models, num_episodes=20)
data_collector.collect()

Using the model number: 1 ...
Seed : 1026847926404610461
Seed : 6839596180442651345
Seed : 2258728696626565719
Seed : 1287010195568088798
Seed : 945351111358374057
Seed : 6831454145019129197
Seed : 5030118478018109776
Seed : 5446332115853614978
Seed : 293143515607798169
Seed : 864186096263678574
Seed : 2145917995920697483
Seed : 5552642720119322321
Seed : 5176572018665656243
Seed : 6604115266222097961
Seed : 6468581141273039753
Seed : 3869387381345962230
Seed : 4143222138124291040
Seed : 2565856380474498476
Seed : 8017880294935156107
Seed : 6998762617471362847
Seed : 1472597238169086909
Seed : 3897929836277613434
Seed : 2562910770574305398
Seed : 1985918923296309552
Seed : 7041990410826934883
Seed : 942723427174387577
Seed : 3504210894617719057
Seed : 3311000352010168215
Seed : 3172431589140685843
Seed : 2439774329912543747
Seed : 400759557860619647
Seed : 4237446567370404816
Seed : 1151318126898855557
Seed : 8506673322251918668
Seed : 726803524325187859
Seed : 2704092307918063857
Seed

KeyboardInterrupt: 

In [None]:
# Show the installed pandas version.
pd.__version__