In [1]:
import gymnasium as gym
import minigrid
import torch
import torch.nn as nn
import numpy as np
from torchvision.transforms import ToTensor
import cv2
from torch.utils.data import TensorDataset
from torchvision import datasets
from tqdm import tqdm

Create an environment and step through it with randomly sampled actions to generate training data for the model. Each sample is a transition of the form
`(initial_frame, action, next_frame)`.

In [2]:
# Build the MiniGrid environment; "rgb_array" makes env.render() return image
# frames instead of opening a window.
env = gym.make(id="MiniGrid-Empty-5x5-v0", render_mode="rgb_array")

# Initial reset so the environment is in a valid state before it is first used.
obs, info = env.reset()

In [3]:
# Roll out a uniformly random policy for a fixed number of episodes and record
# every transition as [frame, action, next_frame]. Frames are whatever
# env.render() returns (the environment is created with render_mode="rgb_array").
dataset = []
for episodes in range(550):
    obs, _ = env.reset()
    frame = env.render()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action=action)
        next_frame = env.render()

        dataset.append([frame, action, next_frame])
        frame = next_frame

        # Under the Gymnasium step API an episode is over when it is either
        # terminated (a terminal state was reached) or truncated (e.g. a time
        # limit was hit). Looking at the first flag only keeps stepping an
        # already-truncated episode until the random policy happens to reach a
        # terminal state, which yields over-long episodes and transitions
        # collected after the episode has officially ended.
        done = terminated or truncated
env.close()

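Process the `dataset` list: convert each stored frame into a float32 `np` array, resize it to 64×64 and divide the pixel values by 255. The frames are then stacked and transposed so they are stored in channels-first (C, H, W) format.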

In [4]:
# Preprocess every recorded transition: both frames are cast to float32,
# resized to 64x64 and divided by 255; the action is carried over unchanged.
processed_samples = []
for initial_state, action, final_state in dataset:
    # cast -> resize -> scale, applied identically to the "before" frame ...
    initial_state = cv2.resize(np.array(initial_state, dtype=np.float32), (64, 64)) / 255.0
    # ... and to the "after" frame.
    final_state = cv2.resize(np.array(final_state, dtype=np.float32), (64, 64)) / 255.0

    processed_samples.append((initial_state, action, final_state))

In [5]:
# Split the (frame, action, next_frame) tuples into three aligned arrays whose
# first axis indexes the transition.
initial_frames = np.asarray([before for before, _, _ in processed_samples], dtype=np.float32)
actions = np.asarray([act for _, act, _ in processed_samples], dtype=np.int32)
final_frames = np.asarray([after for _, _, after in processed_samples], dtype=np.float32)

In [6]:
# Move the last axis to position 1, i.e. channels-last -> channels-first
# (presumably (N, H, W, C) -> (N, C, H, W); confirm against the render output).
# NOTE: this rebinds the same names, so re-running this cell without first
# re-running the cell that builds the arrays permutes the axes a second time.
channels_first = (0, 3, 1, 2)
initial_frames = np.transpose(initial_frames, channels_first)
final_frames = np.transpose(final_frames, channels_first)

Save the `np` arrays to disk as `.npy` files.

In [7]:
# Write the three aligned arrays to .npy files in the current working directory.
for filename, array in (
    ('images.npy', initial_frames),
    ('actions.npy', actions),
    ('next_frames.npy', final_frames),
):
    np.save(filename, array)