In [1]:
from typing import Tuple
import gym
from src import maze
from os import listdir
from os.path import isfile, join

In [2]:
# Directory holding the 5x5 training mazes; every regular file in it is read.
mypath = './src/environment/mazes/mazes5/train/'
mazes = [join(mypath, f) for f in listdir(mypath) if isfile(join(mypath, f))]

# Collect every line of every maze file, in directory-listing order.
# The loop variable is deliberately not called `maze`: that name is bound to
# the module imported with `from src import maze`, and rebinding it here would
# shadow the module for the rest of the session.
# NOTE(review): `listdir` returns entries in arbitrary order, and `maze_info`
# is later indexed by "maze version" — confirm the order matches the one used
# when the expert dataset was generated.
maze_info = []
for maze_path in mazes:
    with open(maze_path, "r") as f:
        maze_info.extend(f)

In [3]:
import numpy as np
import pandas as pd

In [4]:
# Column labels for the expert transition table, in the order the values are
# stored along the second axis of the saved array.
column_names = [
    "maze version",
    "solution number",
    "state",
    "action",
    "next_state",
    "episode reward",
    "reward",
    "episode start",
    "episode end",
]

# Raw expert transitions, one row per environment step.
x = np.load("./tmp/expert/maze5/dataset.npy")
dataframe = pd.DataFrame(data=x, columns=column_names)
dataframe

Unnamed: 0,maze version,solution number,state,action,next_state,episode reward,reward,episode start,episode end
0,0.0,0.0,0.0,1.0,1.0,0.000,-0.004,1.0,0.0
1,0.0,0.0,1.0,0.0,2.0,0.000,-0.004,0.0,0.0
2,0.0,0.0,2.0,0.0,3.0,0.000,-0.004,0.0,0.0
3,0.0,0.0,3.0,0.0,4.0,0.000,-0.004,0.0,0.0
4,0.0,0.0,4.0,3.0,5.0,0.000,-0.004,0.0,0.0
...,...,...,...,...,...,...,...,...,...
3609,99.0,0.0,4410.0,0.0,4411.0,0.000,-0.004,0.0,0.0
3610,99.0,0.0,4411.0,1.0,4412.0,0.000,-0.004,0.0,0.0
3611,99.0,0.0,4412.0,1.0,4413.0,0.000,-0.004,0.0,0.0
3612,99.0,0.0,4413.0,1.0,4414.0,0.000,-0.004,0.0,0.0


In [21]:
def select_shortest_solutions(df):
    """Keep, for every maze version, only the rows of its shortest solution.

    The length of a solution is the number of rows it has in ``df``. When
    several solutions of one maze tie, the one with the lowest
    "solution number" wins. Only solutions that actually have rows are
    considered, so gaps in the maze or solution numbering cannot select an
    empty solution or raise.

    Args:
        df: DataFrame with "maze version" and "solution number" columns.

    Returns:
        A new DataFrame with the same columns and a fresh 0..n-1 index, ordered
        by ascending maze version with the original row order kept inside each
        maze.
    """
    pieces = []
    for _, maze_rows in df.groupby("maze version", sort=True):
        # Row count per solution of this maze, ordered by solution number.
        path_len = maze_rows.groupby("solution number", sort=True).size()
        if path_len.empty:
            continue
        # idxmin returns the first minimum, i.e. the lowest solution number.
        shortest = path_len.idxmin()
        pieces.append(maze_rows[maze_rows["solution number"] == shortest])

    if not pieces:
        return df.iloc[:0].copy()
    # A single concat avoids re-copying the accumulated frame on every maze.
    return pd.concat(pieces, ignore_index=True)


shortest_dataframe = select_shortest_solutions(dataframe)
shortest_dataframe

Unnamed: 0,maze version,solution number,state,action,next_state,episode reward,reward,episode start,episode end
0,0.0,0.0,0.0,1.0,1.0,0.000,-0.004,1.0,0.0
1,0.0,0.0,1.0,0.0,2.0,0.000,-0.004,0.0,0.0
2,0.0,0.0,2.0,0.0,3.0,0.000,-0.004,0.0,0.0
3,0.0,0.0,3.0,0.0,4.0,0.000,-0.004,0.0,0.0
4,0.0,0.0,4.0,3.0,5.0,0.000,-0.004,0.0,0.0
...,...,...,...,...,...,...,...,...,...
1205,99.0,0.0,4410.0,0.0,4411.0,0.000,-0.004,0.0,0.0
1206,99.0,0.0,4411.0,1.0,4412.0,0.000,-0.004,0.0,0.0
1207,99.0,0.0,4412.0,1.0,4413.0,0.000,-0.004,0.0,0.0
1208,99.0,0.0,4413.0,1.0,4414.0,0.000,-0.004,0.0,0.0


In [6]:
from collections import defaultdict
import numpy as np

def create_npz(df, name, maze_layouts=None):
    """Flatten per-step transitions into per-observation lists and save them as ``.npz``.

    Episodes are delimited by pairing, in order, the rows flagged
    ``"episode start" == 1`` with the rows flagged ``"episode end" == 1``.
    Every row of an episode contributes one entry. After the last row of an
    episode one extra entry is added for the terminal observation
    (``next_state``); it repeats that row's action and reward so that all
    lists keep the same length.

    Args:
        df: DataFrame with at least the columns "state", "action",
            "next_state", "reward", "episode start", "episode end" and
            "maze version".
        name: File name or path handed to ``np.savez``.
        maze_layouts: Sequence indexed by ``int(row["maze version"])``.
            Defaults to the notebook-level ``maze_info`` list.

    Returns:
        ``defaultdict(list)`` with the keys "obs", "actions", "rewards",
        "episode_starts" and "maze".
    """
    if maze_layouts is None:
        maze_layouts = maze_info

    dataset = defaultdict(list)
    # Positional 0..n-1 index so that the labels below can be used with iloc.
    # drop=True also works when the incoming index carries a name.
    df = df.reset_index(drop=True)
    starts = df.index[df["episode start"] == 1]
    ends = df.index[df["episode end"] == 1]
    for start, end in zip(starts, ends):
        for row in range(start, end + 1):
            data = df.iloc[row]
            dataset["obs"].append(f"{int(data['state'])}.npy")
            dataset["actions"].append(int(data["action"]))
            dataset["rewards"].append(data["reward"])
            dataset["episode_starts"].append(data["episode start"])
            dataset["maze"].append(maze_layouts[int(data["maze version"])])
            if row == end:
                # Terminal observation: padded with the last action and reward.
                dataset["obs"].append(f"{int(data['next_state'])}.npy")
                dataset["actions"].append(int(data["action"]))
                dataset["rewards"].append(data["reward"])
                # Never an episode start. Copying the row's flag would emit a
                # second start marker for one-step episodes (start == end).
                dataset["episode_starts"].append(0.0)
                dataset["maze"].append(maze_layouts[int(data["maze version"])])

    np.savez(name, **dataset)
    return dataset

In [7]:
# Three variants of the expert data, each written to its own .npz file:
# every recorded solution, only solution 0 of each maze, and the shortest
# solution of each maze.
first_solution_rows = dataframe[dataframe["solution number"] == 0]

all_routes = create_npz(dataframe, "all_routes.npz")
single_route = create_npz(first_solution_rows, "single_route.npz")
shortest_route = create_npz(shortest_dataframe, "shortest_route.npz")

In [8]:
from imitation_datasets.dataset.huggingface import baseline_to_huggingface
# (source .npz, target .jsonl, in-memory dataset whose keys are exported)
conversions = [
    ("./all_routes.npz", "./all_routes.jsonl", all_routes),
    ("./single_route.npz", "./single_route.jsonl", single_route),
    ("./shortest_route.npz", "./shortest_route.jsonl", shortest_route),
]
for npz_path, jsonl_path, routes in conversions:
    baseline_to_huggingface(npz_path, jsonl_path, keys=list(routes.keys()))

  from .autonotebook import tqdm as notebook_tqdm
Writing into file: 100%|█████████████████████████| 3882/3882 [00:00<00:00, 270861.35it/s]
Writing into file: 100%|█████████████████████████| 1422/1422 [00:00<00:00, 298842.58it/s]
Writing into file: 100%|█████████████████████████| 1310/1310 [00:00<00:00, 218984.43it/s]


In [9]:
import tarfile

# Bundle the three exported jsonl files into one gzip-compressed tarball.
archive_members = ("all_routes.jsonl", "single_route.jsonl", "shortest_route.jsonl")
with tarfile.open("dataset.tar.gz", "w:gz") as tar:
    for member in archive_members:
        tar.add(member)

In [None]:
import os
from os import listdir
from os.path import isfile, join
import tarfile
from PIL import Image
import numpy as np

# Target folder for the converted observations. exist_ok replaces the
# check-then-create pair and keeps the cell safe to re-run.
os.makedirs("./tmp/images/", exist_ok=True)

# Convert every stored observation array to a JPEG with the same base name.
# NOTE(review): this loop used to read `dataset["obs"]`, but no notebook-level
# `dataset` holding .npy file names is defined before this cell. `all_routes`
# (returned by create_npz for the full dataframe) holds those names — confirm
# this is the intended source.
for f in all_routes["obs"]:
    path = f"./tmp/expert/maze5/{f}"
    Image.fromarray(np.load(path)).save(f"./tmp/images/{f.split('.')[0]}.jpg")

# Archive the folder; its contents appear under "images" inside the tarball.
with tarfile.open("images.tar.gz", "w:gz") as tar:
    tar.add("./tmp/images", "images")

In [17]:
import datasets
from datasets import load_dataset

# Load the "shortest_route" split of the uploaded dataset, forcing a fresh
# download instead of reusing a cached copy. trust_remote_code=True allows the
# repository's own builder script to run.
dataset = load_dataset(
    "NathanGavenski/imagetest",
    split="shortest_route",
    trust_remote_code=True,
    download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD
)

Downloading builder script: 100%|███████████████████| 2.71k/2.71k [00:00<00:00, 21.8MB/s]
Downloading data: 100%|█████████████████████████████| 7.28M/7.28M [00:00<00:00, 25.4MB/s]
Downloading data: 100%|█████████████████████████████| 45.2k/45.2k [00:00<00:00, 8.63MB/s]
Generating all_routes split: 100%|█████████| 3882/3882 [00:00<00:00, 28969.21 examples/s]
Generating single_route split: 100%|███████| 1422/1422 [00:00<00:00, 18595.15 examples/s]
Generating shortest_route split: 100%|█████| 1310/1310 [00:00<00:00, 17685.64 examples/s]


In [18]:
# Display the loaded split (feature names and row count).
dataset

Dataset({
    features: ['obs', 'actions', 'rewards', 'episode_starts', 'maze'],
    num_rows: 1310
})

In [10]:
from imitation_datasets.dataset import BaselineDataset
from torchvision import transforms
# Build the project's BaselineDataset from the same repository and split.
# NOTE(review): the Resize(120) transform is presumably applied to each image
# observation when a sample is read — confirm against BaselineDataset.
dataset = BaselineDataset(
    "NathanGavenski/imagetest",
    source="hf",
    hf_split="shortest_route",
    transform=transforms.Compose([transforms.Resize(120)])
)

Creating dataset: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 11563.79it/s]


In [None]:
# Inspect the first sample of the dataset.
dataset[0]

In [None]:
from tqdm import tqdm
import datasets
from datasets import load_dataset
from imitation_datasets.dataset.huggingface import huggingface_to_baseline

# Fetch the CartPole expert data (always re-downloading) and convert it to the
# baseline dictionary format.
dataset = load_dataset(
    "NathanGavenski/CartPole-v1",
    split="train",
    trust_remote_code=True,
    download_mode=datasets.DownloadMode.FORCE_REDOWNLOAD,
)
data = huggingface_to_baseline(dataset)

# One-dimensional observations become a column so each step is its own row.
if data["obs"].ndim == 1:
    data["obs"] = data["obs"].reshape((-1, 1))

In [None]:
import numpy as np

# Shape of one observation: a single slot when observations are strings,
# otherwise the trailing dimensions of the observation array.
shape = [1] if isinstance(data["obs"][0], str) else data["obs"].shape[1:]

if len(data["actions"].shape) == 1:
    action_size = 1
else:
    action_size = data["actions"].shape[-1]

# Episode boundaries: every index flagged as a start, plus one sentinel past
# the last step so the final episode is closed as well.
episode_starts = list(np.where(data["episode_starts"] == 1)[0])
episode_starts.append(len(data["episode_starts"]))

# Chunks are collected per episode and concatenated once at the end. Each list
# is seeded with an empty array of the right width so the result has the
# expected shape and dtype promotion even when there are no episodes.
state_chunks = [np.empty((0, *shape))]
next_state_chunks = [np.empty((0, *shape))]
action_chunks = [np.empty((0, action_size))]

for start, end in zip(episode_starts, tqdm(episode_starts[1:], desc="Creating dataset")):
    episode = data["obs"][start:end]
    # Kept in its own name: rebinding `actions` here would discard everything
    # accumulated from earlier episodes.
    episode_actions = data["actions"][start:end].reshape((-1, action_size))

    # Step i pairs obs[i] with obs[i + 1]; the last action of the episode has
    # no successor observation and is dropped.
    state_chunks.append(episode[:-1])
    next_state_chunks.append(episode[1:])
    action_chunks.append(episode_actions[:-1])

states = np.concatenate(state_chunks, axis=0)
next_states = np.concatenate(next_state_chunks, axis=0)
actions = np.concatenate(action_chunks, axis=0)

In [None]:
from imitation_datasets.dataset import BaselineDataset
from torchvision import transforms
# Build the dataset with a 200x200 resize transform, then unpack its first
# sample into (state, action, next_state) and show the two state shapes.
dataset = BaselineDataset("NathanGavenski/imagetest", source="hf", transform=transforms.Resize((200, 200)))
# dataset = BaselineDataset("NathanGavenski/CartPole-v1", source="hf")
state, action, next_state = dataset[0]
state.shape, action, next_state.shape

In [None]:
from benchmark.methods.policies.cnn import CNN
from benchmark.methods.policies import MLP
import gym
from src import maze
import torch

In [None]:
# Create the 5x5 maze environment with an 84x84 screen setting and show the
# shape of its observation space.
env = gym.make("Maze-v0", shape=(5, 5), screen_width=84, screen_height=84)
env.observation_space.shape

In [None]:
# Same check for CartPole: show the shape of its observation space.
env = gym.make("CartPole-v1")
env.observation_space.shape

In [None]:
# Same check for Pong: show the shape of its observation space.
env = gym.make("PongNoFrameskip-v4")
env.observation_space.shape

In [None]:
# Build the CNN from the shape of the maze observation space.
env = gym.make("Maze-v0", shape=(5, 5), screen_width=84, screen_height=84)
observation_size = env.observation_space.shape
cnn = CNN(observation_size)

# Probe the CNN with an all-zero batch of one to discover its output width.
# The observation shape is reversed before being unpacked — presumably to move
# from a channels-last to a channels-first layout; TODO confirm against CNN.
with torch.no_grad():
    output = cnn(torch.zeros(1, *observation_size[::-1]))

# The MLP receives the CNN's output width and the number of discrete actions.
policy = MLP(output.shape[-1], env.action_space.n)

In [None]:
from torch import nn
# Chain the feature extractor and the policy head into a single module.
model = nn.Sequential(
    cnn,
    policy
)

In [None]:
# Smoke test: run the combined model on an all-zero batch of one.
model(torch.zeros(1, *observation_size[::-1]))

In [1]:
import torch
from torchvision.transforms import ToTensor
from benchmark.methods import IUPE as Method
import gym
from src import maze

# Visual 5x5 maze with a 64x64 screen setting.
env = gym.make("Maze-v0", shape=(5, 5), screen_width=64, screen_height=64, visual=True)

# Instantiate the method (IUPE, imported as Method) on this environment and
# ask it for an action on the initial observation.
method = Method(env)
method.predict(env.reset(), transforms=ToTensor())

  from .autonotebook import tqdm as notebook_tqdm


array(3)

In [18]:
from benchmark.methods import BC
import gym
from src import maze

# Behavioural cloning on CartPole: predict an action for the initial
# observation returned by reset().
env = gym.make("CartPole-v1")
method = BC(env)
method.predict(env.reset())

array(1)

In [4]:
from torchvision.transforms import ToTensor
from benchmark.methods import IUPE as Method
import gymnasium as gym
from src import maze
from gymnasium.wrappers import ResizeObservation

# Pong through gymnasium, with observations resized by the wrapper.
# gymnasium's reset() returns (observation, info), hence the [0].
env = gym.make("Pong-v4")
env = ResizeObservation(env, 64)
method = Method(env)
method.predict(env.reset()[0], transforms=ToTensor())

array(2)

In [None]:
import gymnasium as gym
from src import maze
from gymnasium.wrappers import ResizeObservation

# Show the shape of the first observation after the resize wrapper.
env = gym.make("PongNoFrameskip-v4")
env = ResizeObservation(env, 64)
env.reset()[0].shape

In [4]:
# Close the environment created above.
env.close()

In [1]:
from collections import defaultdict
from os import listdir
from os.path import isfile, join
from tqdm import tqdm

from benchmark.methods import BC
import gym
import numpy as np

from src import maze

def enjoy(self, render, maze_paths, maze_settings, transforms):
    """Run the policy on every maze file under the "train" and "eval" folders.

    For each maze a fresh "Maze-v0" environment is created, the structure is
    loaded into it and the policy acts until the environment reports ``done``.
    If the observation does not change for 5 consecutive steps the agent is
    considered stuck: the episode is abandoned, its reward is replaced by
    ``env.max_episode_steps * (-0.1 / (env.shape[0] * env.shape[1]))`` and it
    is counted as a failure.

    Args:
        render: Not used by this implementation.
        maze_paths: Directory that contains the "train" and "eval" sub-folders.
        maze_settings: Keyword arguments forwarded to ``gym.make("Maze-v0", ...)``.
        transforms: Passed through to ``self.predict`` together with the
            observation.

    Returns:
        ``defaultdict`` with, for each maze type, the keys "<type> aer" (mean
        accumulated reward), "<type> aer (std)" (its standard deviation) and
        "<type> sr" (mean of the per-maze success flags).
    """
    metrics = defaultdict(int)

    for maze_type in ["train", "eval"]:
        path = f"{maze_paths}/{maze_type}"
        structures = [join(path, f) for f in listdir(path) if isfile(join(path, f))]

        average_reward = []
        success_rate = []

        if self.verbose:
            structures = tqdm(structures, desc="teste")
        for structure in structures:
            env = gym.make("Maze-v0", **maze_settings)
            done = False
            gave_up = False

            try:
                obs = env.load(structure)
                accumulated_reward = 0
                early_stop_count = 0
                while not done:
                    action = self.predict(obs, transforms)
                    next_obs, reward, done, _ = env.step(action)
                    accumulated_reward += reward

                    # Count consecutive steps that left the observation unchanged.
                    if (obs == next_obs).all():
                        early_stop_count += 1
                    else:
                        early_stop_count = 0

                    if early_stop_count == 5:
                        step_reward = -.1 / (env.shape[0] * env.shape[1])
                        lower_reward = env.max_episode_steps * step_reward
                        accumulated_reward = lower_reward
                        # Leave the episode here. Without the break the loop
                        # keeps running, later rewards are added on top of the
                        # penalty and the episode always ends with done=True.
                        gave_up = True
                        break
                    obs = next_obs.copy()
            finally:
                # Release the environment even when loading or stepping fails.
                env.close()

            # NOTE(review): `done` is treated as "maze solved"; if the
            # environment also sets it on a step limit, those episodes are
            # counted as successes too — confirm against the environment.
            success_rate.append(1 if done and not gave_up else 0)
            average_reward.append(accumulated_reward)
        metrics[f"{maze_type} aer"] = np.mean(average_reward)
        metrics[f"{maze_type} aer (std)"] = np.std(average_reward)
        metrics[f"{maze_type} sr"] = np.mean(success_rate)
    return metrics


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import types
from torchvision.transforms import ToTensor

# Keyword arguments forwarded to gym.make("Maze-v0", ...).
params = {
    "shape": (5, 5),
    "screen_width": 64,
    "screen_height": 64,
    "visual": True,
    
}
# Bind the notebook-level `enjoy` function to this BC instance as `_enjoy`,
# so that calling method._enjoy(...) passes `method` as `self`.
method = BC(gym.make("Maze-v0", **params), verbose=True)
method._enjoy = types.MethodType(enjoy, method)
# method._enjoy(False, "./src/environment/mazes/mazes5", params, ToTensor())

In [3]:
import gym
# Standalone maze environment built from the same settings.
env = gym.make("Maze-v0", **params)

In [6]:
# Reset the environment and keep the initial observation.
state = env.reset()

In [16]:
# Same penalty formula that `enjoy` applies to a stuck agent: a per-step
# reward of -0.1 / (env.shape[0] * env.shape[1]), multiplied by the
# environment's max_episode_steps.
step_reward = -.1 / (env.shape[0] * env.shape[1])
env.max_episode_steps * step_reward

-4.0

In [42]:
from collections import defaultdict

# Visit counter keyed by observation; missing keys start at 0.
early_stop = defaultdict(int)

# The observation is flattened and turned into a tuple so it is hashable and
# can be used as a dictionary key.
state = env.reset()
state = tuple(state.flatten().tolist())
early_stop[state] += 1

(255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 0, 0, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,

In [50]:
# Take action 1, keep only the first element of the step result (the
# observation) and record one more visit for it.
state, *_ = env.step(1)
state = tuple(state.flatten().tolist())
early_stop[state] += 1

In [52]:
# (number of distinct observations seen, highest visit count among them)
len(list(early_stop.keys())), np.max(list(early_stop.values()))

(2, 6)