In [1]:
from collections import defaultdict
from os import listdir
from os.path import isfile, join
from functools import partial
import types
import gym
from src import maze
import torch
from torchvision import transforms
from benchmark.methods import BC
import time
import numpy as np
from PIL import Image
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from imitation_datasets.dataset import BaselineDataset

dataset = BaselineDataset(
    "NathanGavenski/imagetest",
    source="hf",
    hf_split="shortest_route",
    transform=transforms.Resize(64)
)


Creating dataset: 100%|█████████████████████████████████████████| 100/100 [00:00<00:00, 15928.54it/s]


In [3]:
dataset.actions.shape, dataset.states.shape

(torch.Size([1210, 1]), (1210, 1))

In [4]:
new_state = dataset.states.repeat(10).reshape(-1, 1)
new_next_states = dataset.next_states.repeat(10)
actions = torch.from_numpy(dataset.actions.numpy().repeat(10)).view((-1, 1))

In [61]:
new_state[0]

array(['/home/nathan/.cache/huggingface/datasets/downloads/extracted/42e5534c8a7bc4fea430f96b2125d590aff88aa689dfde0c000933de111d2e8d/images/0.jpg'],
      dtype='<U141')

In [62]:
dataset.states[0]

array(['/home/nathan/.cache/huggingface/datasets/downloads/extracted/42e5534c8a7bc4fea430f96b2125d590aff88aa689dfde0c000933de111d2e8d/images/0.jpg'],
      dtype='<U141')

In [5]:
def enjoy(self, maze_paths, maze_settings, transforms):
    metrics = defaultdict(int)
    solutions = defaultdict(list)

    for maze_type in ["train", "eval"]:
        path = f"{maze_paths}/{maze_type}"
        structures = [join(path, f) for f in listdir(path) if isfile(join(path, f))]

        average_reward = []
        success_rate = []

        if self.verbose:
            structures = tqdm(structures, desc=f"eval with {maze_type}")
        for structure in structures:
            env = gym.make("Maze-v0", **maze_settings)
            done = False

            try:
                obs = env.load(structure)
                accumulated_reward = 0
                early_stop_count = defaultdict(int)
                while not done:
                    action = self.predict(obs, transforms)
                    obs, reward, done, _ = env.step(action)
                    accumulated_reward += reward
                    early_stop_count[tuple(obs.flatten().tolist())] += 1

                    if np.max(list(early_stop_count.values())) >= 5:
                        step_reward = -.1 / (env.shape[0] * env.shape[1])
                        lower_reward = env.max_episode_steps * step_reward
                        accumulated_reward = lower_reward
                        break

                if done:
                    solutions[maze_type].append(structure)

            finally:
                env.close()

            success_rate.append(1 if done else 0)
            average_reward.append(accumulated_reward)
        metrics[f"{maze_type} aer"] = np.mean(average_reward)
        metrics[f"{maze_type} aer (std)"] = np.std(average_reward)
        metrics[f"{maze_type} sr"] = np.mean(success_rate)
    metrics["aer"] = metrics["train sr"]
    return metrics, solutions

In [6]:
params = {
    "shape": (5, 5),
    # "screen_width": 64,
    # "screen_height": 64,
    "visual": True,
}
env = gym.make("Maze-v0", **params)
bc = BC(env, config_file="./configs/resnet.yaml")
bc.load()

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(64),
])



In [7]:
enjoy = partial(enjoy, maze_paths="./src/environment/mazes/mazes5", maze_settings=params, transforms=transform)
bc._enjoy = types.MethodType(enjoy, bc)
metrics, solutions = bc._enjoy()

In [32]:
for key, value in solutions.items():
    if key == "train":
        all_images = None
        for structure in value:
            image = env.load(structure)
            if all_images is None:
                all_images = image
            else:
                border = np.zeros((64, 2, 3)).astype("uint8")
                all_images = np.hstack((all_images, border))
                all_images = np.hstack((all_images, image))
            env.close()
        Image.fromarray(all_images).save("train.png")
    else:
        all_images = None
        for structure in value:
            image = env.load(structure)
            if all_images is None:
                all_images = image
            else:
                border = np.zeros((64, 2, 3)).astype("uint8")
                all_images = np.hstack((all_images, border))
                all_images = np.hstack((all_images, image))
            env.close()
        Image.fromarray(all_images).save("eval.png")

(64, 724, 3)


In [25]:
!xdg-open .

In [4]:
params = {
    "shape": (5, 5),
    "screen_width": 600,
    "screen_height": 600,
    "visual": True,
    "occlusion": False
}
env = gym.make("Maze-v0", **params)
bc = BC(env, config_file="./configs/att.yaml")

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(64),
])



In [11]:
for w in tqdm(range(0, 101, 10)):
    bc.load(name=str(w))
    bc.policy.train()
    with torch.no_grad():
        acc = 0
        for i in range(len(dataset)):
            s, a, ns = dataset[i]
            action = torch.argmax(bc.forward(s[None]), dim=1).squeeze()
            acc += action == a.squeeze()
    print(w, acc / len(dataset))

  9%|██████▎                                                               | 1/11 [00:12<02:07, 12.73s/it]

0 tensor(0.5537)


 18%|████████████▋                                                         | 2/11 [00:31<02:23, 16.00s/it]

10 tensor(0.9620)


 27%|███████████████████                                                   | 3/11 [00:48<02:13, 16.69s/it]

20 tensor(0.9926)


 36%|█████████████████████████▍                                            | 4/11 [01:06<02:00, 17.20s/it]

30 tensor(0.9967)


 45%|███████████████████████████████▊                                      | 5/11 [01:24<01:45, 17.55s/it]

40 tensor(0.9893)


 55%|██████████████████████████████████████▏                               | 6/11 [01:41<01:27, 17.47s/it]

50 tensor(0.9983)


 64%|████████████████████████████████████████████▌                         | 7/11 [01:59<01:09, 17.34s/it]

60 tensor(0.9942)


 73%|██████████████████████████████████████████████████▉                   | 8/11 [02:15<00:51, 17.18s/it]

70 tensor(0.9760)


 82%|█████████████████████████████████████████████████████████▎            | 9/11 [02:32<00:33, 16.94s/it]

80 tensor(0.9901)


 91%|██████████████████████████████████████████████████████████████▋      | 10/11 [02:49<00:16, 16.91s/it]

90 tensor(0.9802)


100%|█████████████████████████████████████████████████████████████████████| 11/11 [03:05<00:00, 16.87s/it]

100 tensor(0.9876)





In [6]:
bc.load(path="./tmp/bc/Maze-att/", name=str(30))
bc.policy.train()
with torch.no_grad():
    acc = 0
    for i in range(len(dataset)):
        s, a, ns = dataset[i]
        action = torch.argmax(bc.forward(s[None]), dim=1).squeeze()
        acc += action == a.squeeze()
print(30, acc / len(dataset))

30 tensor(0.9959)


In [7]:
maze_paths="./src/environment/mazes/mazes5"
maze_settings=params
maze_settings

{'shape': (5, 5),
 'screen_width': 600,
 'screen_height': 600,
 'visual': True,
 'occlusion': False}

In [8]:
bc.load(path="./tmp/bc/Maze-att/", name="50")
bc.policy.eval()
metrics = defaultdict(int)
solutions = defaultdict(list)

with torch.no_grad():
    for maze_type in ["train", "eval"]:
        path = f"{maze_paths}/{maze_type}"
        structures = [join(path, f) for f in listdir(path) if isfile(join(path, f))]
    
        average_reward = []
        success_rate = []
    
        structures = tqdm(structures, desc=f"eval with {maze_type}")
        for structure in structures:
            env = gym.make("Maze-v0", **maze_settings)
            done = False
    
            try:
                obs = env.load(structure)
                accumulated_reward = 0
                early_stop_count = defaultdict(int)
                while not done:
                    obs = transform(obs)
                    action = torch.argmax(bc.forward(obs[None]), dim=1).squeeze().item()
                    obs, reward, done, _ = env.step(action)
                    accumulated_reward += reward
                    early_stop_count[tuple(obs.flatten().tolist())] += 1
    
                    if np.max(list(early_stop_count.values())) >= 5:
                        step_reward = -.1 / (env.shape[0] * env.shape[1])
                        lower_reward = env.max_episode_steps * step_reward
                        accumulated_reward = lower_reward
                        break
    
                if done:
                    solutions[maze_type].append(structure)
    
            finally:
                env.close()
    
            success_rate.append(1 if done else 0)
            average_reward.append(accumulated_reward)
        metrics[f"{maze_type} aer"] = np.mean(average_reward)
        metrics[f"{maze_type} aer (std)"] = np.std(average_reward)
        metrics[f"{maze_type} sr"] = np.mean(success_rate)
metrics["aer"] = metrics["train sr"]
metrics, solutions

eval with train: 100%|█████████████████████████████████████████████| 100/100 [00:59<00:00,  1.67it/s]
eval with eval: 100%|██████████████████████████████████████████████| 100/100 [00:52<00:00,  1.91it/s]


(defaultdict(int,
             {'train aer': 0.9555999999999999,
              'train aer (std)': 0.014505171491575009,
              'train sr': 1.0,
              'eval aer': -2.4622,
              'eval aer (std)': 2.2942756852653954,
              'eval sr': 0.31,
              'aer': 1.0}),
 defaultdict(list,
             {'train': ['./src/environment/mazes/mazes5/train/926763516896451710.txt',
               './src/environment/mazes/mazes5/train/-4736102849644038576.txt',
               './src/environment/mazes/mazes5/train/-4839731022632257933.txt',
               './src/environment/mazes/mazes5/train/5576255441120359836.txt',
               './src/environment/mazes/mazes5/train/-2810139998966129988.txt',
               './src/environment/mazes/mazes5/train/4562321574665168570.txt',
               './src/environment/mazes/mazes5/train/-8782880872547250718.txt',
               './src/environment/mazes/mazes5/train/-2301343925662862078.txt',
               './src/environment/mazes

In [10]:
env.close()