In [14]:
import numpy as np
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
import gym
import gym3
from procgen import ProcgenGym3Env
from torchinfo import summary
import time

import resnet
import datastructures
import core

# Prefer the first CUDA device when one is visible; otherwise fall back to the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cuda:0


In [2]:
# Number of environments stepped in parallel; also handed to PPO further down.
num_agents = 16

# Vectorised procgen environment; all settings are passed explicitly by keyword.
env = ProcgenGym3Env(
    num=num_agents,
    env_name="coinrun",
    distribution_mode="easy",
    paint_vel_info=True,
)

# Show the observation space first, then the action space.
for space in (env.ob_space, env.ac_space):
    print(space)

Dict(rgb=D256[64,64,3])
D15[]


In [6]:
from CVModels import CNNAgent
from PPO import PPO

# Hyper-parameters of the CNN agent, grouped so they can be tweaked in one place.
# NOTE(review): the two positional arguments look like the observation shape and
# the number of discrete actions reported by the env above — confirm against CNNAgent.
cnn_kwargs = dict(
    channels=16,
    layers=[1, 1, 1, 1],
    scale=[1, 1, 1, 1],
    vheadLayers=1,
)
model = CNNAgent([64, 64, 3], 15, **cnn_kwargs).to(device)
model.train()

# The trainer drives `num_agents` parallel copies of the environment.
ppo = PPO(model, env, num_agents=num_agents)

# Kept as the final expression so the notebook renders the summary table.
summary(model, input_size=(2, 3, 64, 64))

4 16


Layer (type:depth-idx)                   Output Shape              Param #
CNNAgent                                 [2, 15]                   --
├─Conv2d: 1-1                            [2, 16, 64, 64]           432
├─MaxPool2d: 1-2                         [2, 16, 32, 32]           --
├─Sequential: 1-3                        [2, 16, 32, 32]           --
│    └─BasicBlock: 2-1                   [2, 16, 32, 32]           --
│    │    └─BatchNorm2d: 3-1             [2, 16, 32, 32]           32
│    │    └─ReLU: 3-2                    [2, 16, 32, 32]           --
│    │    └─Conv2d: 3-3                  [2, 16, 32, 32]           2,304
│    │    └─BatchNorm2d: 3-4             [2, 16, 32, 32]           32
│    │    └─ReLU: 3-5                    [2, 16, 32, 32]           --
│    │    └─Conv2d: 3-6                  [2, 16, 32, 32]           2,304
├─Sequential: 1-4                        [2, 16, 16, 16]           --
│    └─BasicBlock: 2-2                   [2, 16, 16, 16]           --
│    │  

In [7]:
# ppo.runGame()
# loss = ppo.train(debug=True)
# print(loss)
# import torchviz
# torchviz.make_dot(loss, params=dict(model.named_parameters()))

In [4]:
# Entries of the latest stats dict that are printed as-is (label == key).
PLAIN_FIELDS = ("episodeLength", "nonZeroReward", "epoch", "steps")
# (label, key) pairs whose values are converted with `.item()` before printing.
ITEM_FIELDS = (
    ("loss", "ppo/loss/total"),
    ("policy", "ppo/loss/policy"),
    ("value", "ppo/loss/value"),
    ("entropy", "ppo/policy/entropy"),
)

for i in range(100):
    # One iteration = collect a rollout, then run a PPO update on it.
    ppo.runGame()
    ppo.train()

    # Every 10th iteration: one-line progress report from the newest stats entry.
    if i % 10 == 0:
        latest = ppo.all_stats[-1]
        parts = []
        for label in PLAIN_FIELDS:
            parts.extend((label, latest[label]))
        for label, key in ITEM_FIELDS:
            parts.extend((label, latest[key].item()))
        print(*parts)

    # Every 100th iteration: dump every entry whose key mentions "time".
    if i % 100 == 0:
        stats = ppo.all_stats[-1]
        for name, value in stats.items():
            if "time" not in name:
                continue
            print(name, value)

episodeLength 138.875 nonZeroReward 0.625 epoch 0 steps 4096 loss 0.11894977919804514 policy 0.07235269021885041 value 0.04659708897919471 entropy 2.4408762454986572
time/runGame 2.5554771423339844
time/computeAdvantages 0.05899333953857422
time/ppo/forward 1.3123829364776611
time/ppo/backward 2.054478406906128
time/ppo/optim 2.178558588027954
time/ppo/stats 0.7469000816345215
time/epoch 6.9931254386901855
time/game/observe 0.2519690990447998
time/game/act 0.021007537841796875
time/game/forward 1.8530879020690918
time/game/stats 0.18725156784057617
time/game/transition 0.15802001953125
episodeLength 424.85714285714283 nonZeroReward 0.7142857142857143 epoch 10 steps 45056 loss 0.014673157579111163 policy 0.012471733978299527 value 0.0022014236008116423 entropy 2.1042418479919434
episodeLength 133.23333333333332 nonZeroReward 0.5333333333333333 epoch 20 steps 86016 loss 0.010302181355397518 policy 0.005014551941518333 value 0.005287629413879178 entropy 2.0411903858184814
episodeLength 10

KeyboardInterrupt: 

In [5]:
# Persist the current model weights (state dict only) next to the notebook.
checkpoint_path = "resnet.pth"
torch.save(model.state_dict(), checkpoint_path)

In [43]:
from timm.models.vision_transformer import VisionTransformer
# Side length of the square input image (fed to `summary` below) and the
# embedding width referenced by the VisionTransformer prototype line below.
h = dim = 64
# model = VisionTransformer(img_size=h, num_classes=15, depth=3, num_heads=4, embed_dim=dim, mlp_ratio=2).to(device)


class ViTValue(nn.Module):
    """Vision Transformer with an extra value head on the shared pre-logit features.

    Wraps a timm ``VisionTransformer`` (whose own ``head`` produces the
    ``num_classes`` logits) and adds a ``ValueHead`` that reads the same
    pooled features.

    Args:
        img_size: Input image size forwarded to ``VisionTransformer``.
        num_classes: Size of the logits output forwarded to ``VisionTransformer``.
        depth: Number of transformer blocks.
        num_heads: Attention heads per block.
        embed_dim: Token embedding width; also the input width of the value head.
        mlp_ratio: MLP expansion ratio forwarded to ``VisionTransformer``.
        valueHeadLayers: ``layers`` argument forwarded to ``ValueHead``.
    """

    def __init__(self, img_size=64, num_classes=15, depth=3, num_heads=4, embed_dim=64, mlp_ratio=2, valueHeadLayers=2):
        super().__init__()
        from CVModels import ValueHead
        self.model = VisionTransformer(img_size=img_size, num_classes=num_classes, depth=depth, num_heads=num_heads, embed_dim=embed_dim, mlp_ratio=mlp_ratio)
        # Size the value head from this instance's own embed_dim. The previous
        # code read `model.embed_dim` from the notebook-global `model`, which is
        # whatever object happened to be bound at the time (and fails once that
        # global is itself a ViTValue, which has no `embed_dim` attribute).
        self.value = ValueHead(n_in=embed_dim, n_out=1, layers=valueHeadLayers)

    def forward(self, x):
        """Return ``(logits, value)`` computed from the same pooled features."""
        features = self.model.forward_features(x)
        # pre_logits=True stops before the classifier head, so both heads below
        # consume the same pooled representation.
        features = self.model.forward_head(features, pre_logits=True)
        logits = self.model.head(features)
        value = self.value(features)
        return logits, value
    
# Build the model from the cell's own `h` / `dim` settings instead of relying on
# ViTValue's defaults happening to match them; otherwise changing `h` or `dim`
# above would silently desynchronise the model from the summary input below.
model = ViTValue(img_size=h, embed_dim=dim).to(device)

# Kept as the final expression so the notebook renders the summary table.
summary(model, input_size=(2, 3, h, h))

Layer (type:depth-idx)                   Output Shape              Param #
ViTValue                                 [2, 15]                   --
├─VisionTransformer: 1-1                 --                        1,152
│    └─PatchEmbed: 2-1                   [2, 16, 64]               --
│    │    └─Conv2d: 3-1                  [2, 64, 4, 4]             49,216
│    │    └─Identity: 3-2                [2, 16, 64]               --
│    └─Dropout: 2-2                      [2, 17, 64]               --
│    └─Identity: 2-3                     [2, 17, 64]               --
│    └─Sequential: 2-4                   [2, 17, 64]               --
│    │    └─Block: 3-3                   [2, 17, 64]               33,472
│    │    └─Block: 3-4                   [2, 17, 64]               33,472
│    │    └─Block: 3-5                   [2, 17, 64]               33,472
│    └─LayerNorm: 2-5                    [2, 17, 64]               128
│    └─Identity: 2-6                     [2, 64]                 