### ENV

In [None]:

import carla 
# client = carla.Client("localhost", 2000)
# world = client.load_world('Town01')
import gymnasium as gym
from carla_env import CarEnv
import numpy as np
import time



class CARLA_G(gym.Env):
    """Gymnasium adapter around ``CarEnv`` that exposes only the state vector.

    ``CarEnv.step`` is unpacked as ``([image, state], reward, done, info)`` and
    ``CarEnv.reset`` as ``(image, state)``. The image is dropped here; the state
    is cast to float32 and returned as the observation.

    Spaces (as declared below, not checked against ``CarEnv``):
        action_space: Box(-1, 1, shape=(2,), float32)
        observation_space: Box(-inf, inf, shape=(16,), float32)
    """

    def __init__(self, ):
        super(CARLA_G, self).__init__()
        self.env = CarEnv()
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32)
        self.observation_space = gym.spaces.Box(low=-np.inf, high=np.inf, shape=(16, ), dtype=np.float32)

    def step(self, action):
        """Advance the wrapped environment by one action.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``obs`` is the
            float32 state, ``terminated`` is CarEnv's ``done`` flag,
            ``truncated`` is always ``False`` and ``info`` is always ``{}``
            (CarEnv's own info is not forwarded).
        """
        (_image, state), reward, done, _info = self.env.step(action)
        return state.astype(np.float32), reward, done, False, {}

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return ``(obs, info)``.

        Args:
            seed: Optional seed, forwarded to ``gym.Env.reset`` so that
                ``self.np_random`` is seeded. It is not passed to CarEnv.
            options: Accepted for Gymnasium API compatibility; unused.
        """
        # Gymnasium expects subclasses to let the base class handle seeding.
        super().reset(seed=seed)
        _image, state = self.env.reset()
        return state.astype(np.float32), {}

    def render(self):
        """No-op; this wrapper does not render anything itself."""
        pass

# Instantiate the state-only environment; the SAC cells below train on it.
env = CARLA_G()
# check_env(env, warn=True)
# model = DDPG("MlpPolicy", env)
# model.learn(total_timesteps=10000, log_interval=10)




In [None]:
from stable_baselines3 import SAC, PPO, DDPG
# Load a previously saved SAC checkpoint (no environment attached yet);
# print_system_info=True asks SB3 to report current vs. saved-model system info.
model = SAC.load("SAC_model_run1_74", print_system_info=True)


### RL

In [None]:
# Build a fresh, untrained SAC agent with an MLP policy on `env`.
model2 = SAC("MlpPolicy", env, verbose=1)

In [None]:
# Show the environment object's repr as the cell output.
env

In [None]:
from stable_baselines3 import SAC, PPO, DDPG
from stable_baselines3.common.env_checker import check_env

# Resume training from the run-1 checkpoint.
# Passing `env` to `load` lets Stable-Baselines3 wrap and validate the
# environment itself rather than having `model.env` assigned by hand.
# model = SAC("MlpPolicy", env, verbose=1)
model = SAC.load("SAC_model_run1_74", env=env, print_system_info=True)

# Train in 10k-step chunks and checkpoint after each chunk.
for i in range(1000):
    name = "SAC_model_run2_"+str(i+1)
    print(name)
    # reset_num_timesteps=False keeps the global step counter running across
    # chunks, so the random-action warm-up and the logging x-axis do not
    # restart on every call.
    model.learn(total_timesteps=10000, log_interval=4, reset_num_timesteps=False)
    model.save(name)

### Evaluate

In [None]:
# Roll the current policy out deterministically for 10000 steps, printing the
# accumulated reward whenever an episode ends and then starting a new one.
obs, info = env.reset()
episode_reward = 0
for _step in range(10000):
    action, _state = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    episode_reward += reward

    succeeded = info.get("is_success", False)
    if not (terminated or truncated or succeeded):
        # Episode still running: keep stepping.
        continue

    print("Reward:", episode_reward, "Success?", succeeded)
    episode_reward = 0.0
    obs, info = env.reset()

#### New: image + tracking fusion environment

In [None]:
class CARLA_G_fusion(gym.Env):
    """Gymnasium adapter around ``CarEnv`` with a Dict (image + state) observation.

    ``CarEnv.step`` is unpacked as ``([image, state], reward, done, info)`` and
    ``CarEnv.reset`` as ``(image, state)``. Observations are returned as::

        {"image": float32 array, "tracking": float32 array}

    where the image's last axis is moved to the front and its values are
    divided by 255.

    Spaces (as declared below, not checked against ``CarEnv``):
        action_space: Box(-1, 1, shape=(2,), float32)
        observation_space: Dict(image=Box(0, 1, (3, 60, 160)),
                                tracking=Box(-inf, inf, (16,)))
    """

    def __init__(self, ):
        super(CARLA_G_fusion, self).__init__()
        self.env = CarEnv()
        self.action_space = gym.spaces.Box(low=-1, high=1, shape=(2, ), dtype=np.float32)
        self.observation_space = gym.spaces.Dict({
            "image": gym.spaces.Box(low=0, high=1, shape=(3, 60, 160), dtype=np.float32),
            "tracking": gym.spaces.Box(low=-np.inf, high=np.inf, shape=(16, ), dtype=np.float32),
        })

    @staticmethod
    def _make_obs(image, state):
        """Pack a raw (image, state) pair into the Dict observation."""
        # Move the last axis first (presumably HWC -> CHW; confirm in CarEnv)
        # and scale by 1/255 before casting to float32.
        channels_first = np.moveaxis(image, -1, 0) / 255
        return {
            "image": channels_first.astype(np.float32),
            "tracking": state.astype(np.float32),
        }

    def step(self, action):
        """Advance the wrapped environment by one action.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` where ``terminated``
            is CarEnv's ``done`` flag, ``truncated`` is always ``False`` and
            ``info`` is always ``{}`` (CarEnv's own info is not forwarded).
        """
        (image, state), reward, done, _info = self.env.step(action)
        return self._make_obs(image, state), reward, done, False, {}

    def reset(self, seed=None, options=None):
        """Reset the wrapped environment and return ``(obs, info)``.

        Args:
            seed: Optional seed, forwarded to ``gym.Env.reset`` so that
                ``self.np_random`` is seeded. It is not passed to CarEnv.
            options: Accepted for Gymnasium API compatibility; unused.
        """
        # Gymnasium expects subclasses to let the base class handle seeding.
        super().reset(seed=seed)
        image, state = self.env.reset()
        return self._make_obs(image, state), {}

    def render(self):
        """No-op; this wrapper does not render anything itself."""
        pass

# Build the fusion environment and set the map name on the wrapped CarEnv
# (presumably read by CarEnv on its next reset; confirm in carla_env).
env = CARLA_G_fusion()
town_name = 'Town01'
env.env.town_name = town_name
# Reset once; as the cell's last expression this also displays the (obs, info) pair.
env.reset()

In [None]:
import torch
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Shape-preserving residual block: conv-BN-ReLU, conv-BN, skip add, ReLU.

    All convolutions are 3x3 with 'same' padding and keep ``feature_dim``
    channels, so the output has exactly the input's shape.

    Args:
        feature_dim: Number of input (and output) channels.
    """

    def __init__(self, feature_dim=16):
        super(ResidualBlock, self).__init__()

        self.conv1 = nn.Conv2d(feature_dim, feature_dim, 3, padding='same')
        self.bn1 = nn.BatchNorm2d(feature_dim)
        # One shared activation module is enough; ReLU holds no parameters.
        self.relu = nn.ReLU(inplace=True)
        # conv2/bn2 are registered but not applied in forward(); they are kept
        # so the module's parameter names stay unchanged.
        self.conv2 = nn.Conv2d(feature_dim, feature_dim, 3, padding='same')
        self.bn2 = nn.BatchNorm2d(feature_dim)
        self.conv3 = nn.Conv2d(feature_dim, feature_dim, 3, padding='same')
        self.bn3 = nn.BatchNorm2d(feature_dim)

    def forward(self, x):
        """Return ``relu(bn3(conv3(relu(bn1(conv1(x))))) + x)``.

        Args:
            x: Tensor of shape (batch, feature_dim, H, W).

        Returns:
            A new tensor with the same shape as ``x``; ``x`` is not modified.
        """
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # In-place add touches only the freshly computed `out`, never the input.
        out += identity
        out = self.relu(out)
        # Return the transformed tensor, not the untouched input.
        return out


# Instantiate the block with its default channel width and print the module summary.
net = ResidualBlock()
print(net)


In [None]:
# Shape check on a random 1x3x60x160 batch: a 3->16 'same'-padded conv, and the residual block on 16 channels.
nn.Conv2d(3, 16, 3, padding='same')(torch.rand(1,3,60,160)).shape, net(torch.rand(1,16,60,160)).shape

In [None]:
# NOTE(review): `resnet18` is not defined or imported anywhere in the visible notebook
# (the torchvision import further down is commented out), so this cell presumably
# raises NameError on a fresh kernel. Confirm, then remove or restore the import.
nn.Conv2d(3, 16, 3, padding='same')(torch.rand(1,3,60,160)).shape, resnet18(torch.rand(1,3,60,160)).shape

In [None]:
import torch
import torch.nn as nn
class Block(nn.Module):
    """Residual unit made of two 3x3 conv + batch-norm stages and a skip connection.

    Args:
        in_channels: Channels of the incoming tensor.
        out_channels: Channels produced by both convolutions.
        identity_downsample: Optional module applied to the skip branch so it
            matches the main branch; ``None`` means the input is added as-is.
        stride: Stride of the first convolution (the second always uses 1).
    """

    def __init__(self, in_channels, out_channels, identity_downsample=None, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.identity_downsample = identity_downsample

    def forward(self, x):
        """Return ``relu(main(x) + skip(x))``."""
        # Main branch: conv-BN-ReLU followed by conv-BN.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Skip branch: project the input only when a projection was supplied.
        skip = x
        if self.identity_downsample is not None:
            skip = self.identity_downsample(skip)

        out += skip
        return self.relu(out)

class ResNet_18(nn.Module):
    """ResNet-18-style network: a 7x7 stem, four two-``Block`` stages, pooled linear head.

    Args:
        image_channels: Channels of the input image.
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, image_channels, num_classes):
        super().__init__()
        self.in_channels = 64

        # Stem: strided 7x7 convolution followed by a strided max-pool.
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; every stage after the first halves the resolution.
        self.layer1 = self.__make_layer(64, 64, stride=1)
        self.layer2 = self.__make_layer(64, 128, stride=2)
        self.layer3 = self.__make_layer(128, 256, stride=2)
        self.layer4 = self.__make_layer(256, 512, stride=2)

        # Head: global average pool to 1x1, then a linear projection.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def identity_downsample(self, in_channels, out_channels):
        """Build the skip-branch projection: strided 3x3 conv followed by batch norm."""
        projection = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=2, padding=1)
        return nn.Sequential(projection, nn.BatchNorm2d(out_channels))

    def __make_layer(self, in_channels, out_channels, stride):
        """Build one stage of two ``Block``s; only strided stages get a skip projection."""
        skip_projection = (
            self.identity_downsample(in_channels, out_channels) if stride != 1 else None
        )
        first = Block(in_channels, out_channels, identity_downsample=skip_projection, stride=stride)
        second = Block(out_channels, out_channels)
        return nn.Sequential(first, second)

    def forward(self, x):
        """Map a (batch, image_channels, H, W) tensor to (batch, num_classes)."""
        pipeline = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
            self.avgpool,
        )
        for stage in pipeline:
            x = stage(x)
        # Collapse the pooled (batch, 512, 1, 1) map to (batch, 512) for the head.
        x = x.view(x.shape[0], -1)
        return self.fc(x)

# Smoke test: build the network with 100 outputs and compare output shapes on a random 1x3x60x160 batch.
model = ResNet_18(3, 100)
nn.Conv2d(3, 16, 3, padding='same')(torch.rand(1,3,60,160)).shape, model(torch.rand(1,3,60,160)).shape

In [None]:
from gymnasium import spaces

from stable_baselines3 import PPO, SAC
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
import torch
import torch.nn as nn
# from torchvision.models import resnet18, ResNet18_Weights
# model = resnet18().train()
# model.fc=nn.Linear(512,100)

class CustomCombinedExtractor(BaseFeaturesExtractor):
    """Fuse the "image" and "tracking" entries of a Dict observation into one vector.

    The "image" entry goes through a ``ResNet_18`` and the "tracking" entry
    through a single linear layer. The two outputs are concatenated along
    dim 1, so the reported feature size is
    ``image_features_dim + tracking_features_dim`` (116 with the defaults).
    Other keys in the observation space are ignored.

    Args:
        observation_space: Dict space holding an "image" sub-space (its first
            dimension is used as the channel count) and a "tracking" sub-space.
        feature_dim: Currently unused; kept so existing callers keep working.
        image_features_dim: Output size of the image branch.
        tracking_features_dim: Output size of the tracking branch.
    """

    def __init__(self, observation_space: gym.spaces.Dict, feature_dim=16,
                 image_features_dim: int = 100, tracking_features_dim: int = 16):
        # nn.Module.__init__ must run before sub-modules can be attached, so a
        # placeholder features_dim is passed here and corrected at the end.
        super().__init__(observation_space, features_dim=1)

        extractors = {}
        total_concat_size = 0
        for key, subspace in observation_space.spaces.items():
            if key == "image":
                # Wrapped in nn.Sequential so parameter names stay
                # "extractors.image.0.*".
                extractors[key] = nn.Sequential(ResNet_18(subspace.shape[0], image_features_dim))
                # The branch emits exactly what its final linear layer produces.
                total_concat_size += image_features_dim
            elif key == "tracking":
                # Single linear projection of the state vector.
                extractors[key] = nn.Linear(subspace.shape[0], tracking_features_dim)
                total_concat_size += tracking_features_dim

        # No explicit device placement here: Stable-Baselines3 moves the whole
        # policy, including this extractor, to the model's device.
        self.extractors = nn.ModuleDict(extractors)

        # Report the true concatenated size so downstream layers are built to match.
        self._features_dim = total_concat_size

    def forward(self, observations) -> torch.Tensor:
        """Encode each known observation key and concatenate to (B, features_dim)."""
        encoded_tensor_list = [
            extractor(observations[key]) for key, extractor in self.extractors.items()
        ]
        return torch.cat(encoded_tensor_list, dim=1)


policy_kwargs = dict(
    features_extractor_class=CustomCombinedExtractor,
    # features_extractor_kwargs=dict(image_features_dim=100, tracking_features_dim=16),
)
# The extractor reports its real output size, so SAC builds correctly sized
# actor/critic input layers and registers them with its optimizers. No layer
# needs replacing after construction. The device is left to SB3 ("auto" picks
# CUDA when it is available).
model = SAC("MultiInputPolicy", env, policy_kwargs=policy_kwargs, tensorboard_log="./tmp/sac_gpu/", verbose=1, buffer_size=10000)

model.learn(1000)

In [None]:
# Display the policy's module tree as the cell output.
model.policy

In [None]:
# from gymnasium import spaces

# from stable_baselines3 import PPO, SAC
# from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
# class CustomCombinedExtractor(BaseFeaturesExtractor):
#     def __init__(self, observation_space: gym.spaces.Dict):
#         # We do not know features-dim here before going over all the items,
#         # so put something dummy for now. PyTorch requires calling
#         # nn.Module.__init__ before adding modules
#         super().__init__(observation_space, features_dim=1)

#         extractors = {}

#         total_concat_size = 0
#         # We need to know size of the output of this extractor,
#         # so go over all the spaces and compute output feature sizes
#         for key, subspace in observation_space.spaces.items():
#             if key == "image":
#                 # We will just downsample one channel of the image by 4x4 and flatten.
#                 # Assume the image is single-channel (subspace.shape[0] == 0)
#                 extractors[key] = nn.Sequential(nn.MaxPool2d(4), nn.Flatten())
#                 total_concat_size += subspace.shape[1] // 4 * subspace.shape[2] // 4
#             elif key == "tracking":
#                 # Run through a simple MLP
#                 extractors[key] = nn.Linear(subspace.shape[0], 16)
#                 total_concat_size += 16

#         self.extractors = nn.ModuleDict(extractors)

#         # Update the features dim manually
#         self._features_dim = total_concat_size

#     def forward(self, observations) -> torch.Tensor:
#         encoded_tensor_list = []

#         # self.extractors contain nn.Modules that do all the processing.
#         for key, extractor in self.extractors.items():
#             encoded_tensor_list.append(extractor(observations[key]))
#         # Return a (B, self._features_dim) PyTorch tensor, where B is batch dimension.
#         # print(torch.cat(encoded_tensor_list, dim=1).shape)
#         return torch.cat(encoded_tensor_list, dim=1)
    
# policy_kwargs = dict(
#     features_extractor_class=CustomCombinedExtractor,
#     # features_extractor_kwargs=dict(features_dim=128),
# )
# model = SAC("MultiInputPolicy", env, policy_kwargs=policy_kwargs, tensorboard_log="./tmp/sac/", verbose=1, buffer_size=10000)

# # SAC
# model.policy.actor.latent_pi[0] = torch.nn.Linear(1816,256)
# model.policy.critic.qf0[0] = torch.nn.Linear(1818,256)
# model.policy.critic.qf1[0] = torch.nn.Linear(1818,256)
# model.policy.critic_target.qf0[0] = torch.nn.Linear(1818,256)
# model.policy.critic_target.qf1[0] = torch.nn.Linear(1818,256)

# # PPO
# # model.policy.mlp_extractor.policy_net[0] = torch.nn.Linear(1816,64)
# # model.policy.mlp_extractor.value_net[0] = torch.nn.Linear(1816,64)

# model.learn(1000)

In [None]:
%load_ext tensorboard


In [8]:
# model = SAC.load("SAC_fusion_model_run_2", print_system_info=True)

== CURRENT SYSTEM INFO ==
- OS: Windows-10-10.0.22621-SP0 10.0.22621
- Python: 3.7.0
- Stable-Baselines3: 2.0.0
- PyTorch: 1.13.1+cu116
- GPU Enabled: True
- Numpy: 1.21.6
- Cloudpickle: 2.2.1
- Gymnasium: 0.28.1

== SAVED MODEL SYSTEM INFO ==
- OS: Windows-10-10.0.22621-SP0 10.0.22621
- Python: 3.7.0
- Stable-Baselines3: 2.0.0
- PyTorch: 1.13.1+cu116
- GPU Enabled: True
- Numpy: 1.21.6
- Cloudpickle: 2.2.1
- Gymnasium: 0.28.1



RuntimeError: Error(s) in loading state_dict for MultiInputPolicy:
	size mismatch for actor.latent_pi.0.weight: copying a param with shape torch.Size([256, 116]) from checkpoint, the shape in current model is torch.Size([256, 616]).
	size mismatch for critic.qf0.0.weight: copying a param with shape torch.Size([256, 118]) from checkpoint, the shape in current model is torch.Size([256, 618]).
	size mismatch for critic.qf1.0.weight: copying a param with shape torch.Size([256, 118]) from checkpoint, the shape in current model is torch.Size([256, 618]).
	size mismatch for critic_target.qf0.0.weight: copying a param with shape torch.Size([256, 118]) from checkpoint, the shape in current model is torch.Size([256, 618]).
	size mismatch for critic_target.qf1.0.weight: copying a param with shape torch.Size([256, 118]) from checkpoint, the shape in current model is torch.Size([256, 618]).

In [None]:
# model = SAC.load("SAC_model_run1_97", print_system_info=True)

# Continue training the current `model` in 1000-step chunks, saving a
# checkpoint after each chunk. Checkpoint numbering starts at run 4.
for i in range(3,1000):
    name = "SAC_fusion_model_run_"+str(i+1)
    print(name)
    # reset_num_timesteps=False keeps the global step counter running across
    # chunks, so the random-action warm-up and the logging x-axis do not
    # restart on every call.
    model.learn(total_timesteps=1000, log_interval=4, reset_num_timesteps=False)
    model.save(name)