In [1]:
import mlagents
from mlagents_envs.environment import UnityEnvironment as UE
import numpy as np
from mlagents_envs.environment import ActionTuple

# env =  UE(file_name="stage0_160523\stage0_copy",seed=1,side_channels=[])
env = UE(file_name=r"C:\Users\linzj\Desktop\environment\S2_train\\build",seed=1,side_channels=[],worker_id=8,no_graphics = False)
env.reset()


In [2]:
import os
import torch
import torch.nn as nn
import torch.nn.functional as F

vision_output_dim = 3136
language_output_dim = 128
embedding_dim = 128
mixing_dim = 256
lstm_hidden_dim = 256
num_actions = 4

# (3,128,128) --> (64,7,7) = 3136 (3-layer CNN)
class VisualModule(nn.Module): 
    def __init__(self):
        super(VisualModule, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=3, padding=0),
            nn.ReLU()
        )
        # self.conv = nn.Sequential(
        #     nn.Conv2d(3, 32, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(32, 64, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 128, kernel_size=5, stride=2, padding=2),
        #     nn.ReLU(),
        #     nn.Conv2d(128, 64, kernel_size=5, stride=2, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        #     nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
        #     nn.ReLU(),
        # )

    def forward(self, vt):
        encoded_vt = self.conv(vt)
        return encoded_vt.view(vt.size(0), -1).squeeze()

# one-hot encoding [0 0 1 0 0] --> 128 dimensional embedding (FF)
# S1:5 S2:5 S3:11 S4:9 --> 30 + 5 (noun) = 35 in total
class LanguageModule(nn.Module): 
    def __init__(self, embedding_dim):
        super(LanguageModule, self).__init__()
        self.linear = nn.Linear(512, embedding_dim)
          
        # self.linear = nn.Sequential(
        #     nn.Linear(512, embedding_dim),
        #     nn.ReLU())


    def forward(self, lt):
        embedded_lt = self.linear(lt)
        return embedded_lt

# 3136(vision) + 128 (language) --> 256 dimensional embedding (FF)
class MixingModule(nn.Module):
    def __init__(self, vision_output_dim, language_output_dim, mixing_dim):
        super(MixingModule, self).__init__()
        self.linear = nn.Linear(vision_output_dim + language_output_dim, mixing_dim)
        # self.linear = nn.Sequential(
        #     nn.Linear(vision_output_dim + language_output_dim, mixing_dim),
        #     nn.ReLU())

    def forward(self, vision_output, language_output):
        combined_output = torch.cat((vision_output, language_output), dim=0)
        mixed_output = self.linear(combined_output)
        return mixed_output

class LSTMModule(nn.Module):
    def __init__(self,mixing_dim,lstm_hidden_dim):
        super(LSTMModule, self).__init__()
        self.lstm = nn.LSTMCell(mixing_dim, lstm_hidden_dim)
    
    def forward(self,mixed_output,lstm_hidden_state):
        lstm_hidden_state = self.lstm(mixed_output, lstm_hidden_state) 
        # lstm_output = lstm_hidden_state[0] # output is (hidden_state,cell_state), we need hidden state, shape (1,256)
        return lstm_hidden_state

class Agent(nn.Module):
    def __init__(self, embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions):
        super(Agent, self).__init__()
        self.language_module = LanguageModule(embedding_dim)
        self.visual_module = VisualModule()
        self.mixing_module = MixingModule(vision_output_dim, language_output_dim, mixing_dim)
        self.lstm_module = LSTMModule(mixing_dim, lstm_hidden_dim)
        self.action_predictor = nn.Linear(lstm_hidden_dim, num_actions)
        self.value_estimator = nn.Linear(lstm_hidden_dim, 1)

    def forward(self, vt, lt, lstm_hidden_state):
        vision_output = self.visual_module(vt)
        language_output = self.language_module(lt)
        mixed_output = self.mixing_module(vision_output, language_output).unsqueeze(0)
        lstm_output = self.lstm_module(mixed_output,lstm_hidden_state)
        action_probs = self.action_predictor(lstm_output[0]) 
        value_estimate = self.value_estimator(lstm_output[0])
        return action_probs,value_estimate,lstm_output
        
        
    def save(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join(r'C:\Users\linzj\Desktop\model_before', '_'.join([ALG_NAME, ENV_ID]))
        if not os.path.exists(path):
            os.makedirs(path)
        torch.save(self.state_dict(), os.path.join(path, f'agent_{episode}.pt'))

    def load(self, episode, ALG_NAME, ENV_ID):
        path = os.path.join(r'C:\Users\linzj\Desktop\model_before', '_'.join([ALG_NAME, ENV_ID]))
        self.load_state_dict(torch.load(os.path.join(path, f'agent_{episode}.pt')))   

    
    # def load(self, episode, ALG_NAME, ENV_ID):
    #     path = os.path.join('model', '_'.join([ALG_NAME, ENV_ID]))
    #     saved_state_dict = torch.load(os.path.join(path, f'agent_{episode}.pt'))

    #     # Create a new state_dict for the model and only copy parameters except 'language_module'
    #     new_state_dict = {}
    #     for key, value in saved_state_dict.items():
    #         if 'language_module' not in key:
    #             new_state_dict[key] = value

    #     # Load the modified state_dict into the agent
    #     self.load_state_dict(new_state_dict, strict=False)



  from .autonotebook import tqdm as notebook_tqdm


In [39]:
import pickle
input_string = 'green cube'
# Load the dictionary from the pickle file
with open(r'C:\Users\linzj\Desktop\Agent training\clip.pkl', 'rb') as pickle_file:
    clip_encoder = pickle.load(pickle_file)
clip_encoder[input_string][0]

tensor([-2.0320e-02,  8.2796e-03,  2.1406e-02,  5.6506e-02,  1.2431e-02,
        -3.9443e-02, -3.2578e-02, -1.1511e-02, -8.7145e-03,  2.8967e-03,
         2.2257e-02, -2.7940e-02,  2.3075e-02, -7.5300e-03,  2.8859e-03,
        -5.9377e-03, -1.4408e-01, -3.0802e-03, -2.0899e-02, -5.7119e-02,
        -2.8117e-02,  7.1532e-02, -2.0686e-02, -1.0602e-02, -1.7278e-02,
        -1.1986e-02, -3.2243e-02,  1.8597e-02,  4.0634e-02, -3.5627e-03,
         8.4979e-02, -8.2717e-03,  2.6249e-03, -2.4647e-04,  3.7600e-05,
        -1.4175e-02,  3.9522e-02, -8.6989e-02,  2.4306e-02,  2.2828e-02,
         1.0826e-02, -2.1569e-02,  2.9212e-02,  3.2010e-03,  3.8441e-02,
         5.1095e-02, -4.5703e-02,  1.1612e-02,  4.7064e-03,  2.5812e-03,
        -4.5083e-03, -2.7430e-03, -2.2139e-02, -6.6525e-03,  1.9910e-02,
        -3.6391e-02, -4.1970e-02,  4.0291e-02, -7.9102e-02, -2.5100e-03,
        -8.3486e-02,  3.1922e-02, -1.8551e-02,  9.2341e-03,  1.7096e-02,
        -2.4965e-02, -5.1849e-02,  3.5880e-02, -1.0

In [40]:
import torch
import clip, open_clip
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
text = clip.tokenize(input_string).to(device)
with torch.no_grad():
    text_features = model.encode_text(text)
text_features /= text_features.norm(dim=-1, keepdim=True)
text_features

tensor([[ 4.0364e-04, -1.3565e-02, -8.4915e-03, -2.4780e-02,  5.0385e-02,
         -4.3091e-02, -1.5945e-02, -9.7290e-02, -4.2114e-02, -5.6763e-03,
          1.7502e-02, -1.0849e-02, -3.0914e-02, -1.3523e-03,  2.5803e-02,
          1.8066e-02,  1.2726e-02, -2.0294e-02, -5.1544e-02, -1.3361e-03,
          3.9307e-02, -1.8967e-02,  5.2032e-02, -1.7975e-02, -1.4137e-02,
          1.7151e-02,  1.4982e-03,  2.4033e-02, -6.2675e-03,  1.7334e-02,
         -1.3496e-02,  3.1185e-03,  1.6617e-02,  8.6288e-03,  7.2266e-02,
         -2.0325e-04,  2.9419e-02,  1.0895e-02,  2.5360e-02, -1.0521e-02,
          3.6987e-02,  3.0670e-03, -1.3016e-02, -2.4277e-02,  6.8398e-03,
          9.0256e-03, -6.8787e-02,  3.4973e-02,  2.8259e-02,  3.2684e-02,
         -4.0009e-02, -6.1066e-02,  2.0370e-02, -1.1147e-02,  1.1612e-02,
          2.1301e-02, -1.4069e-02, -1.9882e-02, -1.6556e-03, -1.5030e-03,
          6.5994e-03,  4.2038e-03, -2.3880e-02, -6.7568e-04,  4.7188e-03,
         -7.8583e-03,  3.1982e-02,  5.

In [5]:
#-1(20000): -5 # -2.5 (10000): -6.73  #-5 (5000):-9.46  # -7.5 (5000): -7.02  #-10 (5000):-10(circle same place)
# S1_4:12996      S1_13:6495           S1_14:4587        S1_15: 4073            S1_16:3085
from torch.distributions import Categorical
import open_clip
device = torch.device("cpu")
tokenizer = open_clip.get_tokenizer('ViT-B-32')
speed = 3
MAX_STEPS = 500
TEST_EPISODES = 100
ALG_NAME = 'S2clip_final'
ENV_ID = '2' 
episode = 16595
tracked_agent = -1
agent = Agent(embedding_dim, vision_output_dim, language_output_dim, mixing_dim, lstm_hidden_dim,num_actions)
agent.load(episode,ALG_NAME,ENV_ID)
average = 0
hashmap = {
0: 'capsule',
1: 'cube',
2: 'cylinder',
3: 'prism',
4: 'sphere',
5: 'red',
6: 'green',
7: 'blue',
8: 'yellow',
9: 'black'}
for episode in range(TEST_EPISODES):
            STEPS = 0
            episode_reward = 0
            behavior_name=list(env.behavior_specs)[0]
            spec=env.behavior_specs[behavior_name]
            # state = env.reset().astype(np.float32)
            decision_steps, terminal_steps = env.get_steps(behavior_name)
            # state -- vt, lt, lstm
            vt = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
            index1 = int(decision_steps.obs[1][0][0])
            index2 = int(decision_steps.obs[1][0][1])+5
            input_string = f'{hashmap[index2]} {hashmap[index1]}'
            # print(f'TEST: ---{input_string}---')
            lt = tokenizer(input_string)
            lstm_hidden_state = (torch.zeros(1, lstm_hidden_dim).to(device), torch.zeros(1, lstm_hidden_dim).to(device))
            done = False
            while True:

                # Need to use when calculating the loss
                log_probs = []
                # values = []
                values = torch.empty(0).to(device)
                rewards = []

                
                lstm_hidden_state = tuple(tensor.detach() for tensor in lstm_hidden_state)
                STEPS += 1
                policy_dist, value, lstm_hidden_state = agent(vt,lt,lstm_hidden_state)
                # value = value.detach()
                dist = F.softmax(policy_dist.detach(),dim=1).cpu().numpy()
                

                action_dist = Categorical(F.softmax(policy_dist.detach(),dim=1))
                # action_dist = Categorical(F.softmax(policy_dist,dim=1))
                action = action_dist.sample() # sample an action from action_dist
                action_onehot = F.one_hot(torch.tensor(action),num_actions).cpu()
                
                log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                # log_prob = torch.log(F.softmax(policy_dist,dim=1)[0][action])
                # entropy = -np.sum(np.mean(dist)* np.log(dist))
                entropy = F.cross_entropy(policy_dist.detach(), action)

                discrete_actions = np.array(action_onehot).reshape(1,4)*speed
                action_tuple = ActionTuple()
                action_tuple.add_discrete(discrete_actions)
                env.set_actions(behavior_name,action_tuple)
                env.step()
                decision_steps, terminal_steps = env.get_steps(behavior_name)

                if tracked_agent == -1 and len(decision_steps) >= 1:
                    tracked_agent = decision_steps.agent_id[0]
                    # print(tracked_agent)

                if tracked_agent in terminal_steps: # roll over or hit the target
                    # print('TEST: Agent in terminal steps')
                    done = True
                    reward = terminal_steps[tracked_agent].reward
                    if reward > 0:
                        pass
                    else: reward = -1 # roll over or other unseen conditions

                    # print(f'TEST: Terminal Step reward: {reward}')

                elif tracked_agent in decision_steps: # the agent which requires action
                    reward = decision_steps[tracked_agent].reward
                    # print(f'Decision Step reward: {reward}')
                    # if reward<0:
                    #     print(f'TEST: Decision Step reward: {reward}')
                if STEPS >= MAX_STEPS:
                    reward = -10
                    # print(f'TEST: Max Step Reward: {reward}')
                    env.reset()
                    done = True
                # if STEPS % 100 == 0:
                #     print (f'TEST: Step: {STEPS}')

                episode_reward = episode_reward + reward

                rewards.append(reward)
                # values.append(value)
                values = torch.cat((values, value), dim=0)
                log_probs.append(log_prob)
                entropy_term = entropy_term + entropy
                vt_new = torch.tensor(decision_steps.obs[0]).reshape(1,3,128,128).to(device)
                vt = vt_new
            average += episode_reward / TEST_EPISODES
            print(f'Episode: {episode}, Episode reward: {episode_reward}')
print(f'Average Episode Reward: {average}')


77


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x77 and 512x128)