# Imports and environment loading

In [2]:
from utils.model import MNmodelPartAM_Dueling
import pickle
import torch
import os
from matplotlib import pyplot as plt
from utils.learner import Segmenter, RLLearner
from utils.env_utils import load_env
import cv2
from tqdm import tqdm
# Let cuDNN benchmark its convolution algorithms and keep the fastest one.
torch.backends.cudnn.benchmark = True

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

train_build = 'NAVIndoor/maze'  # environment path
# NOTE(review): the positional arguments (10, 0, 0, 1) are defined by
# utils.env_utils.load_env; their meaning is not visible from this notebook.
env, behavior_name, channel_env = load_env(train_build, 10, 0, 0, 1)


[UnityMemory] Configuration Parameters - Can be set up in boot.config
    "memorysetup-bucket-allocator-granularity=16"
    "memorysetup-bucket-allocator-bucket-count=8"
    "memorysetup-bucket-allocator-block-size=4194304"
    "memorysetup-bucket-allocator-block-count=1"
    "memorysetup-main-allocator-block-size=16777216"
    "memorysetup-thread-allocator-block-size=16777216"
    "memorysetup-gfx-main-allocator-block-size=16777216"
    "memorysetup-gfx-thread-allocator-block-size=16777216"
    "memorysetup-cache-allocator-block-size=4194304"
    "memorysetup-typetree-allocator-block-size=2097152"
    "memorysetup-profiler-bucket-allocator-granularity=16"
    "memorysetup-profiler-bucket-allocator-bucket-count=8"
    "memorysetup-profiler-bucket-allocator-block-size=4194304"
    "memorysetup-profiler-bucket-allocator-block-count=1"
    "memorysetup-profiler-allocator-block-size=16777216"
    "memorysetup-profiler-editor-allocator-block-size=1048576"
    "memorysetup-temp-allocator-siz

# Training

In [3]:
# Action mapper: discrete action index -> [move, rotation].
#   move     (first value):  1 = forward, -1 = backward
#   rotation (second value): 1 = right,   -1 = left
# The full action set has 9 combinations of move and rotation; our experiments
# only use the 4 enabled below. The remaining ones are kept commented out.
ac = {
    0: [1, 0],
    1: [0, 1],
    2: [0, -1],
    3: [-1, 0],
    # 4: [1, 1],
    # 5: [1, -1],
    # 6: [-1, 1],
    # 7: [-1, -1],
    # 8: [0, 0],
}

# One network output per enabled action.
output_classes = len(ac)


# --- Run-level settings ---
erase = False  # NOTE(review): not referenced anywhere else in the visible notebook
budget = 100000
n_frames = 3

# --- Model dimensions ---
scale = 3   # model size
aes = 20    # action embedding space length
am = 20     # action memory buffer length
ahes = 150  # action memory representation length

# --- Replay buffer / optimisation schedule ---
buffer_size = 12000  # replay buffer size; large values may not fit in memory on small devices
gamma = 0.97
train_every = 5      # backward frequency

# --- Update of Q theta ---
update_type = 'hard'  # update type for Q theta: 'hard' or 'soft'
tau = 0.01            # soft update parameter, if update_type = 'soft'
update_every = 50     # update frequency of Q theta, if update_type = 'hard'

# --- Optimiser ---
learning_rate = 0.001
batch_size = 128

# --- Episodes and exploration ---
episode_duration = 400
epsilon_decrease = budget // 3  # one third of the budget, as an integer
epsilon_min = 0.15

# Parameters for the environments.
env_settings = {
    "coin_proba": 1,
    "increase_obstacle_proba": 1,  # linear increase in obstacle proportion until max_obstacle_proba is reached
    "move_speed": [1, 1],          # movement speed
    "turn_speed": [150, 150],      # rotation speed
    "momentum": [0, 0],            # inertial momentum
    "decrease_reward_on_stay": 0,  # decrease reward when OnStayCollided method is called
    "coin_visible": 1,             # coins visibility
    "max_obstacle_proba": 0.3,     # obstacle proportion
}

# Run identifier; used as the base file name when saving results.
name = '3_1_1_1-1_0_floor_dueling'

# Alternative architecture, kept for reference (its classes are not imported here):
#model =  SFO_model(output_classes=output_classes,scale_fl = scale, model_class = MNmodelPart,dropout=0)
#target_model = SFO_model(output_classes=output_classes,scale_fl = scale, model_class = MNmodelPart,dropout=0)

# The online and target networks must share exactly the same architecture, so
# both are built from a single set of keyword arguments. n_frames comes from the
# hyperparameter above (it was previously hard-coded to 3 here), which keeps the
# networks in sync with the value handed to RLLearner.
model_kwargs = dict(
    scale=scale,
    output=output_classes,
    dropout=0,
    n_actions=output_classes,
    am=am,
    aes=aes,
    ahes=ahes,
    n_frames=n_frames,
)
model = MNmodelPartAM_Dueling(**model_kwargs)
target_model = MNmodelPartAM_Dueling(**model_kwargs)



# Assemble the learner from the environment, the two networks and the
# hyperparameters defined above.
Learner = RLLearner(
    # environment
    env=env,
    channel_env=channel_env,
    env_settings=env_settings,
    action_mapper=ac,
    episode_duration=episode_duration,
    budget=budget,
    # networks and their inputs
    model=model,
    target_model=target_model,
    device=device,
    n_frames=n_frames,
    action_memory=am,
    segmenter=Segmenter(True),  # Segmenter argument is ground segmentation (with value 0.5)
    using_seg_input=True,
    # optimisation
    learning_rate=learning_rate,
    batch_size=batch_size,
    buffer_size=buffer_size,
    gamma=gamma,
    train_every=train_every,
    # update of Q theta
    update_type=update_type,
    update_every=update_every,
    tau=tau,
    # exploration
    epsilon_decrease=epsilon_decrease,
    epsilon_min=epsilon_min,
    # evaluation / recording
    do_clip=0,  # save a clip after each episode.
    eval_n=2,   # number of episodes used for evaluation.
)

    


In [None]:
# Start training; the loop itself is implemented by RLLearner.train_model (utils.learner).
Learner.train_model()

# Save model and reward values

In [None]:
# Persist the evaluation rewards (pickle + PNG plot) and the trained weights.
out_dir = 'graphs_v3'
# Create the output directory up front: without it every save below raises
# FileNotFoundError, and that would only surface after training has finished.
os.makedirs(out_dir, exist_ok=True)
out_base = out_dir + '/' + name

# Raw reward values, so the curve can be re-plotted later.
with open(out_base + '.pickle', 'wb') as f:
    pickle.dump(Learner.rewards_eval, f)

# Reward curve on its own figure, closed afterwards so it is not left open in the kernel.
fig, ax = plt.subplots()
ax.plot(Learner.rewards_eval, 'r')
fig.savefig(out_base + '.png', dpi=300)
print(out_base + '.png')
plt.close(fig)

# Weights of the trained model.
torch.save(Learner.model.state_dict(), out_base + '.pt')

# Clip a video

In [8]:
# Record frames from the learner and encode them as an XVID .avi file.
# NOTE(review): 1000 is presumably the number of frames to record — confirm in RLLearner.clip.
frames = Learner.clip(1000)

fps = 15
frame_size = (128, 128)  # NOTE(review): assumes clip() yields 128x128 frames — confirm
video = cv2.VideoWriter('video_episode.avi', cv2.VideoWriter_fourcc(*'XVID'), fps, frame_size)  # path, encoding, frames per second, image size
try:
    for image in tqdm(frames):
        # Frames are converted from RGB to BGR before being handed to the writer.
        video.write(cv2.cvtColor(image, cv2.COLOR_RGB2BGR))
finally:
    # Always release the writer, even if a frame fails to convert or write,
    # so the file handle is closed and the container is finalised.
    video.release()

100%|████████████████████████████████████| 1000/1000 [00:00<00:00, 21285.48it/s]
