In [1]:
import socket
import sys
import cv2
import numpy as np
import struct
import argparse
import math
import random
import json
import datetime
from collections import namedtuple

import torch
import torch.optim as optim

import DQN
from Enet import ENet
import utils_socket
import utils_obs
from os import listdir
import matplotlib.pyplot as plt
from PIL import Image
from PIL import ImageOps
from matplotlib.pyplot import imshow


In [2]:
class Args:
    """Static configuration read by the cells of this notebook."""

    # Directory names for saved models and interruption checkpoints.
    model_direc, interruption_direc = "./Model/", "./Interruption/"

    # Observation encoding: 0 = raw image, 1 = processed image, 2 = segmentation image.
    obs_mode = 1

    # Training hyper-parameters (lr, batch_size and grad_clamp are handed to DQN.Agent).
    total_steps = 10000
    lr = 0.0005
    batch_size = 32
    grad_clamp = 1

    # Steering command sent to the robot for each of the seven discrete actions.
    steering = [1400, 1425, 1450, 1500, 1550, 1570, 1600]

    # Epsilon-greedy schedule: start value, floor, and exponential decay constant (in steps).
    EPS_START, EPS_END, EPS_DECAY = 0.9, 0.05, 50


args = Args()

def send_action(step, action, socket):
    """Send one (step, steering) command over ``socket``.

    Args:
        step: Integer step counter, packed as a big-endian signed 32-bit int.
        action: Index into ``args.steering`` selecting the steering value.
        socket: Connected socket-like object exposing ``sendall``.

    Raises:
        IndexError: If ``action`` is outside ``args.steering``.
        struct.error: If a value does not fit a signed 32-bit int.
    """
    # Both fields go out as one 8-byte payload. sendall() keeps writing until
    # every byte is handed to the OS, whereas send() may stop after a partial
    # write and leave the receiver with a truncated command.
    payload = struct.pack(">ii", step, args.steering[action])
    socket.sendall(payload)

def select_action(steps_done, state, args, model):
    """Pick an action index with an exponentially decaying epsilon-greedy policy.

    Args:
        steps_done: Number of steps taken so far; drives the epsilon decay.
        state: NumPy array holding the current observation (a batch axis is
            prepended before it is fed to ``model``).
        args: Configuration object providing ``EPS_START``, ``EPS_END``,
            ``EPS_DECAY`` and the ``steering`` action table.
        model: Callable mapping a float tensor batch to per-action scores of
            shape (batch, n_actions).

    Returns:
        int: Index of the chosen action.
    """
    sample = random.random()
    eps_threshold = args.EPS_END + (args.EPS_START - args.EPS_END) * math.exp(-1. * steps_done / args.EPS_DECAY)
    if sample > eps_threshold:
        # Exploit: take the arg-max action of the model without tracking gradients.
        with torch.no_grad():
            batch = torch.from_numpy(state).float().unsqueeze(0)
            return model(batch).max(1)[1][0].item()
    # Explore: sample uniformly over the actual action table instead of a
    # hard-coded count, so the range cannot drift from args.steering.
    return random.randrange(len(args.steering))

def get_model_obs(img, semantic_img, mode):
    """Build the 3x60x80 network observation from a camera frame.

    Args:
        img: Colour frame (height x width x 3); used when ``mode`` is 0.
        semantic_img: Single-channel image; used for any other ``mode``.
        mode: 0 selects the raw colour frame, anything else the semantic image.

    Returns:
        Channels-first array: the resized frame divided by 255 for mode 0,
        otherwise the resized semantic image mapped through
        ``v / 255 / 2 + 0.25`` and repeated on three channels.
    """
    target_size = (80, 60)  # passed to cv2.resize -> 60 rows x 80 columns
    if mode != 0:
        mask = (cv2.resize(semantic_img, target_size)/255/2+0.25)
        return np.stack((mask,) * 3, axis=0)  # 3x60x80
    scaled = cv2.resize(img, target_size)/255
    return scaled.transpose(2, 0, 1)  # 3x60x80

In [3]:
# start
# Run-control state: `interrupted` is only read by the commented-out recovery
# code; the training loop runs while steps_done < steps_todo.
interrupted = False
steps_done, steps_todo = 0, 4000

In [4]:
# initializations
# Builds every long-lived object the training cell relies on: the segmentation
# network, the rewarder, the per-step logs, the replay memory, the policy and
# target DQNs, and the agent that wraps them.

# helpers
# NOTE(review): absolute, machine-specific path — this cell fails on any other
# machine; consider making it configurable.
seg_model_path = "/Users/karl/Documents/Notebooks/RobotRL/segmodel/seg_model2.0"
# presumably 2 is the number of segmentation classes — TODO confirm against Enet.ENet
enet = ENet(2)
enet.eval()
# Weights are mapped to CPU regardless of the device they were saved from.
enet.load_state_dict(torch.load(seg_model_path, map_location='cpu'))
# img_processor = utils_obs.ImgProcessor()
# presumably (width, height) of the camera frame; logged frames are (240, 320, 3) — TODO confirm
rewarder = utils_obs.Rewarder(320,240)
# Per-step buffers; the training cell keys them by step number and flushes
# them to disk every 50 steps.
imgs_log = {}
sonars_log = {}
# NOTE(review): not referenced by any other cell visible in this notebook.
Transition = namedtuple('Transition', 'state action next_state reward')

# memory
# presumably 1000 is the replay capacity — TODO confirm against DQN.ReplayMemory
memory = DQN.ReplayMemory(1000)
# model
policy_dqn = DQN.DQN()
# The target network starts as an exact copy of the policy network's weights.
with torch.no_grad():
    target_dqn = DQN.DQN()
    target_dqn.load_state_dict(policy_dqn.state_dict())

# Snapshot of the untrained policy weights.
# NOTE(review): writes to ./Model_raw/ (not args.model_direc); the directory
# must already exist.
torch.save(policy_dqn.state_dict(), "./Model_raw/0.pt")

# agent
agent = DQN.Agent(policy_dqn, target_dqn, args.lr, args.batch_size, args.grad_clamp)
# rewards
# Maps step number -> reward; filled by the training cell.
rewards = {}

# # recover from interruption
# if interrupted:
#     policy_dqn.load_state_dict(torch.load(args.interruption_direc+str(step)+"policy.model"))
#     target_dqn.load_state_dict(torch.load(args.interruption_direc+str(step)+"target.model"))
#     memory = torch.load(args.interruption_direc+str(step)+".memory")

In [5]:
# connect to agent
# Opens the TCP control channel to the robot and sends the handshake:
# password first, then four big-endian unsigned 32-bit ints.
ip_port = ('192.168.43.1', 21111)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(ip_port)
# send password
# NOTE(review): hard-coded credential — consider reading it from the environment.
string = "111"
# sendall() is used throughout: send() may write only part of the buffer and
# its return value was previously ignored.
s.sendall(string.encode())
# send speed and step
s.sendall(struct.pack(">I", 1700))
s.sendall(struct.pack(">I", steps_done))
s.sendall(struct.pack(">I", 1600))  # tilt
s.sendall(struct.pack(">I", 600))  # period

4

In [89]:
########### Train ##############
# Runs one episode: act, train while the robot moves, observe, store the
# transition, and stop as soon as a negative reward ends the episode (or the
# step budget is exhausted). Re-run the cell after resetting the robot.

def _segment(frame):
    """Run ENet on one RGB frame (H x W x 3) and return the per-pixel arg-max
    class index scaled by 255 as a uint8 array."""
    enet_input = torch.from_numpy(np.transpose(frame, (2,0,1))).unsqueeze(0).float()/255
    # Inference only: no autograd graph is needed for the segmentation network.
    with torch.no_grad():
        prediction = enet(enet_input)
    return (prediction.squeeze(0).argmax(0)*255).numpy().astype(np.uint8)

# reset
sonars, img = utils_socket.get_obs_rgb(s)  #img: RGB
# The received sonar readings are replaced by a constant placeholder
# (presumably sonar is not used in this experiment — TODO confirm).
sonars = (10,10,10)
semantic_obs = _segment(img)
# Mode 0: the policy network sees the raw frame; semantic_obs only feeds the rewarder.
obs = get_model_obs(img, semantic_obs, 0)
imgs_log[steps_done] = img
sonars_log[steps_done] = sonars
print("start")

while steps_done < steps_todo:
    # select actions
    action = select_action(steps_done, obs, args, policy_dqn)
    send_action(steps_done, action, s)

    # Train after the action has been sent and before waiting on the next
    # observation (original note: "delay training to save time").
    agent.update(memory)

    # get next state and reward
    sonars, img = utils_socket.get_obs_rgb(s)
    sonars = (10,10,10)
#     processed_img = img_processor.process_img(img)
#     obs_next = get_model_obs(img, processed_img, args.obs_mode)
    semantic_obs = _segment(img)
    reward = rewarder.reward(semantic_obs, sonars)
    obs_next = get_model_obs(img, semantic_obs, 0)

    rewards[steps_done] = reward
    # A negative reward terminates the episode; terminal transitions store
    # None as their next state.
    done = bool(reward < 0)
    if done:
        obs_next = None

    steps_done += 1

    # save transition to memory
    memory.push(obs, action, obs_next, reward)

    obs = obs_next

    # save data
    # Frames from terminal steps are logged under the negated step number.
    log_key = -steps_done if done else steps_done
    imgs_log[log_key] = img
    sonars_log[log_key] = sonars

    if steps_done % 50 == 0:
        # save experiences
        saver = utils_obs.Saver("./Record_raw/"+str(steps_done)+".txt", imgs_log.copy(), sonars_log.copy())
        saver.start()
        imgs_log.clear()
        sonars_log.clear()
        # save model
        torch.save(policy_dqn.state_dict(), "./Model_raw/"+str(steps_done)+".pt")
        # synchronize
        target_dqn.load_state_dict(policy_dqn.state_dict())

    #stop to reset the robot
    if done:
        print(steps_done)
        break


start
1038


In [88]:
# Step -1 with action index 3 asks the robot to resend its image; afterwards
# the reward history collected so far is written out as JSON.
send_action(-1, 3, s)  #ask robot to resend image
with open("./Record_raw/reward.txt", "w") as reward_file:
    reward_file.write(json.dumps(rewards))

In [None]:
def show(imgs):
    """Display every image in its own OpenCV window, then wait for a key press.

    Args:
        imgs: Container supporting ``len()`` and lookup by the integers
            ``0 .. len(imgs) - 1``; window ``i`` is titled ``'image<i>'``.
    """
    cv2.startWindowThread()
    window_ids = range(len(imgs))
    for idx in window_ids:
        cv2.imshow('image{}'.format(idx), imgs[idx])
    # waitKey(0) blocks until a key is pressed; all windows are closed afterwards.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [None]:
# save checkpoints
# Bundle both networks' weights, the step counter, the replay memory object
# and the reward history into one dict and serialise it with torch.save.
ckp = {
    'policy': policy_dqn.state_dict(),
    'target': target_dqn.state_dict(),
    'steps_done': steps_done,
    'memory': memory,
    'rewards': rewards,
}
torch.save(ckp, "Interruption/ckp")

In [None]:
# For Test
# Fresh connection for manual testing: resets the step counter and repeats the
# handshake (password, then four big-endian unsigned 32-bit ints).
steps_done = 0
# connect to agent
ip_port = ('192.168.43.1', 21111)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(ip_port)
# send password
# NOTE(review): hard-coded credential — consider reading it from the environment.
string = "111"
# sendall() is used throughout: send() may write only part of the buffer and
# its return value was previously ignored.
s.sendall(string.encode())
# send speed and step
s.sendall(struct.pack(">I", 1700))
s.sendall(struct.pack(">I", steps_done))
s.sendall(struct.pack(">I", 1600))  # tilt
s.sendall(struct.pack(">I", 600))  # period

In [None]:
# Test observation and reward
# Grabs one frame, segments it, prints the resulting reward, then sends a
# fixed steering command (1400) so the robot advances one step.
sonars, img = utils_socket.get_obs_rgb(s)  #img: RGB
enet_input = torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255
# Inference only: no autograd graph is needed for the segmentation network.
with torch.no_grad():
    obs = cv2.resize((enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8), (80,60))
# Re-binarise to 0/255 after the resize.
obs = ((obs > 128)*255).astype(np.uint8)
# NOTE(review): the training cell passes the un-resized mask to the rewarder,
# whereas this cell passes the 80x60 one — confirm which the rewarder expects.
reward = rewarder.reward(obs, sonars)
print(reward)
# sendall() guarantees the whole command is written; send() may stop early.
s.sendall(struct.pack(">i", steps_done))
s.sendall(struct.pack(">i", 1400))
steps_done += 1

In [None]:
# Test actions

In [None]:
# Open the most recently received frame (`img`) in the system image viewer via PIL.
Image.fromarray(img).show()

In [42]:
# Negative keys in imgs_log are the negated step numbers of frames that got a
# negative reward. Key -64 only exists if that step is still in the unflushed log.
test = imgs_log[-64]

In [43]:
# Shape of the selected frame.
test.shape

(240, 320, 3)

In [44]:
# Segment the selected frame with ENet: channels-first, batched, scaled by 1/255.
enet_input = torch.from_numpy(np.transpose(test, (2,0,1))).unsqueeze(0).float()/255
# Inference only: no autograd graph is needed for the segmentation network.
with torch.no_grad():
    a = (enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8)

In [45]:
# Wrap the uint8 segmentation mask `a` in a PIL image.
tosave = Image.fromarray(a)

In [46]:
# Open the segmentation mask in the system image viewer.
tosave.show()

In [47]:
# Open the original logged frame for side-by-side comparison with its mask.
Image.fromarray(test).show()

In [None]:
# View the stored state of replay entry 701: scale by 255, cast to uint8 and
# move the first axis last before handing it to PIL.
# Assumes the state is a channels-first float array in [0, 1], as produced by
# get_model_obs, and that at least 702 entries are stored — TODO confirm
# against DQN.ReplayMemory.
Image.fromarray(np.transpose((memory.memory[701].state*255).astype(np.uint8),(1,2,0))).show()

In [None]:
# Tabulate the epsilon-greedy exploration threshold for steps 0..1999.
# Note: the decay constant here is 100, whereas Args.EPS_DECAY is 50.
EPS_START, EPS_END, EPS_DECAY = 0.9, 0.05, 100
x = range(2000)
y = [EPS_END + (EPS_START - EPS_END) * math.exp(-1. * i / EPS_DECAY) for i in x]

In [91]:
# Number of frames still held in the in-memory log (it is cleared every 50 steps).
len(imgs_log)

38