In [2]:
import socket
import sys
import cv2
import numpy as np
import struct
import argparse
import math
import random
import json
import datetime
from collections import namedtuple

import torch
import torch.optim as optim

import DQN
from Enet import ENet
import utils_socket
import utils_obs
from os import listdir
import matplotlib.pyplot as plt
from PIL import Image


In [3]:
class Args:
    """Static configuration namespace; the shared instance is ``args`` below."""

    # --- filesystem locations ---
    model_direc = "./Model/"
    interruption_direc = "./Interruption/"

    # --- observation type ---
    # 0: raw image, 1: processed image, 2: segmentation image
    obs_mode = 1

    # --- run / optimisation settings ---
    total_steps = 10000
    lr = 5e-4
    batch_size = 32
    grad_clamp = 1

    # One steering value per discrete action (seven actions); indexed by send_action.
    steering = [1400, 1425, 1450, 1500, 1550, 1570, 1600]

    # --- epsilon-greedy schedule (read by select_action) ---
    EPS_START = 0.9
    EPS_END = 0.05
    EPS_DECAY = 50


args = Args()

def send_action(step, action, socket):
    """Send one control packet (step counter + steering value) to the robot.

    Args:
        step: Integer step index, packed as a big-endian signed 32-bit int.
        action: Index into ``args.steering`` selecting the steering value,
            which is packed the same way.
        socket: Connected socket object. The parameter name shadows the
            ``socket`` module inside this function only.

    Both fields are packed into a single 8-byte buffer and written with
    ``sendall``: plain ``send`` may transmit only part of the buffer and the
    original code ignored its return value. The bytes on the wire are the same
    as two consecutive ``">i"`` writes.
    """
    packet = struct.pack(">ii", step, args.steering[action])
    socket.sendall(packet)

def select_action(steps_done, state, args, model):
    """Choose an action index with an exponentially decaying epsilon-greedy rule.

    Args:
        steps_done: Number of steps taken so far; a larger value lowers the
            exploration threshold.
        state: NumPy array holding the observation. It is converted to a float
            tensor and given a leading batch dimension before the forward pass.
        args: Configuration object providing ``EPS_START``, ``EPS_END``,
            ``EPS_DECAY`` and the ``steering`` table.
        model: Callable mapping the batched state tensor to a 2-D tensor of
            per-action scores (one row per batch item).

    Returns:
        int: the arg-max action when exploiting, otherwise a uniformly random
        index into ``args.steering``.
    """
    eps_threshold = args.EPS_END + (args.EPS_START - args.EPS_END) * math.exp(-1. * steps_done / args.EPS_DECAY)
    if random.random() > eps_threshold:
        # Exploit: greedy action from the model, no gradient tracking needed.
        with torch.no_grad():
            batch = torch.from_numpy(state).float().unsqueeze(0)
            return model(batch).max(1)[1][0].item()
    # Explore: size the draw from the steering table rather than a hard-coded 7,
    # so the random branch always agrees with the actions send_action can index.
    return random.randrange(len(args.steering))
    
def select_action_determine(state, model):
    """Return the greedy (highest-scoring) action index for ``state``.

    ``state`` is a NumPy array; it is converted to a float tensor with a
    leading batch dimension, passed through ``model`` without gradient
    tracking, and the index of the maximum along dimension 1 of the first
    batch row is returned as a Python int.
    """
    with torch.no_grad():
        batch = torch.from_numpy(state).float().unsqueeze(0)
        scores = model(batch)
        _, best = scores.max(1)
        return best[0].item()

def get_model_obs(img, semantic_img, mode):
    """Build the channel-first observation array fed to the policy network.

    Args:
        img: Colour frame (height x width x channels); used only when ``mode`` is 0.
        semantic_img: Single-channel segmentation image; used for every other mode.
        mode: 0 selects the raw frame, any other value selects the segmentation image.

    Returns:
        Array resized with size argument (80, 60) and divided by 255. For mode 0
        the channel axis is moved to the front; otherwise the single plane is
        repeated three times along a new leading axis (3x60x80 in both cases).
    """
    target_size = (80, 60)
    if mode != 0:
        plane = cv2.resize(semantic_img, target_size) / 255
        return np.stack((plane, plane, plane), axis=0)  # 3x60x80
    scaled = cv2.resize(img, target_size) / 255
    return np.transpose(scaled, (2, 0, 1))  # 3x60x80

In [136]:
# start
# Reset the run-control globals used by the cells below.
# `interrupted` is not read by any cell shown in this export.
interrupted = False
# Step counter: sent in the connection handshake and incremented once per control-loop iteration.
steps_done = 0
# The control loop keeps running while steps_done < steps_todo.
steps_todo = 4000

In [4]:
# initializations
# helpers
# Segmentation network: built with ENet(2), switched to eval mode, weights loaded onto the CPU.
# NOTE(review): both paths below are absolute and machine-specific — consider a configurable base directory.
seg_model_path = "/Users/karl/Documents/Notebooks/RobotRL/segmodel/seg_model3.0"
enet = ENet(2)
enet.eval()
enet.load_state_dict(torch.load(seg_model_path, map_location='cpu'))

# model
model_path = "/Users/karl/Documents/Notebooks/RobotRL/Model_raw/1000.pt"

policy_dqn = DQN.DQN()
# map_location='cpu' matches the ENet load above: without it, a checkpoint saved from a
# CUDA device cannot be deserialised on a CPU-only machine. All inference in this notebook
# works on CPU tensors (torch.from_numpy inputs, direct .numpy() on outputs).
# NOTE(review): policy_dqn is left in training mode; if DQN contains dropout/batch-norm
# layers, confirm whether policy_dqn.eval() should be called before inference.
policy_dqn.load_state_dict(torch.load(model_path, map_location='cpu'))


In [162]:
# connect to agent
# Opens a TCP connection to the robot and sends the start-up handshake: the password
# string followed by four big-endian unsigned 32-bit values.
# NOTE(review): address and password are hard-coded literals — consider loading them from configuration.
# NOTE(review): socket.send() may write fewer bytes than requested and its return value is
# not checked here; sendall() would guarantee the whole payload is written.
ip_port = ('192.168.43.1', 21111)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(ip_port)
# send password
string = "111"
s.send(string.encode())
# send speed and step
s.send(struct.pack(">I", 1700))
s.send(struct.pack(">I", steps_done))
s.send(struct.pack(">I", 1600))  # tilt
# The return value of this last send (bytes written) is what the cell displays.
s.send(struct.pack(">I", 600))  # period

4

In [5]:
# Load a saved frame from disk into a NumPy array; the timing cell that follows reads `img`.
# NOTE(review): absolute local path; presumably an RGB PNG (the next cell transposes three axes) — confirm.
img = np.asarray(Image.open("/Users/karl/Desktop/test_sfm/28.png"))

In [25]:
# Time one full perception + decision pass (segmentation -> observation -> greedy action)
# on the frame currently held in `img`.
start = datetime.datetime.now()
# Inference only: run the ENet forward pass without autograd tracking, so the timing
# does not include building a graph that is never used.
with torch.no_grad():
    # HxWxC uint8 frame -> 1xCxHxW float tensor scaled to [0, 1]
    enet_input = torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255
    # per-pixel class index (arg-max over the class axis) scaled to a 0/255 uint8 mask
    semantic_obs = (enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8)
obs = get_model_obs(img, semantic_obs, 1)
action = select_action_determine(obs, policy_dqn)
end = datetime.datetime.now()
print(end-start)

0:00:00.969905


In [324]:
########### Control loop (originally labelled "Train") ##############
# Drives the robot with the greedy policy; no reward is computed and no optimisation
# step is performed in this cell.

# reset: fetch the first frame and build the initial observation
sonars, img = utils_socket.get_obs_rgb(s)  #img: RGB
# Segmentation is inference only, so skip autograd tracking for the ENet forward pass.
with torch.no_grad():
    enet_input = torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255
    semantic_obs = (enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8)
obs = get_model_obs(img, semantic_obs, 1)

print("start")
while(steps_done < steps_todo):
    # select the greedy action for the current observation and send it to the robot
    action = select_action_determine(obs, policy_dqn)
    send_action(steps_done, action, s)
    # receive the next frame and turn it into the next observation
    sonars, img = utils_socket.get_obs_rgb(s)
    with torch.no_grad():
        enet_input = torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255
        semantic_obs = (enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8)
    obs_next = get_model_obs(img, semantic_obs, 1)

    steps_done += 1

    obs = obs_next

start
1183


In [323]:
# Send a packet with step -1 and action index 3, then write `rewards` to disk as JSON.
# NOTE(review): `rewards` is not defined in any cell shown in this export — this cell
# relies on kernel state created elsewhere.
send_action(-1, 3, s)  #ask robot to resend image
with open("./Record/reward.txt", "w") as f:
    json.dump(rewards, f)

In [203]:
def show(imgs):
    """Display each image in its own OpenCV window and block until a key is pressed.

    Args:
        imgs: Any iterable of images accepted by ``cv2.imshow``. Windows are
            titled ``image0``, ``image1``, ... in iteration order.

    All windows are destroyed when the function exits, including when
    displaying one of the images raises.
    """
    cv2.startWindowThread()
    try:
        # enumerate() accepts generators and other iterables, not only sequences with len().
        for index, image in enumerate(imgs):
            cv2.imshow('image'+str(index), image)
        cv2.waitKey(0)
    finally:
        cv2.destroyAllWindows()

In [90]:
# save checkpoints
# Bundle both network state dicts, the step counter, the replay memory and the
# reward log into one dict and serialise it with torch.save.
ckp = {
    'policy': policy_dqn.state_dict(),
    'target': target_dqn.state_dict(),
    'steps_done': steps_done,
    'memory': memory,
    'rewards': rewards,
}
torch.save(ckp,"Interruption/ckp")

In [32]:
# For Test
# Resets the step counter, then repeats the connection handshake of the earlier
# "connect to agent" cell: password string followed by four big-endian unsigned 32-bit values.
# NOTE(review): address and password are hard-coded literals — consider loading them from configuration.
# NOTE(review): socket.send() may write fewer bytes than requested and its return value is
# not checked here; sendall() would guarantee the whole payload is written.
steps_done = 0
# connect to agent
ip_port = ('192.168.43.1', 21111)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect(ip_port)
# send password
string = "111"
s.send(string.encode())
# send speed and step
s.send(struct.pack(">I", 1700))
s.send(struct.pack(">I", steps_done))
s.send(struct.pack(">I", 1600))  # tilt
# The return value of this last send (bytes written) is what the cell displays.
s.send(struct.pack(">I", 600))  # period

4

In [114]:
# Test observation and reward
# Single manual step: fetch a frame, segment it, compute the reward, then send one action packet.
# NOTE(review): `rewarder` is not defined in any cell shown in this export.
sonars, img = utils_socket.get_obs_rgb(s)  #img: RGB
# Inference only: run the ENet forward pass without autograd tracking.
with torch.no_grad():
    enet_input = torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255
    obs = cv2.resize((enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8), (80,60))
# Re-binarise after resizing. Here `obs` is a 2-D uint8 mask, unlike the
# 3-channel float observation that the control loop stores under the same name.
obs = ((obs > 128)*255).astype(np.uint8)
reward = rewarder.reward(obs, sonars)
print(reward)
# Step counter and steering value 1400 as two big-endian signed 32-bit ints, written with
# sendall so a partial write cannot truncate the packet (same bytes as two ">i" sends).
s.sendall(struct.pack(">ii", steps_done, 1400))
steps_done += 1

0.5


In [76]:
# Test actions

In [119]:
# Open the most recently stored frame `img` in the system image viewer via PIL.
Image.fromarray(img).show()

In [139]:
# Display the number of items held in `memory.memory`.
# NOTE(review): `memory` is not defined in any cell shown in this export.
len(memory.memory)

0

In [291]:
# Load a frame from disk into `img` and pick one logged frame into `test` for inspection.
# NOTE(review): absolute local path; `imgs_log` is not defined in any cell shown in this export.
img = np.asarray(Image.open("/Users/karl/Downloads/aaa/2.png"))
test = imgs_log[-885]

In [292]:
# Segment the logged frame `test` and shrink the resulting 0/255 mask using size argument (80, 60).
# Inference only: run the ENet forward pass without autograd tracking.
with torch.no_grad():
    enet_input = torch.from_numpy(np.transpose(test, (2,0,1))).unsqueeze(0).float()/255
    a = cv2.resize((enet(enet_input).squeeze(0).argmax(0)*255).numpy().astype(np.uint8), (80,60))

In [293]:
# Wrap the resized segmentation mask `a` in a PIL image.
tosave = Image.fromarray(a)

In [294]:
# Open the mask image in the system image viewer.
tosave.show()

In [277]:
# Open the logged frame `test` in the system image viewer for comparison with its mask.
Image.fromarray(test).show()