In [None]:
import socket
import sys
import cv2
import numpy as np
import struct
import argparse
import math
import random
import json
import datetime
from collections import namedtuple

import torch
import torch.optim as optim
from tqdm import tqdm_notebook
from tqdm import tqdm

import DQN
from Enet import ENet
import utils_socket
import utils_obs
from os import listdir
import matplotlib.pyplot as plt
from PIL import Image
import time


In [None]:
class Args:
    """Static configuration shared by the cells of this notebook."""

    # Directories referenced by the run.
    model_direc = "./Model/"
    interruption_direc = "./Interruption/"

    # Observation mode: 0 = raw image, 1 = processed image, 2 = segmentation image.
    obs_mode = 1

    # Optimisation-related settings.
    total_steps = 10_000
    lr = 5e-4
    batch_size = 32
    grad_clamp = 1

    # Steering command for each of the seven discrete actions
    # (1425, 1450, ..., 1575; index 3 is the middle value 1500).
    steering = list(range(1425, 1576, 25))

    # Epsilon-greedy schedule parameters used by select_action().
    EPS_START = 0.9
    EPS_END = 0.05
    EPS_DECAY = 50


args = Args()

def send_action(step, action, socket, steering=None):
    """Send one ``(step, steering command)`` pair over ``socket``.

    Both values are written as big-endian signed 32-bit integers, step first.

    Args:
        step: Integer step index.
        action: Index into the steering table.
        socket: Connected socket-like object exposing ``sendall``. The name
            shadows the ``socket`` module inside this function; it is kept so
            existing callers keep working.
        steering: Optional sequence of steering commands indexed by
            ``action``. Defaults to ``args.steering``.

    Raises:
        IndexError: If ``action`` is outside the steering table. Nothing is
            sent in that case.
        struct.error: If a value does not fit a signed 32-bit integer.
    """
    table = args.steering if steering is None else steering
    # Build both payloads before touching the socket, so an invalid action
    # cannot leave a half-written message on the wire.
    step_bytes = struct.pack(">i", step)
    command_bytes = struct.pack(">i", table[action])
    # sendall() keeps writing until every byte is out; plain send() may stop
    # early and silently desynchronise the byte stream.
    socket.sendall(step_bytes)
    socket.sendall(command_bytes)

def select_action(steps_done, state, args, model):
    """Choose an action index with a decaying epsilon-greedy rule.

    The exploration threshold decays exponentially from ``args.EPS_START``
    towards ``args.EPS_END`` as ``steps_done`` grows, with time constant
    ``args.EPS_DECAY``.

    Args:
        steps_done: Number of steps taken so far (drives the decay).
        state: NumPy array holding a single, un-batched observation.
        args: Object exposing ``EPS_START``, ``EPS_END``, ``EPS_DECAY`` and
            ``steering`` (the table whose length is the number of actions).
        model: Callable mapping a batched float tensor to per-action scores
            of shape ``(batch, n_actions)``.

    Returns:
        int: Index of the chosen action.
    """
    eps_threshold = args.EPS_END + (args.EPS_START - args.EPS_END) * math.exp(
        -1. * steps_done / args.EPS_DECAY
    )
    if random.random() > eps_threshold:
        with torch.no_grad():
            batch = torch.from_numpy(state).float().unsqueeze(0)
            # Run the input on the model's own device; otherwise a model that
            # was moved to the GPU fails on a CPU tensor.
            params = getattr(model, "parameters", None)
            first_param = next(params(), None) if callable(params) else None
            if first_param is not None:
                batch = batch.to(first_param.device)
            return model(batch).max(1)[1][0].item()
    # Explore: sample uniformly over the actions that actually exist instead
    # of a hard-coded count.
    return random.randrange(len(args.steering))
    
def select_action_determine(state, model):
    """Return the greedy (highest-scoring) action index for ``state``.

    Args:
        state: NumPy array holding a single, un-batched observation.
        model: Callable mapping a batched float tensor to per-action scores
            of shape ``(batch, n_actions)``.

    Returns:
        int: Index of the action with the largest score.
    """
    with torch.no_grad():
        batch = torch.from_numpy(state).float().unsqueeze(0)
        # Run the input on the model's own device; otherwise a model that was
        # moved to the GPU fails on a CPU tensor.
        params = getattr(model, "parameters", None)
        first_param = next(params(), None) if callable(params) else None
        if first_param is not None:
            batch = batch.to(first_param.device)
        return model(batch).max(1)[1][0].item()

def get_model_obs(img, semantic_img, mode):
    """Build the channel-first observation handed to the policy network.

    Args:
        img: Colour image (height x width x 3); used only when ``mode`` is 0.
        semantic_img: Single-channel segmentation image; used for every other
            mode.
        mode: 0 selects the raw image; any other value selects the
            segmentation image replicated over three channels.

    Returns:
        Array of shape 3x60x80 with values divided by 255.
    """
    target_size = (80, 60)  # cv2.resize expects (width, height)
    if mode != 0:
        mask = cv2.resize(semantic_img, target_size) / 255
        # Replicate the single channel three times -> 3x60x80
        return np.stack((mask,) * 3, axis=0)
    scaled = cv2.resize(img, target_size) / 255
    # HWC -> CHW, i.e. 3x60x80
    return scaled.transpose(2, 0, 1)

In [None]:
# Run-state bookkeeping: interruption flag, steps already executed, and the
# number of loop iterations still to run.
interrupted, steps_done, steps_todo = False, 0, 100_000

In [None]:
# initializations
# Segmentation network. ENet is constructed with the argument 2
# (presumably the number of output classes -- confirm against Enet.py).
seg_model_path = "./segmodel/seg_model3.0"
enet = ENet(2)
# map_location="cpu": load the checkpoint onto the CPU regardless of the
# device it was saved from, so loading works on machines without that device.
# The model is moved to its target device in a later cell.
enet.load_state_dict(torch.load(seg_model_path, map_location="cpu"))
enet.eval()

# Policy network (DQN), restored from a saved state dict.
model_path = "./policymodel/1700.pt"

policy_dqn = DQN.DQN()
policy_dqn.load_state_dict(torch.load(model_path, map_location="cpu"))
# This notebook only runs inference, so switch the policy to eval mode here too.
policy_dqn.eval()

# Reward helper, constructed with (320, 240) -- the size frames are resized to
# before segmentation in the control loop.
rewarder = utils_obs.Rewarder(320,240)

In [None]:
# Use the first CUDA device when one is available; otherwise fall back to the
# CPU so the notebook still runs on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
enet = enet.to(device)
enet.eval()
policy_dqn = policy_dqn.to(device)
policy_dqn.eval()

In [None]:
# Display the current value of the step counter.
steps_done

In [None]:
# connect to agent
#ip_port = ('169.234.117.60', 21111)
ip_port = ('192.168.43.1', 21111)
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
try:
    # Disable Nagle's algorithm so the small control messages go out immediately.
    s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    s.connect(ip_port)
    # send password
    # NOTE(review): the password is hard-coded in the notebook; consider
    # reading it from the environment instead.
    string = "111"
    # sendall() guarantees the whole buffer is written; send() may stop early.
    s.sendall(string.encode())
    # send speed and step (each a big-endian unsigned 32-bit integer)
    s.sendall(struct.pack(">I", 1650))
    s.sendall(struct.pack(">I", steps_done))
    s.sendall(struct.pack(">I", 1500))  # tilt
    s.sendall(struct.pack(">I", 400))  # period
except BaseException:
    # Do not leak the socket if the connection or the handshake fails.
    s.close()
    raise

In [None]:
########### Run the policy on the robot ##############
# This cell only performs inference with the loaded networks; no optimisation
# step happens here.

# Action index used when the reward is not positive: args.steering[3] == 1500,
# the middle entry of the steering table.
NEUTRAL_ACTION = 3


def _observe(sock):
    """Receive one frame and turn it into what the control loop needs.

    Returns:
        tuple: ``(sonars, img, semantic_obs, reward, obs)`` where ``img`` is
        the frame resized to 320x240, ``semantic_obs`` is the uint8
        segmentation mask (values 0/255), ``reward`` comes from ``rewarder``
        and ``obs`` is the batched float tensor on ``device`` for the policy.
    """
    sonars, img = utils_socket.get_obs_rgb(sock, 120, 160)  #img: RGB
    img = cv2.resize(img, (320,240))
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        enet_input = (torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255).to(device)
        semantic_obs = (enet(enet_input).squeeze(0).argmax(0)*255).cpu().numpy().astype(np.uint8)
    # The reward is computed with fixed placeholder sonar readings (10,10,10),
    # not the values received from the robot.
    reward = rewarder.reward(semantic_obs, (10,10,10))
    obs = get_model_obs(img, semantic_obs, 1)
    obs = torch.from_numpy(obs).float().unsqueeze(0).to(device)
    return sonars, img, semantic_obs, reward, obs


# reset
sonars, img, semantic_obs, reward, obs = _observe(s)

print("start")
for i in range(steps_todo):
    # select actions
    print(i)
    if reward > 0:
        with torch.no_grad():
            action = policy_dqn(obs).max(1)[1][0].item()
    else:
        # Non-positive reward: fall back to the neutral steering command
        # instead of asking the policy.
        action = NEUTRAL_ACTION
    send_action(steps_done, action, s)
    # get next state and reward
    sonars, img, semantic_obs, reward, obs = _observe(s)

    steps_done += 1
    

In [None]:
# Manually overwrite the step counter with a hard-coded value.
# NOTE(review): presumably used to resynchronise with the robot after an
# interrupted run -- confirm; running this cell changes the step index sent by
# every later send_action() call.
steps_done = 136

In [None]:
# Send step -1 together with action index 3 (args.steering[3] == 1500).
send_action(-1, 3, s)  # ask robot to resend image

In [None]:
def show(imgs):
    """Open one OpenCV window per image and block until a key is pressed.

    Windows are titled ``image0``, ``image1``, ... in the order of ``imgs``;
    all of them are closed once ``cv2.waitKey`` returns.
    """
    cv2.startWindowThread()
    for idx, frame in enumerate(imgs):
        cv2.imshow('image' + str(idx), frame)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [None]:
# Test observation and reward
sonars, img = utils_socket.get_obs_rgb(s, 120, 160)  #img: RGB
# enet lives on `device`, so the input must be moved there too, and the result
# must come back to the CPU before converting it to NumPy.
with torch.no_grad():
    enet_input = (torch.from_numpy(np.transpose(img, (2,0,1))).unsqueeze(0).float()/255).to(device)
    seg_mask = (enet(enet_input).squeeze(0).argmax(0)*255).cpu().numpy().astype(np.uint8)
obs = cv2.resize(seg_mask, (80,60))
# Re-binarise after resizing: interpolation produces intermediate grey values.
obs = ((obs > 128)*255).astype(np.uint8)
reward = rewarder.reward(obs, sonars)
print(reward)
# Send the step index followed by a raw steering value; 1400 is not an entry of
# args.steering, so send_action() cannot be used here.
# sendall() guarantees the whole buffer is written; send() may stop early.
s.sendall(struct.pack(">i", steps_done))
s.sendall(struct.pack(">i", 1400))
steps_done += 1

In [None]:
# Test actions

In [None]:
# Display the dimensions of the most recently assigned `img`.
img.shape

In [None]:
# Preview the latest frame in the system image viewer.
# NOTE(review): this cell originally referenced `rimg`, which is not defined
# anywhere in the notebook and raises NameError on a fresh kernel. `img` (the
# latest RGB frame) is assumed to be the intended array -- confirm.
Image.fromarray(img).show()