In [439]:
import gym
from gym.spaces import Discrete, Box
import numpy as np
import random

In [440]:
import socket
from time import sleep
import sys
import json


class ClientCommunication:
    ue_tasks = {"step": 0,
                "reset": 1,
                "state": 2,
                "reward": 3,
                "is_done": 4}
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s_notify = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    def __init__(self, host, port):
        self.host = host
        self.port = port

        try:
            self.s.connect((self.host, self.port))
            print("connected! (main)")
        except:
            print("connection failed or interrupted (main)")

        try:
            self.s_notify.connect((self.host, self.port - 1))
            print("connected! (notify)")
        except:
            print("connection failed or interrupted (notify)")

    def step(self, action):
        # try:
            data = {'id': self.ue_tasks['step'], 'action': []}
            if not hasattr(action, '__iter__'):
                data['action'] = [float(action)]
            else:
                try:
                    data['action'] = list(action.tolist())
                except:
                    try:
                        data['action'] = list(action)
                    except:
                        data['action'] = [action]
                for i in range(len(data['action'])):
                    data['action'][i] = float(data['action'][i] )
            print(data)
            self.s.sendall(bytes(json.dumps(data), encoding="utf-8"))
            print("post: step")

            self.wait()
            info_ = {}
            observation_ = self.state()
            reward_ = self.reward()
            done_ = self.is_done()
            return observation_, reward_, done_, info_
        # except:
        #     print("step: connection failed or interrupted")

    def reset(self):
        try:
            data = {'id': self.ue_tasks['reset'], 'action': [0]}
            self.s.sendall(bytes(json.dumps(data), encoding="utf-8"))
            print("post: reset")

            self.wait()
            return self.state()

        except:
            print("reset: connection failed or interrupted")

    def state(self):
        try:
            data = {'id': self.ue_tasks['state'], 'action': [0]}
            self.s.sendall(bytes(json.dumps(data), encoding="utf-8"))

            received = self.s_notify.recv(2048)
            received = received.decode('utf-8')
            received = json.loads(received)
            print("get: state")
            print(received['state'][0])
            return received['state'][0]
        except:
            print("state: connection failed or interrupted")

    def reward(self):
        try:
            data = {'id': self.ue_tasks['reward'], 'action': [0]}
            self.s.sendall(bytes(json.dumps(data), encoding="utf-8"))

            received = self.s_notify.recv(2048)
            received = received.decode('utf-8')
            received = json.loads(received)
            print("get: reward")
            return received['reward']

        except:
            print("reward: connection failed or interrupted")

    def is_done(self):
        try:
            data = {'id': self.ue_tasks['is_done'], 'action': [0]}
            self.s.sendall(bytes(json.dumps(data), encoding="utf-8"))

            received = self.s_notify.recv(2048)
            received = received.decode('utf-8')
            received = json.loads(received)
            print("get: is_done")
            return received['isDone']
        except:
            print("is_done: connection failed or interrupted")

    def wait(self):
        received = 0
        while received != "1":
            received = self.s_notify.recv(32)
            received = received.decode("utf-8")
        #print("next")


In [441]:
class UnrealEnv(gym.Env):

    def __init__(self, host, port):
        #Actions: down, stay, up
        self.action_space = Discrete(5)
        #Temperature array
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))

        self.client = ClientCommunication(host, port)
        #Set start temp
        #self.state = 38 + random.randint(-3, 3)

        # Set shower length
        #self.shower_length = 60

    def step(self, act):
        return self.client.step(act)

        # if 37 <= self.state <= 39:
        #     reward = 1
        # else:
        #     if self.state < 37:
        #         reward = -(37 - self.state)
        #     else:
        #         reward = (self.state - 39)
        #
        # if self.shower_length <= 0:
        #     done = True
        # else:
        #     done = False
        #
        # self.state += -1 + random.random() * (1 - (-1))
        #
        # info = {}
        #
        # return self.state, reward, done, info

    def render(self):
        pass

    @property
    def reward(self):
        return self.client.reward()

    @property
    def state(self):
        return self.client.state()

    @property
    def done(self):
        return self.client.is_done()

    def reset(self):
        return self.client.reset()

In [442]:
env = UnrealEnv("26.225.53.123", 7787)

connected! (main)
connected! (notify)


In [443]:
# env.step(2)

In [444]:
# episodes = 10
# arr = []
# for episode in range(1, episodes + 1):
#     state = env.reset()
#     done = False
#     score = 0
#
#     while not done:
#         env.render()
#         action = env.action_space.sample()
#         n_state, reward, done, info = env.step(action)
#         score += reward
#     arr.append('Episode: {} Score {}'.format(episode, score))

In [445]:
# for el in arr:
#     print(el)

In [446]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [447]:
states = env.observation_space.shape
actions = env.action_space.n

In [448]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(32, activation='sigmoid', input_shape = states))
    model.add(Dense(8, activation = 'relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [449]:
model = build_model(states, actions)

In [450]:
model.summary()

Model: "sequential_32"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_96 (Dense)            (None, 32)                64        
                                                                 
 dense_97 (Dense)            (None, 8)                 264       
                                                                 
 dense_98 (Dense)            (None, 5)                 45        
                                                                 
Total params: 373
Trainable params: 373
Non-trainable params: 0
_________________________________________________________________


In [451]:
from tensorflow.keras.optimizers.legacy import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [452]:


def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [453]:
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-2), metrics=['mae'])
dqn.fit(env, nb_steps=1000, visualize=False, verbose=1)

Training for 1000 steps ...
post: reset
get: state
44
Interval 1 (0 steps performed)
{'id': 0, 'action': [1.0]}
post: step
get: state
43
get: reward
get: is_done
    1/10000 [..............................] - ETA: 1:59:19 - reward: -1.0000{'id': 0, 'action': [3.0]}
post: step
get: state
43
get: reward
get: is_done
    2/10000 [..............................] - ETA: 10:59 - reward: -1.0000  {'id': 0, 'action': [1.0]}
post: step
get: state
43
get: reward
get: is_done
    3/10000 [..............................] - ETA: 11:04 - reward: -1.0000{'id': 0, 'action': [1.0]}
post: step
get: state
42
get: reward
get: is_done
    4/10000 [..............................] - ETA: 11:06 - reward: -1.0000{'id': 0, 'action': [2.0]}
post: step
get: state
42
get: reward
get: is_done
    5/10000 [..............................] - ETA: 11:04 - reward: -1.0000{'id': 0, 'action': [1.0]}
post: step
get: state
40
get: reward
get: is_done
    6/10000 [..............................] - ETA: 11:05 - reward: -1.000

<keras.callbacks.History at 0x169d77ef0d0>

In [454]:
_ = dqn.test(env, nb_episodes=15, visualize=True)

Testing for 15 episodes ...
post: reset
get: state
52
{'id': 0, 'action': [3.0]}
post: step
get: state
52
get: reward
get: is_done


TypeError: UnrealEnv.render() got an unexpected keyword argument 'mode'

In [None]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

In [None]:
print(np.mean(scores.history['episode_reward']))