# Import Dependencies

In [8]:
print(0)
import time
import numpy as np
import airsim
import config

# Timing: `timeslice` is the wall-clock duration (seconds) of one control step.
# NOTE(review): `clockspeed` presumably mirrors the simulator's ClockSpeed
# setting -- confirm against the AirSim settings in use.
clockspeed = 1
timeslice = 0.5 / clockspeed
# Episode geometry, compared against the estimated kinematics position.
goalY = 57      # y >= goalY ends the episode (status 'goal')
outY = -0.5     # y <= outY ends the episode as dead (status 'out')
floorZ = 1.18   # z > floorZ is treated as having landed
# Intermediate y thresholds; passing goals[level] pays the goal reward once.
goals = [7, 17, 27.5, 45, goalY]
# Below this speed (norm of the velocity vector) the 'slow' reward applies.
speed_limit = 0.2
ACTION = ['00', '+x', '+y', '+z', '-x', '-y', '-z']


class Env:
    """Episodic wrapper around an AirSim multirotor client.

    The simulator is kept paused between calls; `reset` and `step` unpause it
    only while a velocity command is being executed.

    Attributes:
        client: the `airsim.MultirotorClient` used for every simulator call.
        action_size: number of velocity components expected by `step` (3).
        level: index into `goals` of the next intermediate goal to reach.
    """

    def __init__(self):
        # connect to the AirSim simulator
        self.client = airsim.MultirotorClient()
        self.client.confirmConnection()
        self.action_size = 3
        self.level = 0

    def reset(self):
        """Reset the simulator, take off and return the first observation.

        Returns:
            A two-element list `[responses, quad_vel]`: the `simGetImages`
            result for one DepthVis request on camera 1, and the linear
            velocity as a NumPy array `[x, y, z]`.
        """
        self.level = 0
        self.client.reset()
        self.client.enableApiControl(True)
        self.client.armDisarm(True)

        # Custom takeoff: command vz = -1 for two timeslices, briefly command
        # zero velocity, then hover. Each call is joined so it completes
        # before the simulator is paused again.
        self.client.simPause(False)
        self.client.moveByVelocityAsync(0, 0, -1, 2 * timeslice).join()
        self.client.moveByVelocityAsync(0, 0, 0, 0.1 * timeslice).join()
        self.client.hoverAsync().join()
        self.client.simPause(True)

        quad_vel = self.client.getMultirotorState().kinematics_estimated.linear_velocity
        responses = self.client.simGetImages(
            [airsim.ImageRequest(1, airsim.ImageType.DepthVis, True)])
        quad_vel = np.array([quad_vel.x_val, quad_vel.y_val, quad_vel.z_val])
        return [responses, quad_vel]

    def step(self, quad_offset):
        """Apply a velocity command for one timeslice and observe the result.

        Args:
            quad_offset: iterable of three values convertible to float, used
                as the (x, y, z) velocity passed to `moveByVelocityAsync`.

        Returns:
            A tuple `(observation, reward, done, info)` where `observation`
            is `[responses, quad_vel]` as in `reset`, `reward` comes from
            `compute_reward`, `done` is True when the vehicle is dead or has
            reached `goalY`, and `info` holds the keys 'Y', 'level' and
            'status'.
        """
        quad_offset = [float(i) for i in quad_offset]
        self.client.simPause(False)

        has_collided = False
        landed = False
        # The command is deliberately not joined: the loop below polls the
        # vehicle for `timeslice` seconds of wall-clock time while it moves.
        self.client.moveByVelocityAsync(
            quad_offset[0], quad_offset[1], quad_offset[2], timeslice)
        collision_count = 0
        start_time = time.time()
        while time.time() - start_time < timeslice:
            # One state fetch per poll so position and velocity belong to the
            # same snapshot.
            kinematics = self.client.getMultirotorState().kinematics_estimated
            quad_pos = kinematics.position
            quad_vel = kinematics.linear_velocity

            collided = self.client.simGetCollisionInfo().has_collided
            # Landed: exactly zero velocity, or z past the floor threshold.
            landed = (quad_vel.x_val == 0 and quad_vel.y_val == 0
                      and quad_vel.z_val == 0)
            landed = landed or quad_pos.z_val > floorZ
            if collided or landed:
                collision_count += 1
            # Require more than 10 positive polls before ending the step, so
            # a single transient reading does not kill the episode.
            if collision_count > 10:
                has_collided = True
                break
        self.client.simPause(True)

        # observe with depth camera
        responses = self.client.simGetImages(
            [airsim.ImageRequest(1, airsim.ImageType.DepthVis, True)])

        # final state, read while the simulator is paused
        kinematics = self.client.getMultirotorState().kinematics_estimated
        quad_pos = kinematics.position
        quad_vel = kinematics.linear_velocity

        # decide whether done
        dead = has_collided or quad_pos.y_val <= outY
        done = dead or quad_pos.y_val >= goalY

        reward = self.compute_reward(quad_pos, quad_vel, dead)

        # NOTE(review): `landed` reflects only the last poll of the loop, so
        # the status can read 'landed' even when `has_collided` (and `done`)
        # is False. Kept as-is to preserve the existing log format.
        if landed:
            status = 'landed'
        elif has_collided:
            status = 'collision'
        elif quad_pos.y_val <= outY:
            status = 'out'
        elif quad_pos.y_val >= goalY:
            status = 'goal'
        else:
            status = 'going'
        info = {'Y': quad_pos.y_val, 'level': self.level, 'status': status}

        quad_vel = np.array([quad_vel.x_val, quad_vel.y_val, quad_vel.z_val])
        observation = [responses, quad_vel]
        return observation, reward, done, info

    def compute_reward(self, quad_pos, quad_vel, dead):
        """Return the reward for the current state.

        Args:
            quad_pos: object exposing `y_val` (position along y).
            quad_vel: object exposing `x_val`, `y_val`, `z_val` (velocity).
            dead: True when the episode ended by collision/landing/out.

        Returns:
            `config.reward['dead']` when dead; the scaled
            `config.reward['goal']` when the next entry of `goals` has been
            reached (this also advances `self.level`); `config.reward['slow']`
            when the speed is below `speed_limit`; otherwise 0.1 times the
            y velocity.
        """
        # Builtin `float` replaces the `np.float` alias, which was removed in
        # NumPy 1.24 and raises AttributeError there.
        vel = np.array([quad_vel.x_val, quad_vel.y_val, quad_vel.z_val],
                       dtype=float)
        speed = np.linalg.norm(vel)
        if dead:
            reward = config.reward['dead']
        elif self.level < len(goals) and quad_pos.y_val >= goals[self.level]:
            # The bounds check keeps a call made after the final goal was
            # already awarded from indexing past the end of `goals`.
            self.level += 1
            reward = config.reward['goal'] * (1 + self.level / len(goals))
        elif speed < speed_limit:
            reward = config.reward['slow']
        else:
            reward = float(vel[1]) * 0.1
        return reward

    def disconnect(self):
        """Release API control, disarm the vehicle and report it on stdout."""
        self.client.enableApiControl(False)
        self.client.armDisarm(False)
        print('Disconnected.')

0


In [9]:
# env: gym37
import sys # 3.7.16
import random
import numpy as np # 1.21.6
import gym # 0.25.2
import tensorflow # 2.10.0
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam    
import rl # keras-rl2==1.0.5
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

# Report the interpreter and library versions of the running kernel.
# `sys.version` starts with the version number followed by a space, so taking
# the first whitespace-separated token yields it at any length; a fixed-width
# slice instead pads short versions with a trailing space or cuts long ones.
print("Python: " + sys.version.split()[0])
print("NumPy: " + np.__version__)
print("gym: " + gym.__version__)
print("TensorFlow: " + tensorflow.__version__)
# The keras-rl2 version is a hard-coded label, not read from the package.
print("Keras-rl2: 1.0.5")

Python: 3.8.18 
NumPy: 1.24.3
gym: 0.26.2
TensorFlow: 2.13.0
Keras-rl2: 1.0.5


# Create Environment

In [10]:
# Instantiate the AirSim environment wrapper; Env.__init__ creates the
# MultirotorClient and calls confirmConnection() on it.
env = Env()

TypeError: unsupported operand type(s) for *: 'AsyncIOLoop' and 'float'

In [11]:
!pip show tornado

Name: tornado
Version: 4.5.3
Summary: Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed.
Home-page: http://www.tornadoweb.org/
Author: Facebook
Author-email: python-tornado@googlegroups.com
License: http://www.apache.org/licenses/LICENSE-2.0
Location: c:\users\kaan-\anaconda3\envs\airsim38\lib\site-packages
Requires: 
Required-by: ipykernel, jupyter_client, msgpack-rpc-python


In [5]:
!pip uninstall airsim -y & pip install airsim

Found existing installation: airsim 1.8.1
Uninstalling airsim-1.8.1:
  Successfully uninstalled airsim-1.8.1
Collecting airsim
  Using cached airsim-1.8.1-py3-none-any.whl
Installing collected packages: airsim
Successfully installed airsim-1.8.1


In [24]:
# Create the CartPole-v1 gym environment (this rebinds the name `env`),
# passing render_mode="human" to gym.make.
env = gym.make("CartPole-v1", render_mode="human")
# env = gym.make("CartPole-v1")
# Length of the first observation dimension and number of discrete actions.
states = env.observation_space.shape[0]
actions = env.action_space.n

# Create Model

In [14]:
# keras-rl feeds the network batches shaped (batch, window_length, *obs_shape),
# so the input shape must be built from the memory's window length and the
# observation size of the environment the agent is trained on. A fixed image
# shape such as (256, 256, 3) does not match `env`'s observations.
WINDOW_LENGTH = 1
INPUT_SHAPE = (WINDOW_LENGTH, states)

# Small fully connected Q-network: one linear output per discrete action.
model = Sequential()
model.add(Flatten(input_shape=INPUT_SHAPE))
model.add(Dense(24, activation="relu"))
model.add(Dense(24, activation="relu"))
model.add(Dense(actions, activation="linear"))

# DQN agent with a 50000-transition replay memory and Boltzmann exploration.
agent = DQNAgent(
    model=model,
    memory=SequentialMemory(limit=50000, window_length=WINDOW_LENGTH),
    policy=BoltzmannQPolicy(),
    nb_actions=actions,
    nb_steps_warmup=10,
    target_model_update=0.01
)

agent.compile(optimizer=Adam(learning_rate=0.001), metrics=["mae"])

In [15]:
# Train the agent on `env` for 100000 steps with visualize=False and
# verbose=1 progress output.
agent.fit(env=env, nb_steps=100000, visualize=False, verbose=1)

Training for 100000 steps ...
Interval 1 (0 steps performed)
53 episodes - episode_reward: 187.094 [150.000, 248.000] - loss: 3.043 - mae: 27.078 - mean_q: 54.968

Interval 2 (10000 steps performed)
49 episodes - episode_reward: 203.388 [175.000, 261.000] - loss: 3.169 - mae: 39.208 - mean_q: 79.212

Interval 3 (20000 steps performed)
48 episodes - episode_reward: 208.500 [175.000, 238.000] - loss: 2.373 - mae: 39.637 - mean_q: 79.876

Interval 4 (30000 steps performed)
43 episodes - episode_reward: 233.814 [166.000, 500.000] - loss: 1.528 - mae: 37.609 - mean_q: 75.681

Interval 5 (40000 steps performed)
25 episodes - episode_reward: 397.520 [173.000, 500.000] - loss: 7.048 - mae: 42.817 - mean_q: 86.124

Interval 6 (50000 steps performed)
52 episodes - episode_reward: 193.385 [9.000, 330.000] - loss: 22.863 - mae: 60.690 - mean_q: 121.771

Interval 7 (60000 steps performed)
39 episodes - episode_reward: 256.103 [194.000, 500.000] - loss: 8.538 - mae: 62.424 - mean_q: 125.307

Interva

<keras.callbacks.History at 0x20c35b53648>

In [25]:
# Run the agent for 10 test episodes and print the mean of the per-episode
# rewards recorded in the returned history.
results = agent.test(env, nb_episodes=10, verbose=1)
print(np.mean(results.history["episode_reward"]))


# Close the environment once evaluation is finished.
env.close()

Testing for 10 episodes ...
Episode 1: reward: 500.000, steps: 500
Episode 2: reward: 500.000, steps: 500
Episode 3: reward: 500.000, steps: 500
Episode 4: reward: 500.000, steps: 500
Episode 5: reward: 500.000, steps: 500
Episode 6: reward: 500.000, steps: 500
Episode 7: reward: 500.000, steps: 500
Episode 8: reward: 500.000, steps: 500
Episode 9: reward: 500.000, steps: 500
Episode 10: reward: 500.000, steps: 500
500.0
