In [3]:
from gym import Env
from gym.spaces import Discrete, box
import numpy as np
import random

In [None]:
class DeliveryQAgent(QAgent):

    def __init__(self,*args,**kwargs):
        super().__init__(*args,**kwargs)
        self.reset_memory()

    def act(self,s):

        # Get Q Vector
        q = np.copy(self.Q[s,:])

        # Avoid already visited states
        q[self.states_memory] = -np.inf

        if np.random.rand() > self.epsilon:
            a = np.argmax(q)
        else:
            a = np.random.choice([x for x in range(self.actions_size) if x not in self.states_memory])

        return a


    def remember_state(self,s):
        self.states_memory.append(s)

    def reset_memory(self):
        self.states_memory = []

In [50]:
## Create the TSP Environment
import numpy as np
import gym
from gym import spaces
import pygame

from scipy.spatial.distance import cdist
from matplotlib.patches import Rectangle
from matplotlib.collections import PatchCollection
import matplotlib.pyplot as plt
plt.style.use("seaborn-dark")

class TSPEnvironment(gym.Env):
    def __init__ (self, n_stops = 100):
        print(f"TSP-Environment initialized with {n_stops} random stops")

        # Initialization
        #Number of stops
        self.n_stops = n_stops
        #Coordinates of stops
        self.xy = []
        
        self.action_space = spaces.Discrete(n_stops)
        self.observation_space = spaces.Discrete(n_stops)
        self.episode_length = n_stops
        self._visitedStops = []

        #set starting point (state)
        
        #Generate stops
        self._generate_stops()
        self._generate_q_values()
                
        

    def _get_obs(self):
        return {"agent": self._agent_location, "target": self._target_location}

    def _get_info(self):
        return {"distance": np.linalg.normal(self._agent_location - self._target_location, ord=1)} 

    def _generate_stops(self):
        self.xy = (np.random.rand(self.n_stops,2)*100).round(2)
        self.x=self.xy[:,0]
        self.y=self.xy[:,1]

        print(f'genrated stops xy: {self.xy}')
        
        #pick random StartPoint
        self._visitedStops.append(np.random.randint(0,self.n_stops))
        print(f'Starting Point: {self._visitedStops}')


    def _generate_q_values(self,box_size = 0.2):
        self.q_stops = cdist(self.xy,self.xy,'euclidean').round(2)
        print(f'Distance: \n {self.q_stops}')


    def render(self):
        pass

    #Resets StartingPoint
    def reset(self):
        self._visitedStops = []

        first_stop = np.random.randint(self.n_stops)
        self._visitedStops.append(first_stop)
        return first_stop

    def step(self,destination):
        #Get reward for such a move
        reward = self._get_reward(self._get_state(), destination)

        #set new position of agent
        #self.state = destination
        self._visitedStops.append(destination)
        print(f'Visited Stops: {self._visitedStops}')

        done = len(self._visitedStops) == self.n_stops

        info = {}

        print(f'Agent position: {self._get_state()}')

        return self._get_state(), reward, done, info


    #return stops[-1]
    #Gibt die aktuelle Position des Agenten zurück
    def _get_state(self):
        if( len(self._visitedStops)>0):
            return self._visitedStops[-1]
        else:
            return 0 

    def _get_reward(self, state, destination):
        return self.q_stops[self._get_state(), destination]

    def _get_xy(self):
        return self.xy[self._get_state()]



  plt.style.use("seaborn-dark")


In [51]:
episodes = 5 

env = TSPEnvironment(episodes)
action = env.action_space.sample()
len(env._visitedStops)
n_state, reward, done, info = env.step(action)


TSP-Environment initialized with 5 random stops
genrated stops xy: [[91.97 56.92]
 [71.08 91.64]
 [82.89 90.14]
 [95.84 45.98]
 [11.78 23.56]]
Starting Point: [1]
Distance: 
 [[ 0.   40.52 34.44 11.6  86.85]
 [40.52  0.   11.9  51.94 90.28]
 [34.44 11.9   0.   46.02 97.41]
 [11.6  51.94 46.02  0.   87.  ]
 [86.85 90.28 97.41 87.    0.  ]]
Visited Stops: [1, 3]
Agent position: 3


In [47]:
episodes = 10 

env = TSPEnvironment(episodes)


for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0

    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Epsiode: {} Score: {}'.format(episode,score))

TSP-Environment initialized with 10 random stops
genrated stops xy: [[78.73 61.46]
 [53.33 80.51]
 [36.27  4.91]
 [34.98 26.58]
 [ 1.62 49.81]
 [76.22 40.21]
 [83.39 34.32]
 [34.19 69.32]
 [95.21 63.72]
 [84.87 24.43]]
Starting Point: [6]
 [[ 0.   31.75 70.72 55.95 77.99 21.4  27.54 45.23 16.63 37.54]
 [31.75  0.   77.5  56.97 60.14 46.35 55.11 22.17 45.12 64.34]
 [70.72 77.5   0.   21.71 56.72 53.31 55.54 64.44 83.26 52.37]
 [55.95 56.97 21.71  0.   40.65 43.43 49.02 42.75 70.76 49.94]
 [77.99 60.14 56.72 40.65  0.   75.22 83.22 37.97 94.62 87.03]
 [21.4  46.35 53.31 43.43 75.22  0.    9.28 51.13 30.22 18.  ]
 [27.54 55.11 55.54 49.02 83.22  9.28  0.   60.38 31.69 10.  ]
 [45.23 22.17 64.44 42.75 37.97 51.13 60.38  0.   61.28 67.7 ]
 [16.63 45.12 83.26 70.76 94.62 30.22 31.69 61.28  0.   40.63]
 [37.54 64.34 52.37 49.94 87.03 18.   10.   67.7  40.63  0.  ]]
Visited Stops: [1, 3]
Agent position: 3
Visited Stops: [1, 3, 5]
Agent position: 5
Visited Stops: [1, 3, 5, 4]
Agent position: 4


In [32]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam



In [33]:
states = env.observation_space.shape
actions = env.action_space.n

In [34]:
def build_model(states, actions):
    model = Sequential()
    model.add(Dense(24,activation='relu', input_shapes=states))
    model.add(Dense(24,activation='relu'))
    model.add(Dense(action,activation='linear'))
    return model

In [35]:
model = build_model(states, actions)
model.summary()

TypeError: ('Keyword argument not understood:', 'input_shapes')