<a href="https://colab.research.google.com/github/Luke-687/Simulating-Chemotaxis--Q-learning-and-Heuristic-/blob/main/Chemotaxis_QLearning_(Base_Code).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math
import random
import matplotlib.pyplot as plt

In [1]:
#Agent class for everything
class agent:
  """Tabular Q-learning agent moving on a 20x20 grid toward a central gradient.

  A state is an integer grid cell ``(x, y)``; an action is a pair
  ``(direction, velocity)`` with ``direction`` in 0..7 (multiples of 45
  degrees) and ``velocity`` in 1..3. ``Qtable`` maps ``(state, action)`` to a
  two-element list ``[qValue, [nextX, nextY]]`` where the second entry is the
  (unclamped) cell reached by taking that action from that state.

  Args:
    alpha: learning rate used in the Q-value update.
    gamma: discount factor applied to the best next-state Q-value.
    epsilon: baseline probability of taking a random action.
    s: exponent controlling how fast the reward falls off with distance.
  """

  GRID_SIZE = 20        # cells per side; valid coordinates are 0..19
  CENTER = 9.5          # x and y coordinate of the gradient centre
  GRADIENT_RADIUS = 5   # distance from the centre within which reward is positive
  # Every (direction, velocity) pair, direction-major, matching Qtable's keys.
  ACTIONS = tuple((d, v) for d in range(8) for v in range(1, 4))

  def __init__(self, alpha, gamma, epsilon, s):
    self.alpha = alpha
    self.gamma = gamma
    self.epsilon = epsilon
    self.s = s
    self.Qtable = {}
    for i1 in range(self.GRID_SIZE):
      for j1 in range(self.GRID_SIZE):
        for direction, velocity in self.ACTIONS:
          self.Qtable[((i1, j1), (direction, velocity))] = [
              0, self.moveForQTable(i1, j1, direction, velocity)]

  #Following functions define the Q-table and train it
  def moveForQTable(self, initX, initY, direction, velocity):
    """Return ``[x, y]`` reached from ``(initX, initY)`` by the given action.

    The displacement components are truncated toward zero with ``int``, so a
    diagonal move at velocity 1 produces no displacement.
    """
    angle = self.directionKey(direction)
    return [initX + int(math.cos(angle) * velocity),
            initY + int(math.sin(angle) * velocity)]

  def directionKey(self, direction):
    """Convert a direction index (units of 45 degrees) to radians."""
    return direction * 45 * math.pi / 180

  def _distanceFromCenter(self, x, y):
    """Euclidean distance from ``(x, y)`` to the gradient centre."""
    return math.sqrt((x - self.CENTER) ** 2 + (y - self.CENTER) ** 2)

  def _clamp(self, value):
    """Clamp a coordinate into the grid range ``[0, GRID_SIZE - 1]``."""
    return max(0, min(self.GRID_SIZE - 1, value))

  def reward(self, x, y):
    """Return the reward for being at ``(x, y)``.

    * 50 when closer than 1 unit to the centre,
    * ``10 / distance**s`` elsewhere inside the gradient radius,
    * -50 outside the gradient.
    """
    fromCenter = self._distanceFromCenter(x, y)
    # The centre check must come first: previously it sat behind the
    # ``<= 5`` test and could never run, and distance 0 divided by zero.
    if fromCenter < 1:
      return 50
    if fromCenter <= self.GRADIENT_RADIUS:
      return 10 / (fromCenter ** self.s)
    return -50

  @staticmethod
  def brownianMotion():
    """Return a small random ``[dx, dy]`` displacement.

    Each component starts at a uniform draw in [0, 0.25) and then receives
    ten perturbations of magnitude below 0.01 with a random sign.
    """
    xChange = random.random() / 4
    yChange = random.random() / 4
    for _ in range(10):
      # random.choice gives a real +/- sign; ``-1**k`` always evaluated to -1
      # because ``**`` binds tighter than unary minus.
      xChange += random.random() / 100 * random.choice((-1, 1))
      yChange += random.random() / 100 * random.choice((-1, 1))
    return [xChange, yChange]

  def trainQTable(self, trainingEpisodes, perc):
    """Run epsilon-greedy Q-learning and return per-episode reward totals.

    Each episode starts at (0, 0) and lasts 50 steps. Outside the gradient
    radius the exploration probability is ``epsilon + perc``; inside it is
    ``epsilon``.

    Args:
      trainingEpisodes: number of episodes to run.
      perc: extra exploration probability applied outside the gradient.

    Returns:
      ``[episodeIndices, totalRewards]`` - two parallel lists, where each
      total is the sum of the rewards received during that episode.
    """
    trainingRewards = [[], []]
    for iterations in range(trainingEpisodes):
      totReward = 0

      #Starting position some (x,y) outside of gradient:
      currentX = 0
      currentY = 0

      for _ in range(50):
        state = (int(currentX), int(currentY))
        # Greedy action for the current cell.
        direction, velocity = max(
            self.ACTIONS, key=lambda a: self.Qtable[state, a][0])

        # Explore more aggressively while outside the gradient.
        exploreProb = self.epsilon
        if self._distanceFromCenter(currentX, currentY) > self.GRADIENT_RADIUS:
          exploreProb += perc
        if random.random() < exploreProb:
          direction = random.randint(0, 7)
          velocity = random.randint(1, 3)

        nextPos = self.moveForQTable(currentX, currentY, direction, velocity)
        #Apply Brownian motion to the next position
        XYBrown = self.brownianMotion()
        nextX = self._clamp(nextPos[0] + XYBrown[0])
        nextY = self._clamp(nextPos[1] + XYBrown[1])

        initReward = self.reward(nextX, nextY)
        nextState = (int(nextX), int(nextY))
        nextReward = max(self.Qtable[nextState, a][0] for a in self.ACTIONS)

        entry = self.Qtable[(state, (direction, velocity))]
        entry[0] = entry[0] + self.alpha * (
            initReward + self.gamma * nextReward - entry[0])

        currentX = nextX
        currentY = nextY
        # Track the reward actually received this step (the old code added a
        # Q-value looked up at the new state with the old action).
        totReward += initReward
      trainingRewards[0].append(iterations)
      trainingRewards[1].append(totReward)
    return trainingRewards

  def simulate(self, steps, perc):
    """Roll out the learned policy from (0, 0) and return the visited path.

    Args:
      steps: number of positions to record.
      perc: extra exploration probability applied outside the gradient
        (random actions are only taken outside the gradient).

    Returns:
      ``(x, y)`` - lists of the clamped coordinates at each step.
    """
    x = []
    y = []

    #Starting position is some (x,y) outside of gradient
    currentX = 0
    currentY = 0

    for _ in range(steps):
      currentX = self._clamp(currentX)
      currentY = self._clamp(currentY)
      x.append(currentX)
      y.append(currentY)
      currentX = int(currentX)
      currentY = int(currentY)
      state = (currentX, currentY)

      # Greedy choice; ``>=`` means the last of several tied actions wins.
      bestValue = float("-inf")
      nextPos = [currentX, currentY]
      for action in self.ACTIONS:
        value, target = self.Qtable[state, action]
        if value >= bestValue:
          bestValue = value
          nextPos = target

      #Make the agent more random outside gradient:
      if self._distanceFromCenter(currentX, currentY) > self.GRADIENT_RADIUS:
        if random.random() < self.epsilon + perc:
          nextPos = self.Qtable[
              state, (random.randint(0, 7), random.randint(1, 3))][1]

      XYBrown = self.brownianMotion()
      currentX = nextPos[0] + XYBrown[0]
      currentY = nextPos[1] + XYBrown[1]
    return x, y


In [None]:
#Gradient class
class gradient:
  """Radial gradient sampled on an integer grid, for plotting.

  Args:
    xSize: largest x coordinate of the grid (x runs over 0..xSize).
    ySize: largest y coordinate of the grid (y runs over 0..ySize).
  """

  def __init__(self, xSize, ySize):
    self.dimensions = [xSize, ySize]

  def createGradient(self, s):
    """Return ``(xG, yG, gradient_values)`` as three parallel lists.

    The gradient is centred on ``(xSize / 2, ySize / 2)``. For each grid
    point the value is:

    * 0 when farther than 5 units from the centre,
    * 1 when within 1 unit of the centre,
    * ``1 / (squaredDistance ** s)`` otherwise.

    Args:
      s: steepness exponent applied to the squared distance.
    """
    xG = []
    yG = []
    gradient_values = []
    # Use one centre for both the membership test and the value; the value
    # used to be computed against a hard-coded (9.5, 9.5).
    centerX = self.dimensions[0] / 2
    centerY = self.dimensions[1] / 2
    # x spans the x dimension and y the y dimension (they were swapped).
    for x in range(self.dimensions[0] + 1):
      for y in range(self.dimensions[1] + 1):
        xG.append(x)
        yG.append(y)
        distSquared = (x - centerX) ** 2 + (y - centerY) ** 2
        dist = math.sqrt(distSquared)
        if dist > 5:
          gradient_values.append(0)
        elif dist > 1:
          gradient_values.append(1 / (distSquared ** s))
        else:
          gradient_values.append(1)

    return xG, yG, gradient_values

In [None]:
# Key values which can be altered.
# NOTE(review): the names match the agent constructor's parameters and the
# gradient's steepness argument - presumably they are passed there; confirm
# against the cells that use them.

# Exploration / environment shape
steepness = 0.85
epsilon = 0.15

# Q-learning update coefficients
gamma = 0.99
alpha = 0.99