In [None]:
import random
import math
import pickle
import cProfile
import matplotlib.pyplot as plt
import json
import time
import pandas as pd
import os
import numpy as np

### QLearn

In [None]:
class QLearn:
    """Tabular Q-learning with an epsilon-greedy policy.

    Q-values are stored in a dict keyed by ``(state, action)``; entries that
    were never written read as 0.0. Call ``setActions`` before ``do`` or
    ``learn``. A typical step is ``learn(new_state, reward)`` followed by
    ``do(new_state)``.
    """

    ###################################################################################

    def __init__(self, epsilon=0.1, alpha=0.2, lambd=0.5, epsilon_decay=0.99, decayTill=0):
        """
        epsilon:       probability of picking a uniformly random action in ``do``.
        alpha:         step size of the Q-value update in ``learn``.
        lambd:         weight of the best next-state Q-value in ``learn``.
        epsilon_decay: factor epsilon is multiplied by every 100 calls to ``do``.
        decayTill:     epsilon is only decayed while it is greater than this value.
        """
        self.q = {}
        self.oldstate = None
        # Initialised here so the attribute always exists, even before the first do().
        self.oldaction = None
        self.actions = None
        self.epsilon = epsilon
        self.alpha = alpha
        self.lambd = lambd
        self.epsilon_decay = epsilon_decay
        self.age = 0
        self.decayTill = decayTill

    ###################################################################################

    def setActions(self, actions):
        """Set the sequence of actions ``do`` chooses from."""
        self.actions = actions

    ###################################################################################

    def getQ(self, state, action):
        """Return Q(state, action); unseen pairs default to 0.0."""
        return self.q.get((state, action), 0.0)

    ###################################################################################

    def setQ(self, state, action, value):
        """Store ``value`` as Q(state, action)."""
        self.q[(state, action)] = value

    ###################################################################################

    def do(self, state):
        """Choose an action for ``state`` and remember the pair for ``learn``.

        With probability ``epsilon`` a random action is returned; otherwise one
        of the actions with the highest Q-value (ties broken at random).
        """
        self.oldstate = state
        if random.random() < self.epsilon:
            action = random.choice(self.actions)
        else:
            q = [self.getQ(state, a) for a in self.actions]
            max_q = max(q)
            max_indices = [i for i, value in enumerate(q) if value == max_q]
            action = self.actions[random.choice(max_indices)]

        # Multiplicative (geometric) decay of epsilon: every 100 calls epsilon is
        # scaled by epsilon_decay, as long as it is still above decayTill.
        # The counter is only reset when a decay actually happens.
        self.age += 1
        if self.age % 100 == 0 and self.epsilon > self.decayTill:
            self.epsilon *= self.epsilon_decay
            self.age = 0

        self.oldaction = action
        return action

    ###################################################################################

    def learn(self, newstate, reward):
        """Update Q(oldstate, oldaction) from ``reward`` and the best Q-value of ``newstate``.

        Does nothing until ``do`` has been called at least once.
        """
        if self.oldstate is None:
            return

        oldq = self.getQ(self.oldstate, self.oldaction)
        maxqnew = max(self.getQ(newstate, a) for a in self.actions)
        newq = oldq + self.alpha * (reward + self.lambd * maxqnew - oldq)
        self.setQ(self.oldstate, self.oldaction, newq)

### Agent

In [None]:
class Agent:
    """An ant that moves on a ``World`` grid and picks its behaviour with Q-learning.

    ``column``, ``row``, ``dir`` and ``world`` are assigned by ``World.addAgent``;
    most methods require the agent to have been added to a world first.

    Action codes used by ``update``:
        0 drop home pheromone, 1 drop food pheromone,
        2 follow home pheromone, 3 follow food pheromone, 4 move randomly.
    """

    ###################################################################################

    def __init__(self, actions=None):
        """Create an agent with its own learner and per-step history buffers.

        actions: iterable of action codes the learner may choose; defaults to
                 [0, 1, 2, 3, 4]. (A ``None`` default replaces the former
                 mutable list default, which was shared between all calls.)

        Reads the module-level settings qEpsilon, qLambda, qAlpha, qDecay,
        qDecayTill and stepsCount.
        """
        if actions is None:
            actions = [0, 1, 2, 3, 4]

        self.hasFood = 0
        self.foodCount = 0
        self.pherTime = 0

        self.ai = QLearn(epsilon=qEpsilon, lambd=qLambda, alpha=qAlpha, epsilon_decay=qDecay, decayTill=qDecayTill)
        self.ai.setActions(list(actions))

        # History: one slot per simulation step, written at index world.age-1 in update().
        # "state" is a plain list that grows by one entry per update().
        self.data = {
            "time": np.zeros(stepsCount, dtype='uint32'),
            "row": np.zeros(stepsCount, dtype='uint8'),
            "column": np.zeros(stepsCount, dtype='uint8'),
            "dir": np.zeros(stepsCount, dtype='uint8'),
            "state": [],
            "food": np.zeros(stepsCount, dtype='uint8'),
            "hasFood": np.zeros(stepsCount, dtype='uint8'),
            "action": np.zeros(stepsCount, dtype='uint8')
        }

    ###################################################################################

    def update(self):
        """Run one step: pick up / deliver food, learn, choose an action, log it, act."""
        here = self.getLocation()
        reward = negReward
        foodCollected = 0

        if self.hasFood:
            if here.isHome:
                # Delivered food: this is the only event that earns posReward.
                self.hasFood = 0
                self.turnAround()
                self.foodCount += 1
                foodCollected = 1
                reward = posReward
        elif here.isFood:
            self.hasFood = 1
            self.turnAround()

        # Steps since the last pheromone drop, capped at 5.
        if self.pherTime < 5:
            self.pherTime += 1

        # state=(self.getHomePherLevel(), self.getFoodPherLevel(), self.pherTime) # External Case
        state = (self.getHomePherLevel(), self.getFoodPherLevel())

        self.ai.learn(state, reward)
        choice = self.ai.do(state)

        step = self.world.age - 1
        self.data['time'][step] = self.world.age
        self.data['row'][step] = self.row
        self.data['column'][step] = self.column
        self.data['action'][step] = choice
        self.data['dir'][step] = int(self.dir)
        self.data['food'][step] = foodCollected
        self.data['hasFood'][step] = self.hasFood

        self.data['state'].append(state)

        if choice == 0: self.dropHomePher()
        elif choice == 1: self.dropFoodPher()
        elif choice == 2: self.followHomePher()
        elif choice == 3: self.followFoodPher()
        elif choice == 4: self.moveRandomly()

    ###################################################################################

    def dropFoodPher(self):
        """Add 0.2 food pheromone to the current cell and reset ``pherTime``."""
        here = self.getLocation()
        here.foodPher += 0.2
        self.pherTime = 0

    ###################################################################################

    def dropHomePher(self):
        """Add 0.2 home pheromone to the current cell and reset ``pherTime``."""
        here = self.getLocation()
        here.homePher += 0.2
        self.pherTime = 0

    ###################################################################################

    def getPherLevel(self, p):
        """Bucket a pheromone amount: 0 -> 0, (0, 0.1) -> 1, [0.1, 0.25) -> 2, otherwise 3."""
        if p == 0: return 0
        elif p < 0.1: return 1
        elif p < 0.25: return 2
        else: return 3

    ###################################################################################

    def getFoodPherLevel(self):
        """Bucketed food-pheromone level of the current cell."""
        return self.getPherLevel(self.getLocation().foodPher)

    ###################################################################################

    def getHomePherLevel(self):
        """Bucketed home-pheromone level of the current cell."""
        return self.getPherLevel(self.getLocation().homePher)

    ###################################################################################

    def _scanAhead(self):
        """Return the (left, centre, right) cells in front of the agent; heading is restored."""
        centre = self.getCellAhead()
        self.turn(-1)
        left = self.getCellAhead()
        self.turn(2)
        right = self.getCellAhead()
        self.turn(-1)
        return left, centre, right

    ###################################################################################

    def followFoodPher(self):
        """Step towards the strongest food pheromone ahead; a food cell counts as 1."""
        l, c, r = [1 if cell.isFood else cell.foodPher for cell in self._scanAhead()]
        self.followPher(l, c, r)

    ###################################################################################

    def followHomePher(self):
        """Step towards the strongest home pheromone ahead; a home cell counts as 1.

        The goal test uses ``isHome``. It previously tested ``isFood`` (apparently
        copied from ``followFoodPher``), which steered home-seeking agents
        towards food cells.
        """
        l, c, r = [1 if cell.isHome else cell.homePher for cell in self._scanAhead()]
        self.followPher(l, c, r)

    ###################################################################################

    def followPher(self, l, c ,r):
        """Turn towards the largest of (left, centre, right), ties broken at random, then step."""
        m = (l, c, r)
        max_m = max(m)
        max_ind = [i for i, value in enumerate(m) if value == max_m]
        i = random.choice(max_ind)

        if i == 0: self.turnLeft()
        elif i == 2: self.turnRight()

        self.goForward()

    ###################################################################################

    def moveRandomly(self):
        """Keep heading or turn one step left/right at random, then move forward."""
        self.turn(random.choice([0,1,-1]))
        self.goForward()

    ###################################################################################

    def turn(self, amount):
        """Rotate the heading by ``amount`` direction steps, wrapping around."""
        self.dir = (self.dir+amount)%len(self.world.directions)

    ###################################################################################

    def turnLeft(self):
        """Rotate one direction step counter to the direction list order."""
        self.turn(-1)

    ###################################################################################

    def turnRight(self):
        """Rotate one direction step along the direction list order."""
        self.turn(1)

    ###################################################################################

    def turnAround(self):
        """Reverse the heading (half of the available directions)."""
        # Integer division keeps ``dir`` an int; ``/`` made it a float.
        self.turn(len(self.world.directions)//2)

    ###################################################################################

    def goForward(self):
        """Move one cell along the current heading; the grid wraps at the edges."""
        dcolumn, drow = self.world.directions[int(self.dir)]
        self.column = (self.column + dcolumn)%self.world.width
        self.row = (self.row + drow)%self.world.height

    ###################################################################################

    def goBackward(self):
        """Move one cell opposite to the heading without changing the heading."""
        self.turnAround()
        self.goForward()
        self.turnAround()

    ###################################################################################

    def getLocation(self):
        """Return the cell the agent is standing on."""
        return self.world.grid[self.row][self.column]

    ###################################################################################

    def seesAgentAhead(self):
        """Return ``world.isAgentAt`` for the cell directly ahead; position is restored."""
        column, row = self.column, self.row
        self.goForward()
        column2, row2 = self.column, self.row
        self.column, self.row = column, row
        # isAgentAt takes (column, row); the arguments used to be passed swapped.
        return self.world.isAgentAt(column2, row2)

    ###################################################################################

    def getCellAhead(self):
        """Return the cell directly ahead; position is restored."""
        column, row = self.column, self.row
        self.goForward()
        cell = self.getLocation()
        self.column, self.row = column, row
        return cell

### Cell

In [None]:
class Cell:
    """One grid square: home/food flags plus home and food pheromone amounts."""

    ###################################################################################

    def __init__(self):
        self.isHome = 0
        self.isFood = 0
        self.homePher = 0
        self.foodPher = 0

    ###################################################################################

    def update(self, around):
        """Disperse towards the neighbour average, evaporate, then clamp.

        around: iterable of neighbouring cells, or ``None`` / empty to skip
                dispersion (``World.update`` passes ``None`` when ``calcAround``
                is off; this used to raise).

        Reads the module-level ``dispersionRate`` and ``evaporationRate``.
        Amounts above 1 are capped at 1; amounts below 0.001 are set to 0.
        """
        if around:
            count = len(around)
            havg = sum(c.homePher for c in around) / count
            favg = sum(c.foodPher for c in around) / count

            self.homePher += (havg - self.homePher) * dispersionRate
            self.foodPher += (favg - self.foodPher) * dispersionRate

        self.homePher *= evaporationRate
        self.foodPher *= evaporationRate

        if self.homePher > 1: self.homePher = 1
        if self.foodPher > 1: self.foodPher = 1
        if self.homePher < 0.001: self.homePher = 0
        if self.foodPher < 0.001: self.foodPher = 0

    ###################################################################################

    def copy(self, other):
        """Copy flags and pheromone amounts from ``other`` into this cell."""
        self.isHome = other.isHome
        self.isFood = other.isFood
        self.homePher = other.homePher
        self.foodPher = other.foodPher

    ###################################################################################

    def load(self, text):
        """Set the flags from one map character: 'H' is home, 'F' is food, anything else neither."""
        self.isHome = 0
        self.isFood = 0
        if text == 'H': self.isHome = 1
        if text == 'F': self.isFood = 1

### World

In [None]:
class World:
    """Wrap-around grid of cells plus the agents living on it.

    Each ``update`` advances the cells (double-buffered), then the agents, and
    records a snapshot of both pheromone fields.
    """

    # (dcolumn, drow) offsets; index into these is an agent's ``dir``.
    directions8 = [(0,-1),(1,-1),(1,0),(1,1),(0,1),(-1,1),(-1,0),(-1,-1)]
    directions4 = [(0,-1),(1,0),(0,1),(-1,0)]

    ###################################################################################

    def __init__(self, Cell, width, height, directions = 8, calcAround = 1, cellUpdate = 1, steps = None):
        """
        Cell:       class used to build every grid square (called with no arguments).
        width/height: grid size in cells.
        directions: 8 or 4 (neighbourhood / heading count).
        calcAround: if true, each cell's ``update`` receives its neighbours, else ``None``.
        cellUpdate: if false, cells are not updated at all.
        steps:      number of ``update`` calls to reserve history for; defaults to
                    the module-level ``stepsCount``.

        Raises ValueError for an unsupported ``directions`` value (previously this
        only printed a message and left the world without a ``directions`` attribute).
        """
        if directions == 8:
            self.directions = World.directions8
        elif directions == 4:
            self.directions = World.directions4
        else:
            raise ValueError(f'Unknown number of directions: {directions}')

        if steps is None:
            steps = stepsCount

        self.calcAround = calcAround
        self.cellUpdate = cellUpdate
        self.Cell = Cell
        self.width = width
        self.height = height

        # grid is the live state; grid2 is the scratch buffer swapped in by update().
        self.grid = [ [Cell() for column in range(width)] for row in range(height)]
        self.grid2 = [ [Cell() for column in range(width)] for row in range(height)]

        self.agents = []
        self.age = 0

        # Pheromone history: "time" is a growing list, every other key is the
        # flattened cell index (row*width + column) mapping to a float32 array
        # with one slot per step.
        self.homePherData = { "time": [] }
        self.foodPherData = { "time": [] }

        for row in range(height):
            for column in range(width):
                self.homePherData[row*width + column] = np.zeros(steps, dtype = 'float32')
                self.foodPherData[row*width + column] = np.zeros(steps, dtype = 'float32')

    ###################################################################################

    def randomize(self):
        """Call ``randomize()`` on every cell and reset the age.

        Requires the cell class to provide ``randomize``.
        NOTE(review): the ``Cell`` class in this notebook does not define one.
        """
        for row in range(self.height):
            for column in range(self.width):
                self.grid[row][column].randomize()
        self.age = 0

    ###################################################################################

    def getCell(self, srow, scolumn, dir):
        """Return the cell at (srow, scolumn) shifted by the (dcolumn, drow) offset ``dir``, wrapping."""
        dcolumn, drow = dir
        column = (scolumn + dcolumn) % self.width
        row = (srow + drow) % self.height
        return self.grid[row][column]

    ###################################################################################

    def update(self):
        """Advance the simulation by one step and record the pheromone fields.

        The snapshot is written at index ``age-1``, so calling this more often
        than the reserved number of steps raises IndexError.
        """
        self.age += 1
        if self.cellUpdate:
            for row in range(self.height):
                for column in range(self.width):
                    c = self.grid2[row][column]
                    # Copy the live cell's attributes into the scratch cell, then
                    # update the scratch cell from the *live* neighbours.
                    c.__dict__ = dict(self.grid[row][column].__dict__)
                    if self.calcAround:
                        around = [self.getCell(row,column,d) for d in self.directions]
                        c.update(around)
                    else:
                        c.update(None)

            self.grid, self.grid2 = self.grid2, self.grid

        for a in self.agents:
            a.update()

        self.homePherData["time"].append(self.age)
        self.foodPherData["time"].append(self.age)

        # Use the world's own size (not module-level width/height) so worlds of
        # any size record correctly.
        for row in range(self.height):
            for column in range(self.width):
                cell = self.grid[row][column]
                key = row*self.width + column
                self.homePherData[key][self.age-1] = cell.homePher
                self.foodPherData[key][self.age-1] = cell.foodPher

    ###################################################################################

    def addAgent(self, agent, column = None, row = None, dir = None):
        """Place ``agent`` in this world; any unspecified column/row/dir is chosen at random."""
        if column is None: column = random.randrange(self.width)
        if row is None: row = random.randrange(self.height)
        if dir is None: dir = random.randrange(len(self.directions))

        agent.column = column
        agent.row = row
        agent.dir = dir
        agent.world = self
        self.agents.append(agent)

    ###################################################################################

    def removeAgent(self, agent):
        """Remove ``agent`` from this world and delete its ``world`` attribute."""
        self.agents.remove(agent)
        del agent.world

    ###################################################################################

    def isAgentAt(self, column, row):
        """Return 1 if any agent of this world is at (column, row), else 0."""
        # Previously iterated an undefined global ``agents``.
        for a in self.agents:
            if a.column == column and a.row == row: return 1
        return 0

    ###################################################################################

    def load(self, file):
        """Load a text map, centred in the grid (cropped if larger), and reset the age.

        Each character is passed to the target cell's ``load``. Afterwards
        ``isHomeData`` / ``isFoodData`` hold the flags of all cells in row-major order.
        """
        with open(file) as handle:
            lines = [x.strip('\n') for x in handle.readlines()]
        fh = len(lines)
        fw = max((len(x) for x in lines), default=0)

        if fh > self.height:
            fh = self.height
            starty = 0
        else:
            starty = int(0.5 * (self.height - fh))

        if fw > self.width:
            fw = self.width
            startx = 0
        else:
            startx = int(0.5 * (self.width - fw))

        for row in range(fh):
            line = lines[row]
            for column in range(min(fw, len(line))):
                self.grid[starty + row][startx + column].load(line[column])

        self.age = 0

        self.isHomeData = []
        self.isFoodData = []

        for row in range(self.height):
            for column in range(self.width):
                self.isHomeData.append(self.grid[row][column].isHome)
                self.isFoodData.append(self.grid[row][column].isFood)

### Main

In [None]:
# --- Environment --------------------------------------------------------------
map = 'food.txt'            # map file given to World.load(); NOTE: this name shadows the builtin `map`
height, width = 30, 30      # grid size in cells

# Free-text label for this run; written to params.json by get_params()/log_data()
version = 'pure external width epsilon reducing to zero overtime'

antCount = 10               # number of agents created by run()

# --- Pheromone dynamics -------------------------------------------------------
dispersionRate = 0.04       # rate of spread of pheromone
evaporationRate = 0.99      # rate of disappearance of pheromone

# --- Agent behaviour (purely external) ----------------------------------------
actions = [0, 1, 2, 3, 4]   # set to [2,3,4] for no structure generation

posReward = 10              # reward for completing a trip
negReward = -1              # reward for every action

# --- Q-learning ---------------------------------------------------------------
qEpsilon = 0.1              # exploration rate
qLambda = 0.95              # future discount rate
qAlpha = 0.2                # learning rate
qDecay = 0.99               # factor applied to epsilon every 100 timesteps
qDecayTill = 0              # epsilon stops decaying once it is no longer above this

# Populated by run()
world = None

In [None]:
def run():
    """Build the world from the map, start ``antCount`` ants on home cells, run ``stepsCount`` steps.

    Stores the world in the module-level ``world`` variable.
    """
    global world
    world = World(Cell, width, height)
    world.load(map)

    # (row, column) of every home cell, in row-major order
    homes = [
        (r, c)
        for r in range(height)
        for c in range(width)
        if world.grid[r][c].isHome
    ]

    for _ in range(antCount):
        ant = Agent(actions = actions)
        home_row, home_column = random.choice(homes)
        world.addAgent(ant, row = home_row, column = home_column)

    for _ in range(stepsCount):
        world.update()

### Logging Data

In [None]:
def get_agents_dataFrame():
    """Return one DataFrame per agent (indexed by "time") built from ``agent.data``.

    Covers the first ``antCount`` agents of the global ``world``. Each agent's
    ``data`` attribute is deleted once converted, so this can only be called
    once per run.
    """
    frames = []

    for idx in range(antCount):
        agent = world.agents[idx]
        frames.append(pd.DataFrame(agent.data).set_index("time"))
        del agent.data

    return frames

In [None]:
def get_world_homePher_dataFrame():
    """Return the world's home-pheromone history as a DataFrame indexed by "time".

    ``world.homePherData`` is deleted afterwards, so this can only be called once per run.
    """
    history = pd.DataFrame(world.homePherData).set_index("time")
    del world.homePherData
    return history

In [None]:
def get_world_foodPher_dataFrame():
    """Return the world's food-pheromone history as a DataFrame indexed by "time".

    ``world.foodPherData`` is deleted afterwards, so this can only be called once per run.
    """
    history = pd.DataFrame(world.foodPherData).set_index("time")
    del world.foodPherData
    return history

In [None]:
def get_params():
    """Collect the run configuration and the map's home/food flags into one dict.

    Values come from the module-level settings and from the global ``world``
    (``isHomeData`` / ``isFoodData``, which ``World.load`` fills in).
    """
    return {
        'version': version,

        'width': width,
        'height': height,

        'isHome': world.isHomeData,
        'isFood': world.isFoodData,

        'antCount': antCount,
        'stepsCount': stepsCount,

        'dispersionRate': dispersionRate,
        'evaporationRate': evaporationRate,
        'actions': actions,

        'posReward': posReward,
        'negReward': negReward,

        'qEpsilon': qEpsilon,
        'qLambda': qLambda,
        'qAlpha': qAlpha,
        'qDecay': qDecay,
        'qDecayTill': qDecayTill,
    }

In [None]:
def log_agents_data(directory):
    """Pickle each agent's DataFrame to ``directory`` as agent_1.pkl, agent_2.pkl, ...

    ``directory`` is used as a plain string prefix, so it must end with a path separator.
    """
    for number, frame in enumerate(get_agents_dataFrame(), start=1):
        target = directory + 'agent_' + str(number) + '.pkl'
        frame.to_pickle(target)

In [None]:
def log_world_homePher_data(directory):
    """Pickle the home-pheromone history to ``directory`` + 'world_home_pheromone.pkl'.

    ``directory`` is used as a plain string prefix, so it must end with a path separator.
    """
    target = directory + 'world_home_pheromone.pkl'
    get_world_homePher_dataFrame().to_pickle(target)

In [None]:
def log_world_foodPher_data(directory):
    """Pickle the food-pheromone history to ``directory`` + 'world_food_pheromone.pkl'.

    ``directory`` is used as a plain string prefix, so it must end with a path separator.
    """
    target = directory + 'world_food_pheromone.pkl'
    get_world_foodPher_dataFrame().to_pickle(target)

In [None]:
def log_data():
    """Write all recorded data of the finished run to ./data/<unix-time>_<stepsCount>/.

    Produces one pickle per agent, one pickle per pheromone field and a
    params.json with the run configuration.

    Raises FileExistsError if the run directory already exists.
    """
    timestamp = int(time.time())
    directory = './data/' + str(timestamp) + '_' + str(stepsCount)
    # makedirs also creates ./data when it is missing; plain mkdir failed there,
    # losing the results of an already completed simulation.
    os.makedirs(directory)
    directory += '/'

    log_agents_data(directory)
    log_world_homePher_data(directory)
    log_world_foodPher_data(directory)

    params = get_params()
    filename = 'params.json'
    with open(directory + filename, 'w') as outfile:
        json.dump(params, outfile)

#### Executing

In [None]:
# Number of simulation steps; also sizes the per-step history arrays of agents and world.
stepsCount = 10_000

In [None]:
# Run the full simulation once and report the elapsed wall-clock time in seconds.
start = time.time()
run()
end = time.time()

print(f"Runtime of the program is { round(end-start, 1) } s")

# Alternative: uncomment to run the simulation under the profiler instead.
# cProfile.run('run()') # Profiling the code

In [None]:
# Write the recorded data to disk and report how long it took (wall-clock seconds).
# Requires the simulation cell above to have run: log_data() reads the global `world`.
start = time.time()
log_data()
end = time.time()

print(f"Time required to write data is { round(end-start, 1) } s")