In [None]:
import pandas as pd
import numpy as np
import warnings

from mesa import Model
from mesa.time import RandomActivation
from sklearn import ensemble
from mesa.space import MultiGrid
from mesa.datacollection import DataCollector
from scipy.stats import truncnorm
from t_agent import TerroristAgent
from c_agent import CivilianAgent
from m_agent import MilitaryAgent
from dqn_tf import DeepQNetwork
from hivemind_ter import HiveMindTer
from hivemind_mil import HiveMindMil
from gen_agents import GenAgents

warnings.filterwarnings("ignore", category=FutureWarning)

class MapModel(Model):

    def __init__(self, density=.1, height=50, width=50, map_size="Large", troop_size=10000,
                t_hive=HiveMindTer(gamma=0.99, epsilon=1.0, alpha=0.00025, input_dims=(1, 15, 1),
                                n_actions=5, mem_size=4000, batch_size=1),
                m_hive=HiveMindMil(gamma=0.99, epsilon=1.0, alpha=0.00025, input_dims=(1, 7, 1),
                                n_actions=4, mem_size=4000, batch_size=1)):
        """Build the grid, train the threat predictor and populate the map.

        Parameters:
            density: fraction of every settlement's population created as terrorists.
            height, width: grid dimensions.
            map_size: "Large", "Medium" or "Small"; selects which settlements and
                military camps exist. Any other value leaves the map empty.
            troop_size: total number of military agents.
            t_hive, m_hive: shared decision makers for terrorist / military agents.

        NOTE: the default hive minds are built once, when this function is defined,
        so every model created without explicit hives shares the same two objects.
        """
        super().__init__()

        self.height = height
        self.width = width
        self.density = density
        self.map_size = map_size
        self.gen_agents = GenAgents()
        self.load_checkpoints = True

        self.schedule = RandomActivation(self)
        # Mesa grids are constructed as (width, height, torus).
        self.grid = MultiGrid(width, height, False)

        self.terror_score = 0
        self.civilian_score = 0

        # The training sample is only needed to fit the predictor, so it stays local.
        pred_agents = self.gen_agents.generate_pred_agents(10000)
        self.pred_model = self.train_model(pred_agents)
        self.t_hive = t_hive
        self.m_hive = m_hive

        self.datacollector = DataCollector({"Terrorist Epsilon": "t_epsilon", "Military Epsilon": "m_epsilon"})

        if self.load_checkpoints:
            self.t_hive.load_models()
            self.m_hive.load_models()

        self.t_epsilon = self.t_hive.epsilon
        self.m_epsilon = self.m_hive.epsilon

        # Head-counts per settlement kind, split by density.
        metro_size = 10000
        metro_civ = int(metro_size * (1 - self.density))
        metro_ter = int(metro_size * self.density)

        self.city_size = 1000
        city_civ = int(self.city_size * (1 - self.density))
        city_ter = int(self.city_size * self.density)

        self.village = 50
        village_civ = int(self.village * (1 - self.density))
        village_ter = int(self.village * self.density)

        self.troop_size = troop_size

        # (attribute stem, id suffix, location, terrorists, civilians)
        metro = ("metro", "m", {"X": 25, "Y": 25}, metro_ter, metro_civ)
        city1 = ("city1", "c1", {"X": 20, "Y": 20}, city_ter, city_civ)
        city2 = ("city2", "c2", {"X": 45, "Y": 25}, city_ter, city_civ)
        village1 = ("village1", "v1", {"X": 10, "Y": 45}, village_ter, village_civ)
        village2 = ("village2", "v2", {"X": 15, "Y": 30}, village_ter, village_civ)
        village3 = ("village3", "v3", {"X": 45, "Y": 45}, village_ter, village_civ)
        settlements = {
            "Large": [metro, city1, city2, village1, village2, village3],
            "Medium": [metro, city1, city2],
            "Small": [metro],
        }.get(self.map_size, [])

        # (attribute stem, id prefix, location, soldiers)
        if self.map_size == "Large":
            camps = [
                ("basecamp", "mb", {"X": 35, "Y": 10}, int(self.troop_size * .8)),
                ("outpost", "mo", {"X": 30, "Y": 20}, int(self.troop_size * .2)),
            ]
        elif self.map_size in ("Medium", "Small"):
            camps = [("basecamp", "mb", {"X": 30, "Y": 20}, self.troop_size)]
        else:
            camps = []

        # Generate every population first and only then place agents, so the
        # generator is called in the same sequence regardless of placement.
        batches = []
        for stem, suffix, loc, n_ter, n_civ in settlements:
            setattr(self, stem + "_loc", loc)
            batches.append((TerroristAgent, "t" + suffix, self.gen_agents.generate_ter_agents(n_ter), loc))
            batches.append((CivilianAgent, "c" + suffix, self.gen_agents.generate_civ_agents(n_civ), loc))
        for stem, prefix, loc, n_mil in camps:
            setattr(self, stem + "_loc", loc)
            batches.append((MilitaryAgent, prefix, self.gen_agents.generate_mil_agents(n_mil), loc))

        for agent_cls, prefix, frame, loc in batches:
            self._place_agents(agent_cls, prefix, frame, loc)

        self.set_terror_score()
        self.set_civil_score()

        self.running = True

    def _place_agents(self, agent_cls, id_prefix, frame, loc):
        """Create one agent per entry of frame, schedule it and put it on the cell at loc."""
        cell = (loc["X"], loc["Y"])
        for i in range(len(frame)):
            # Each agent is built from a one-entry slice of the generated population.
            a = agent_cls(id_prefix + str(i), self, frame[i:i+1])
            self.schedule.add(a)
            self.grid.place_agent(a, cell)

    def step(self):
        """Advance one tick, or stop the run once no terrorist is scheduled.

        After stepping the schedule the current exploration rates of both hive
        minds are copied onto the model, data is collected, and on every fifth
        schedule step both hive minds save their models.
        """
        if self.get_agent_count('Terrorist') < 1:
            self.running = False
            return

        self.schedule.step()
        self.t_epsilon = self.t_hive.epsilon
        self.m_epsilon = self.m_hive.epsilon
        self.datacollector.collect(self)

        checkpoint_due = self.schedule.steps % 5 == 0
        if checkpoint_due:
            self.t_hive.save_models()
            self.m_hive.save_models()

    def get_agent_count(self, type):
        """Return how many scheduled agents have a ``type`` attribute equal to ``type``."""
        return sum(1 for member in self.schedule.agents if member.type == type)

    def get_agent_list(self, type):
        """Return a list of the scheduled agents whose ``type`` attribute equals ``type``."""
        return [member for member in self.schedule.agents if member.type == type]

    def set_terror_score(self):
        """Recompute ``terror_score`` from the current head-counts.

        The score is terrorists minus civilians minus military; civilians only
        count half when terrorists are at least as numerous as civilians.
        """
        terrorists = self.get_agent_count('Terrorist')
        civilians = self.get_agent_count('Civilian')
        soldiers = self.get_agent_count('Military')

        civilian_penalty = (civilians/2) if terrorists >= civilians else civilians
        self.terror_score = terrorists - civilian_penalty - soldiers

    def set_civil_score(self):
        """Recompute ``civilian_score``: civilians plus half the military minus terrorists."""
        terrorists = self.get_agent_count('Terrorist')
        civilians = self.get_agent_count('Civilian')
        soldiers = self.get_agent_count('Military')

        half_soldiers = soldiers / 2
        self.civilian_score = civilians + half_soldiers - terrorists

    def get_same_square_agents(self, x_pos, y_pos):
        """Return the scheduled agents standing on cell ``(x_pos, y_pos)``."""
        return [
            member for member in self.schedule.agents
            if member.pos[0] == x_pos and member.pos[1] == y_pos
        ]

    def get_same_square_type_agents(self, x_pos, y_pos, type):
        """Return the scheduled agents of the given ``type`` standing on cell ``(x_pos, y_pos)``."""
        return [
            member for member in self.schedule.agents
            if member.pos[0] == x_pos and member.pos[1] == y_pos and member.type == type
        ]

    def get_neighbor_type(self, agent, type):
        """Return the scheduled agents of ``type`` in the cells around ``agent``.

        Neighbours are looked up on the grid (Moore neighbourhood) and then
        restricted to agents that are still in the schedule.
        """
        # The agent classes in this file take casualties out of the schedule only,
        # so the grid may still hold agents that have left the simulation; handing
        # those back would let a caller remove the same agent a second time.
        scheduled = set(self.schedule.agents)
        nearby = self.grid.neighbor_iter((agent.pos[0], agent.pos[1]), moore=True)
        return [other for other in nearby if other.type == type and other in scheduled]

    def find_nearest_agent(self, agent1, agents):
        """Return the member of ``agents`` closest to ``agent1`` (Euclidean distance).

        Ties go to the earliest candidate. An empty ``agents`` raises ValueError.
        """
        origin_x = agent1.pos[0]
        origin_y = agent1.pos[1]

        def distance(candidate):
            # straight-line distance from agent1 to the candidate
            dx2 = (candidate.pos[0] - origin_x) ** 2
            dy2 = (candidate.pos[1] - origin_y) ** 2
            return np.sqrt(dx2 + dy2)

        return min(agents, key=distance)

    def move_toward_nearest(self, agent1, agent2):
        """Return the one-cell step ``(dx, dy)`` that moves ``agent1`` toward ``agent2``.

        Each component is -1, 0 or 1: the sign of the offset from ``agent1`` to
        ``agent2`` along that axis.
        """
        def unit_step(offset):
            # Sign of the offset. Floor-dividing the offset by a negative number
            # instead is asymmetric: a target exactly one cell away in the
            # positive direction would yield 0 and the agent would never close in.
            return int(offset > 0) - int(offset < 0)

        x = unit_step(agent2.pos[0] - agent1.pos[0])
        y = unit_step(agent2.pos[1] - agent1.pos[1])

        return x, y

    def add_terrorist(self, agent, x_pos, y_pos):
        """Create a terrorist copied from ``agent``, schedule it and place it at ``(x_pos, y_pos)``.

        The new agent's id is the source agent's id prefixed with ``'t'``.
        """
        recruit_id = 't'+agent.unique_id
        recruit = TerroristAgent(recruit_id, self, agent)
        self.schedule.add(recruit)
        target_cell = (x_pos, y_pos)
        self.grid.place_agent(recruit, target_cell)


    def train_model(self, agents):
        """Fit and return a random-forest regressor for ``prob_threat``.

        ``agents`` is a DataFrame; every column except ``prob_threat`` is used
        as a feature and ``prob_threat`` is the target.
        """
        rfg = ensemble.RandomForestRegressor()
        # Use the keyword form: recent pandas no longer accepts ``axis`` positionally.
        X = agents.drop(columns=['prob_threat'])
        Y = agents.prob_threat

        rfg.fit(X, Y)

        return rfg

In [None]:
from mesa import Agent
import numpy as np

class TerroristAgent(Agent):

    def __init__(self, unique_id, model, agent):
        """Copy demographics and traits from ``agent`` and build the initial observation.

        ``agent`` only needs to expose the copied fields as attributes. The
        initial observation carries zeros in its two position slots.
        """
        super().__init__(unique_id, model)
        
        self.wounded = False
        self.wounded_count = 0

        # demographic fields are stored as ints
        for field in ('age', 'gender', 'religion'):
            setattr(self, field, int(getattr(agent, field)))

        # behavioural traits are stored as floats; char_list names all of them
        self.char_list = ['agr_bhv', 'rel_fnt', 'rel_conv', 'hst_twd_for', 'lvl_rct_act', 'crt_agr_lvl']
        for field in self.char_list:
            setattr(self, field, float(getattr(agent, field)))

        self.prob_threat = 0
        self.type = 'Terrorist'

        world = self.model
        self.state = [
            self.gender, self.religion,
            self.agr_bhv, self.rel_fnt, self.rel_conv,
            self.hst_twd_for, self.lvl_rct_act, self.crt_agr_lvl,
            world.terror_score, world.civilian_score,
            0, 0,
            world.get_agent_count('Terrorist'),
            world.get_agent_count('Civilian'),
            world.get_agent_count('Military'),
        ]

    def step(self):
        """Run one tick: drift traits, then act (or recover if wounded) and let the hive learn."""
        self.grow()

        if self.wounded:
            # a wounded agent sits out until its counter has run down
            if self.wounded_count > 0:
                self.wounded_count -= 1
            else:
                self.wounded = False
        else:
            observation = np.array(self.state).reshape((1, 15, 1))
            picked = self.model.t_hive.choose_action(np.expand_dims(observation, 1))
            self.choose_action(picked)

        hive = self.model.t_hive
        hive.learn()
        self.model.t_gamma = hive.gamma
            
    def grow(self):
        """Drift the agent's traits for one tick and refresh ``prob_threat``.

        ``crt_agr_lvl`` is nudged up when any monitored trait is high and down
        when any is low, with a larger step when ``agr_bhv`` and one of
        ``rel_fnt`` / ``hst_twd_for`` agree. Most traits then creep up slightly,
        and with 5% probability one random trait is scaled by a random factor
        in [0, 1). Finally the model's predictor is asked for the threat
        probability of the updated agent.
        """
        if((self.agr_bhv >= .75) or (self.rel_fnt >= .75) or (self.hst_twd_for >= .75) or (self.crt_agr_lvl >= .65)):
            self.crt_agr_lvl += .005
        if((self.agr_bhv <= .25) or (self.rel_fnt <= .25) or (self.hst_twd_for <= .25) or (self.crt_agr_lvl <= .25)):
            self.crt_agr_lvl -= .005
        if((self.agr_bhv >= .75) and ((self.rel_fnt > .75) or (self.hst_twd_for) >= .75)):
            self.crt_agr_lvl += .05
        if((self.agr_bhv <= .25) and ((self.rel_fnt < .25) or (self.hst_twd_for) <= .25)):
            # mirror of the branch above; "+-" here was an expression with no effect
            self.crt_agr_lvl -= .05

        self.agr_bhv += 0.00001
        self.rel_fnt += 0.00001
        self.rel_conv += 0.00001
        self.hst_twd_for += 0.00001
        self.crt_agr_lvl += 0.00001
        
        if np.random.random() <= 0.05:
            choice = np.random.choice(self.char_list)
            attr_value = getattr(self, choice)
            setattr(self, choice, attr_value * np.random.random())

        prediction = self.model.pred_model.predict([[self.age, self.gender, self.religion, self.agr_bhv, self.rel_fnt,
                                                self.rel_conv, self.hst_twd_for, self.lvl_rct_act, self.crt_agr_lvl]])
        # One sample in, one value out; index it explicitly rather than
        # relying on float() accepting a one-element array.
        self.prob_threat = float(np.ravel(prediction)[0])
    
    def choose_action(self, action):
        """Carry out ``action`` and store the resulting transition in the terrorist hive.

        Actions:
            0 - detonate on the current square (removes bystanders and this agent)
            1 - try to convert a civilian on the current square
            2 - attack a neighbouring military agent
            3 - move one cell toward the nearest military agent
            4 - wait
        Any other value is ignored.

        The reward is +1 when the model's terror score rose and -1 otherwise;
        action 2 adds 1 more when a military agent was removed.
        """
        if action not in (0, 1, 2, 3, 4):
            return

        state = np.array(self.state).reshape((1, 15, 1))
        t_score = self.model.terror_score

        bonus = 0
        if action == 0:
            self._detonate()
        elif action == 1:
            self._recruit()
        elif action == 2:
            bonus = self._attack_military()
        elif action == 3:
            self._advance()
        # action 4 changes nothing before the scores are refreshed

        self.model.set_terror_score()
        self.model.set_civil_score()

        # after detonating, the six trait slots of the observation are zeroed
        state_ = self._observe(blank_traits=(action == 0))
        self.state = state_

        reward = bonus + (-1 if t_score >= self.model.terror_score else 1)
        self.model.t_hive.store_transition(state, action, reward, state_.reshape((1, 15, 1)))

    def _observe(self, blank_traits=False):
        """Return the 15-value observation of this agent and the model as a flat array."""
        if blank_traits:
            traits = [0, 0, 0, 0, 0, 0]
        else:
            traits = [self.agr_bhv, self.rel_fnt, self.rel_conv,
                      self.hst_twd_for, self.lvl_rct_act, self.crt_agr_lvl]
        world = self.model
        return np.array([self.gender, self.religion, *traits,
                         world.terror_score, world.civilian_score,
                         self.pos[0], self.pos[1],
                         world.get_agent_count('Terrorist'),
                         world.get_agent_count('Civilian'),
                         world.get_agent_count('Military')])

    def _detonate(self):
        """Remove one or two random bystanders on this square, and this agent, from the schedule.

        Nothing happens when the square holds fewer bystanders than the drawn count.
        """
        here = self.model.get_same_square_agents(self.pos[0], self.pos[1])
        # Exclude this agent from the victim draw: it is removed separately below
        # and must not be taken out of the schedule twice.
        bystanders = [other for other in here if other is not self]
        choice = np.random.choice(np.array([1, 2]))
        if len(bystanders) >= choice:
            for victim in np.random.choice(bystanders, choice, replace=False):
                self.model.schedule.remove(victim)
            self.model.schedule.remove(self)

    def _recruit(self):
        """Pick a random civilian on this square and convert it if its ``rel_conv`` is not higher."""
        civilians = self.model.get_same_square_type_agents(self.pos[0], self.pos[1], 'Civilian')
        if len(civilians) > 0:
            selected_agent = np.random.choice(civilians)
            if selected_agent.rel_conv <= self.rel_conv:
                self.model.add_terrorist(selected_agent, self.pos[0], self.pos[1])
                self.model.schedule.remove(selected_agent)

    def _attack_military(self):
        """Attack a random neighbouring military agent; return 1 if it was removed, else 0.

        One random draw decides the outcome: >= 0.7 wounds the target for three
        ticks, (0.05, 0.2] removes it, and <= 0.05 removes a random
        neighbouring civilian instead (if there is one).
        """
        bonus = 0
        mil_neighbors = self.model.get_neighbor_type(self, 'Military')
        civ_neighbors = self.model.get_neighbor_type(self, 'Civilian')
        if len(mil_neighbors) > 0:
            target = np.random.choice(mil_neighbors)
            rand = np.random.random()
            if rand >= 0.7:
                target.wounded = True
                target.wounded_count = 3
            elif rand <= 0.2 and rand > 0.05:
                self.model.schedule.remove(target)
                bonus += 1
            elif rand <= 0.05:
                if len(civ_neighbors) > 0:
                    stray = np.random.choice(civ_neighbors)
                    self.model.schedule.remove(stray)
        return bonus

    def _advance(self):
        """Move one cell toward the nearest military agent, if any exists."""
        soldiers = self.model.get_agent_list('Military')
        if len(soldiers) > 0:
            nearest = self.model.find_nearest_agent(self, soldiers)
            x, y = self.model.move_toward_nearest(self, nearest)
            self.model.grid.move_agent(self, (self.pos[0]+x, self.pos[1]+y))

In [None]:
from mesa import Agent
import numpy as np

class MilitaryAgent(Agent):
    
    def __init__(self, unique_id, model, agent):
        """Set up a healthy military agent and its initial 7-value observation.

        ``agent`` is accepted for signature parity with the other agent classes
        and is not read here. The two position slots of the initial observation
        are zeros.
        """
        super().__init__(unique_id, model)
        
        self.wounded = False
        self.wounded_count = 0

        world = self.model
        head_counts = [world.get_agent_count(kind) for kind in ('Terrorist', 'Civilian', 'Military')]
        self.state = [world.terror_score, world.civilian_score, 0, 0] + head_counts
        self.type = "Military"
        
    def step(self):
        """Run one tick: act (or recover if wounded), then let the military hive learn."""
        if self.wounded:
            # a wounded agent sits out until its counter has run down
            if self.wounded_count > 0:
                self.wounded_count -= 1
            else:
                self.wounded = False
        else:
            observation = np.array(self.state).reshape((1, 7, 1))
            picked = self.model.m_hive.choose_action(np.expand_dims(observation, 1))
            self.choose_action(picked)

        hive = self.model.m_hive
        hive.learn()
        self.model.m_gamma = hive.gamma
        
    def choose_action(self, action):
        """Carry out ``action`` and store the resulting transition in the military hive.

        Actions:
            0 - move one cell toward the nearest terrorist
            1 - strike a terrorist on the current square
            2 - attack a neighbouring terrorist
            3 - wait
        Any other value is ignored (``self.action`` is still updated).

        The reward is +1 when the model's civilian score rose and -1 otherwise,
        plus 2 for a successful strike (action 1) and minus 1 when action 2
        removes a civilian.
        """
        self.action = action
        if action not in (0, 1, 2, 3):
            return

        state = np.array(self.state).reshape((1, 7, 1))
        c_score = self.model.civilian_score

        bonus = 0
        if action == 0:
            self._advance()
        elif action == 1:
            bonus = self._strike_same_square()
        elif action == 2:
            bonus = self._attack_neighbor()
        # action 3 changes nothing before the scores are refreshed

        self.model.set_terror_score()
        self.model.set_civil_score()

        state_ = self._observe()
        self.state = state_

        # The bonus is added to the score-based reward rather than replaced by it,
        # so a successful strike is actually worth more than a plain move.
        reward = bonus + (-1 if c_score >= self.model.civilian_score else 1)
        self.model.m_hive.store_transition(state, action, reward, state_.reshape((1, 7, 1)))

    def _observe(self):
        """Return the 7-value observation of the model and this agent's position as a flat array."""
        world = self.model
        return np.array([world.terror_score, world.civilian_score, self.pos[0], self.pos[1],
                         world.get_agent_count('Terrorist'), world.get_agent_count('Civilian'),
                         world.get_agent_count('Military')])

    def _advance(self):
        """Move one cell toward the nearest terrorist, if any exists."""
        terrorists = self.model.get_agent_list('Terrorist')
        if len(terrorists) > 0:
            nearest = self.model.find_nearest_agent(self, terrorists)
            x, y = self.model.move_toward_nearest(self, nearest)
            self.model.grid.move_agent(self, (self.pos[0]+x, self.pos[1]+y))

    def _strike_same_square(self):
        """Try to remove a random terrorist on this square; return 2 on success, else 0.

        The strike succeeds when a random draw is <= 0.4.
        """
        bonus = 0
        terrorists = self.model.get_same_square_type_agents(self.pos[0], self.pos[1], 'Terrorist')
        if len(terrorists) > 0:
            selected_agent = np.random.choice(terrorists)
            rand = np.random.random()
            if rand <= 0.4:
                self.model.schedule.remove(selected_agent)
                bonus += 2
        return bonus

    def _attack_neighbor(self):
        """Attack a random neighbouring terrorist; return -1 if a civilian was removed, else 0.

        One random draw decides the outcome: >= 0.6 wounds the target for three
        ticks, (0.05, 0.4] removes it, and <= 0.05 removes a random
        neighbouring civilian instead (if there is one).
        """
        bonus = 0
        ter_neighbors = self.model.get_neighbor_type(self, 'Terrorist')
        civ_neighbors = self.model.get_neighbor_type(self, 'Civilian')
        if len(ter_neighbors) > 0:
            target = np.random.choice(ter_neighbors)
            rand = np.random.random()
            if rand >= 0.6:
                target.wounded = True
                target.wounded_count = 3
            elif rand <= 0.4 and rand > 0.05:
                self.model.schedule.remove(target)
            elif rand <= 0.05:
                if len(civ_neighbors) > 0:
                    stray = np.random.choice(civ_neighbors)
                    self.model.schedule.remove(stray)
                    bonus -= 1
        return bonus

In [None]:
from mesa import Agent

class CivilianAgent(Agent):
    """Passive civilian agent whose traits are copied from a generated record.

    ``agent`` is any object exposing the attributes read below (``ages``,
    ``gender``, ``religion`` and the float-valued behaviour traits).
    """

    def __init__(self, unique_id, model, agent):
        super().__init__(unique_id, model)

        # Integer-coded demographics; note the source attribute is ``ages``.
        self.age = int(agent.ages)
        for trait in ("gender", "religion"):
            setattr(self, trait, int(getattr(agent, trait)))

        # Behavioural traits are stored as floats under the same names.
        for trait in ("agr_bhv", "rel_fnt", "rel_conv",
                      "hst_twd_for", "lvl_rct_act", "crt_agr_lvl"):
            setattr(self, trait, float(getattr(agent, trait)))

        self.prob_threat = 0
        self.type = 'Civilian'

    def step(self):
        """Civilians take no action when the scheduler activates them."""
        return None

In [None]:
from dqn_tf import DeepQNetwork
import os
import numpy as np
import tensorflow as tf

class HiveMindMil(object):
    """Shared DQN controller with a replay buffer and epsilon-greedy policy.

    Two ``DeepQNetwork`` instances are held: ``q_eval`` (trained online) and
    ``q_next`` (the periodically refreshed target network).

    Args:
        alpha: Learning rate forwarded to both networks.
        gamma: Discount factor applied to the bootstrapped next-state value.
        mem_size: Capacity of the circular replay buffer.
        n_actions: Number of discrete actions.
        epsilon: Initial exploration probability.
        batch_size: Number of transitions sampled per ``learn`` call.
        replace_target: Target refresh period, measured in stored transitions.
        input_dims: Shape of a single state (without the batch axis).
        q_next_dir: Checkpoint directory of the target network.
        q_eval_dir: Checkpoint directory of the online network.
    """

    def __init__(self, alpha, gamma, mem_size, n_actions, epsilon,
                 batch_size, replace_target=5000, input_dims=(1, 4, 1),
                 q_next_dir='tmp/mil/q_next', q_eval_dir='tmp/mil/q_eval'):
        self.n_actions = n_actions
        self.action_space = list(range(self.n_actions))
        self.gamma = gamma
        self.mem_size = mem_size
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.mem_cntr = 0
        self.replace_target = replace_target
        # Construction order (target first, then online) is kept as-is.
        self.q_next = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                                   name='q_next', chkpt_dir=q_next_dir)
        self.q_eval = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                                   name='q_eval', chkpt_dir=q_eval_dir)
        self.state_memory = np.zeros((self.mem_size, *input_dims))
        self.new_state_memory = np.zeros((self.mem_size, *input_dims))
        # Actions are stored one-hot encoded.
        self.action_memory = np.zeros((self.mem_size, self.n_actions), dtype=np.int8)
        self.reward_memory = np.zeros(self.mem_size)

    def store_transition(self, state, action, reward, state_):
        """Record one ``(state, action, reward, next state)`` transition.

        The buffer is circular: once ``mem_size`` entries exist the oldest
        slot is overwritten.
        """
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        actions = np.zeros(self.n_actions)
        actions[action] = 1.0
        self.action_memory[index] = actions
        self.reward_memory[index] = reward
        self.new_state_memory[index] = state_

        self.mem_cntr += 1

    def choose_action(self, state):
        """Return an action index using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the arg-max of the online network's Q-values for ``state``.
        """
        if np.random.random() < self.epsilon:
            return np.random.choice(self.action_space)
        actions = self.q_eval.sess.run(self.q_eval.Q_values,
                                       feed_dict={self.q_eval.input: state})
        return np.argmax(actions)

    def _q_targets(self, q_eval, q_next, action_indices, reward_batch):
        """Build the regression targets for one sampled batch.

        Returns a copy of ``q_eval`` in which, for every row ``i``, only the
        entry of the action taken in that transition is replaced by
        ``reward[i] + gamma * max(q_next[i])``.
        """
        q_target = q_eval.copy()
        # Pair each row with its own action; ``q_target[:, action_indices]``
        # would write every sampled action column in every row.
        rows = np.arange(q_target.shape[0])
        q_target[rows, action_indices] = reward_batch + \
            self.gamma * np.max(q_next, axis=1)
        return q_target

    def learn(self):
        """Run one training step on a random batch of stored transitions.

        Does nothing while the replay buffer is empty. Refreshes the target
        network whenever ``mem_cntr`` is a multiple of ``replace_target`` and
        decays ``epsilon`` (floored at 0.05) once more than 10000 transitions
        have been stored.
        """
        if self.mem_cntr == 0:
            # Nothing to sample from yet.
            return
        if self.mem_cntr % self.replace_target == 0:
            self.update_graph()
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, self.batch_size)
        state_batch = self.state_memory[batch]
        action_batch = self.action_memory[batch]
        # Decode the one-hot rows; works for any ``n_actions``.
        action_indices = np.argmax(action_batch, axis=1)
        reward_batch = self.reward_memory[batch]
        new_state_batch = self.new_state_memory[batch]

        q_eval = self.q_eval.sess.run(self.q_eval.Q_values,
                                      feed_dict={self.q_eval.input: state_batch})
        q_next = self.q_next.sess.run(self.q_next.Q_values,
                                      feed_dict={self.q_next.input: new_state_batch})

        q_target = self._q_targets(q_eval, q_next, action_indices, reward_batch)

        self.q_eval.sess.run(self.q_eval.train_op,
                             feed_dict={self.q_eval.input: state_batch,
                                        self.q_eval.actions: action_batch,
                                        self.q_eval.q_target: q_target})

        if self.mem_cntr > 10000:
            # Linear decay that never drops below the exploration floor.
            self.epsilon = max(self.epsilon - 4e-7, 0.05)

    def save_models(self):
        """Checkpoint both the online and the target network."""
        self.q_eval.save_checkpoint()
        self.q_next.save_checkpoint()

    def load_models(self):
        """Restore both networks from their checkpoints."""
        self.q_eval.load_checkpoint()
        self.q_next.load_checkpoint()

    def update_graph(self):
        """Copy the online network's weights into the target network.

        Values are read through ``q_eval.sess`` and written through
        ``q_next.sess`` so that the session used to evaluate the target
        network actually sees the new weights. ``Variable.load`` writes the
        value without adding new ops to the graph on every refresh.
        """
        eval_values = self.q_eval.sess.run(self.q_eval.params)
        for target_var, value in zip(self.q_next.params, eval_values):
            target_var.load(value, self.q_next.sess)
            
            

In [None]:
from dqn_tf import DeepQNetwork
import tensorflow as tf
import os
import numpy as np

class HiveMindTer(object):
    """Shared DQN controller with a replay buffer and epsilon-greedy policy.

    Two ``DeepQNetwork`` instances are held: ``q_eval`` (trained online) and
    ``q_next`` (the periodically refreshed target network).

    Args:
        alpha: Learning rate forwarded to both networks.
        gamma: Discount factor applied to the bootstrapped next-state value.
        mem_size: Capacity of the circular replay buffer.
        n_actions: Number of discrete actions.
        epsilon: Initial exploration probability.
        batch_size: Number of transitions sampled per ``learn`` call.
        replace_target: Target refresh period, measured in stored transitions.
        input_dims: Shape of a single state (without the batch axis).
        q_next_dir: Checkpoint directory of the target network.
        q_eval_dir: Checkpoint directory of the online network.
    """

    def __init__(self, alpha, gamma, mem_size, n_actions, epsilon,
                 batch_size, replace_target=5000, input_dims=(1, 15, 1),
                 q_next_dir='tmp/ter/q_next', q_eval_dir='tmp/ter/q_eval'):
        self.n_actions = n_actions
        self.action_space = list(range(self.n_actions))
        self.gamma = gamma
        self.mem_size = mem_size
        self.epsilon = epsilon
        self.batch_size = batch_size
        self.mem_cntr = 0
        self.replace_target = replace_target
        # Construction order (target first, then online) is kept as-is.
        self.q_next = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                                   name='ter_q_next', chkpt_dir=q_next_dir)
        self.q_eval = DeepQNetwork(alpha, n_actions, input_dims=input_dims,
                                   name='ter_q_eval', chkpt_dir=q_eval_dir)
        self.state_memory = np.zeros((self.mem_size, *input_dims))
        self.new_state_memory = np.zeros((self.mem_size, *input_dims))
        # Actions are stored one-hot encoded.
        self.action_memory = np.zeros((self.mem_size, self.n_actions), dtype=np.int8)
        self.reward_memory = np.zeros(self.mem_size)

    def store_transition(self, state, action, reward, state_):
        """Record one ``(state, action, reward, next state)`` transition.

        The buffer is circular: once ``mem_size`` entries exist the oldest
        slot is overwritten.
        """
        index = self.mem_cntr % self.mem_size
        self.state_memory[index] = state
        actions = np.zeros(self.n_actions)
        actions[action] = 1.0
        self.action_memory[index] = actions
        self.reward_memory[index] = reward
        self.new_state_memory[index] = state_

        self.mem_cntr += 1

    def choose_action(self, state):
        """Return an action index using an epsilon-greedy policy.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the arg-max of the online network's Q-values for ``state``.
        """
        if np.random.random() < self.epsilon:
            return np.random.choice(self.action_space)
        actions = self.q_eval.sess.run(self.q_eval.Q_values,
                                       feed_dict={self.q_eval.input: state})
        return np.argmax(actions)

    def _q_targets(self, q_eval, q_next, action_indices, reward_batch):
        """Build the regression targets for one sampled batch.

        Returns a copy of ``q_eval`` in which, for every row ``i``, only the
        entry of the action taken in that transition is replaced by
        ``reward[i] + gamma * max(q_next[i])``.
        """
        q_target = q_eval.copy()
        # Pair each row with its own action; ``q_target[:, action_indices]``
        # would write every sampled action column in every row.
        rows = np.arange(q_target.shape[0])
        q_target[rows, action_indices] = reward_batch + \
            self.gamma * np.max(q_next, axis=1)
        return q_target

    def learn(self):
        """Run one training step on a random batch of stored transitions.

        Does nothing while the replay buffer is empty. Refreshes the target
        network whenever ``mem_cntr`` is a multiple of ``replace_target`` and
        decays ``epsilon`` (floored at 0.05) once more than 10000 transitions
        have been stored.
        """
        if self.mem_cntr == 0:
            # Nothing to sample from yet.
            return
        if self.mem_cntr % self.replace_target == 0:
            self.update_graph()
        max_mem = min(self.mem_cntr, self.mem_size)
        batch = np.random.choice(max_mem, self.batch_size)
        state_batch = self.state_memory[batch]
        action_batch = self.action_memory[batch]
        # Decode the one-hot rows; works for any ``n_actions``.
        action_indices = np.argmax(action_batch, axis=1)
        reward_batch = self.reward_memory[batch]
        new_state_batch = self.new_state_memory[batch]

        q_eval = self.q_eval.sess.run(self.q_eval.Q_values,
                                      feed_dict={self.q_eval.input: state_batch})
        q_next = self.q_next.sess.run(self.q_next.Q_values,
                                      feed_dict={self.q_next.input: new_state_batch})

        q_target = self._q_targets(q_eval, q_next, action_indices, reward_batch)

        self.q_eval.sess.run(self.q_eval.train_op,
                             feed_dict={self.q_eval.input: state_batch,
                                        self.q_eval.actions: action_batch,
                                        self.q_eval.q_target: q_target})

        if self.mem_cntr > 10000:
            # Linear decay that never drops below the exploration floor.
            self.epsilon = max(self.epsilon - 4e-7, 0.05)

    def save_models(self):
        """Checkpoint both the online and the target network."""
        self.q_eval.save_checkpoint()
        self.q_next.save_checkpoint()

    def load_models(self):
        """Restore both networks from their checkpoints."""
        self.q_eval.load_checkpoint()
        self.q_next.load_checkpoint()

    def update_graph(self):
        """Copy the online network's weights into the target network.

        Values are read through ``q_eval.sess`` and written through
        ``q_next.sess`` so that the session used to evaluate the target
        network actually sees the new weights. ``Variable.load`` writes the
        value without adding new ops to the graph on every refresh.
        """
        eval_values = self.q_eval.sess.run(self.q_eval.params)
        for target_var, value in zip(self.q_next.params, eval_values):
            target_var.load(value, self.q_next.sess)
            
            

In [None]:
from mesa.visualization.ModularVisualization import ModularServer
from mesa.visualization.modules import CanvasGrid, ChartModule, TextElement
from mesa.visualization.UserParam import UserSettableParameter

from model import MapModel

class CCountElement(TextElement):
    """Text panel reporting the civilian head-count and the civil score."""

    def __init__(self):
        """No per-instance state is set up."""

    def render(self, model):
        """Return the status line for ``model``."""
        civilians = model.get_agent_count('Civilian')
        score = model.civilian_score
        pieces = ("# of Civilians: ", str(civilians), " Civil Score: ", str(score))
        return "".join(pieces)
        

class TCountElement(TextElement):
    """Text panel reporting the terrorist head-count and the terror score."""

    def __init__(self):
        """No per-instance state is set up."""

    def render(self, model):
        """Return the status line for ``model``."""
        terrorists = model.get_agent_count('Terrorist')
        score = model.terror_score
        pieces = ("# of Terrorists: ", str(terrorists), " Terror Score: ", str(score))
        return "".join(pieces)
        
class MCountElement(TextElement):
    """Text panel reporting how many military agents are present."""

    def __init__(self):
        """No per-instance state is set up."""

    def render(self, model):
        """Return the status line for ``model``."""
        troops = model.get_agent_count('Military')
        return "".join(("# of Troops: ", str(troops)))
        

def get_model_params():
    """Assemble the parameter dictionary handed to the model server.

    The grid is square; its side length is looked up from the map-size
    parameter's current value (``None`` for an unrecognised value).
    """
    map_size = UserSettableParameter("choice", "Map size", value="Large", choices=["Small", "Medium", "Large"])

    # Side length of the square grid for each named map size.
    side_by_name = {"Large": 50, "Medium": 25, "Small": 10}
    side = side_by_name.get(map_size.value)

    density = UserSettableParameter("slider", "Terrorist density", 0.25, 0.00, 1.00, 0.25)
    troop_size = UserSettableParameter("number", "Troop size", 10000)

    return {
        "height": side,
        "width": side,
        "density": density,
        "map_size": map_size,
        "troop_size": troop_size,
    }

def mapmodel_draw(agent):
    """Return the canvas portrayal dict for ``agent``.

    ``None`` yields ``None``. Every other agent is drawn as a filled circle;
    the three known agent types additionally receive a fill colour and a
    black stroke, while any other type keeps only the base portrayal.
    """
    if agent is None:
        return None

    portrayal = {"Shape": "circle", "r": 0.8, "Filled": "true", "Layer": 0}

    # (agent type, fill colour) pairs, checked in order.
    palette = (
        ("Terrorist", "#FF0000"),
        ("Civilian", "#00ff00"),
        ("Military", "#0000FF"),
    )
    for kind, fill in palette:
        if agent.type == kind:
            portrayal["Color"] = [fill]
            portrayal["stroke_color"] = "#000000"
            break

    return portrayal

# Assemble the visualisation: parameter widgets, text panels, grid canvas and
# the epsilon chart, then wire them into the ModularServer for MapModel.
model_params = get_model_params()

c_count_element = CCountElement()
t_count_element = TCountElement()
m_count_element = MCountElement()

canvas_element = CanvasGrid(mapmodel_draw, model_params["height"], model_params["width"], 500, 500)
# Despite its name, this chart plots the two epsilon series collected by the model.
ter_gamma_chart = ChartModule([{"Label": "Terrorist Epsilon", "Color": "Red"}, {"Label": "Military Epsilon", "Color": "Blue"}])

server = ModularServer(MapModel,
                        [canvas_element, c_count_element, t_count_element, m_count_element, ter_gamma_chart],
                        "Terrorist Response", model_params)

# Launch only when executed directly. Code that does `from server import server`
# decides for itself when to call `server.launch()`, instead of the server
# starting as a side effect of the import.
if __name__ == "__main__":
    server.launch()

In [None]:
import os
import numpy as np
import tensorflow as tf

class DeepQNetwork(object):
    """Small convolutional Q-network built with the TensorFlow 1.x graph API.

    Each instance opens its own ``tf.Session``, builds its layers under the
    variable scope ``name`` and keeps a ``tf.train.Saver`` for checkpointing.

    Args:
        lr: Learning rate of the Adam optimizer.
        n_actions: Number of Q-value outputs.
        name: Variable scope; also used to collect ``self.params``.
        fc1_dims: Width of the hidden dense layer.
        input_dims: Shape of one input sample (without the batch axis).
        chkpt_dir: Directory that holds ``deepqnet.ckpt``.
    """

    def __init__(self, lr, n_actions, name, fc1_dims=256,
                 input_dims=(1,13), chkpt_dir='tmp/dqn'):
        self.lr = lr
        self.name = name
        self.n_actions = n_actions
        self.fc1_dims = fc1_dims
        self.chkpt_dir = chkpt_dir
        self.input_dims = input_dims
        self.sess = tf.Session()
        self.build_network()
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()
        self.checkpoint_file = os.path.join(chkpt_dir, 'deepqnet.ckpt')
        # Trainable variables created under this network's scope.
        self.params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope=self.name)

    def build_network(self):
        """Create placeholders, layers, loss and training op under ``self.name``.

        Architecture: three 1x1 convolutions (32, 64, 128 filters with strides
        4, 2, 1), each followed by ReLU, then a ReLU dense layer of
        ``fc1_dims`` units and a linear output of ``n_actions`` Q-values.
        """
        with tf.variable_scope(self.name):
            self.input = tf.placeholder(tf.float32, shape=[None, *self.input_dims],
                                        name='inputs')
            self.actions = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                          name='action_taken')
            self.q_target = tf.placeholder(tf.float32, shape=[None, self.n_actions],
                                           name='q_value')

            conv1 = tf.layers.conv2d(inputs=self.input, filters=32,
                                     kernel_size=(1,1), strides=4, name='conv1',
                                     kernel_initializer=tf.variance_scaling_initializer(scale=2))
            conv1_activated = tf.nn.relu(conv1)

            conv2 = tf.layers.conv2d(inputs=conv1_activated, filters=64,
                                     kernel_size=(1,1), strides=2, name='conv2',
                                     kernel_initializer=tf.variance_scaling_initializer(scale=2))
            conv2_activated = tf.nn.relu(conv2)

            conv3 = tf.layers.conv2d(inputs=conv2_activated, filters=128,
                                     kernel_size=(1,1), strides=1, name='conv3',
                                     kernel_initializer=tf.variance_scaling_initializer(scale=2))
            conv3_activated = tf.nn.relu(conv3)

            flat = tf.layers.flatten(conv3_activated)
            dense1 = tf.layers.dense(flat, units=self.fc1_dims, activation=tf.nn.relu,
                                     kernel_initializer=tf.variance_scaling_initializer(scale=2))
            self.Q_values = tf.layers.dense(dense1, units=self.n_actions,
                                            kernel_initializer=tf.variance_scaling_initializer(scale=2))

            # Mean squared error over all action outputs against the fed targets.
            self.loss = tf.reduce_mean(tf.square(self.Q_values - self.q_target))

            self.train_op = tf.train.AdamOptimizer(self.lr).minimize(self.loss)

    def load_checkpoint(self):
        """Restore this network's session from ``self.checkpoint_file``."""
        print('... loading checkpoint ...')
        self.saver.restore(self.sess, self.checkpoint_file)

    def save_checkpoint(self):
        """Write this network's session to ``self.checkpoint_file``.

        The checkpoint directory is created first when it does not exist, so
        the first save on a fresh checkout does not fail.
        """
        print('... saving checkpoint ...')
        if self.chkpt_dir:
            os.makedirs(self.chkpt_dir, exist_ok=True)
        self.saver.save(self.sess, self.checkpoint_file)

In [None]:
from server import server

# Entry point: start the visualisation server only when run as a script.
if __name__ == "__main__":
    server.launch()