In [1]:
# GUI/rendering switches (the wx GUI cells that read them are commented out further down)
WITHGUI=True
DRAWING=True
# Adapted from: https://github.com/peterszabo77/2Dforaging/blob/master/ForagingModel.py
# FORAGING MODEL PARAMETERS
L=100 # side length of the square arena
FORAGER_SIZE = 1 # forager size (radius)
FORAGER_INTERACTION_RADIUS = 10.0 # forager interaction radius
FORAGER_SPEED = 10.0 # forager speed
FORAGER_DENSITY = 0.001 # density of foragers (per unit area)
N_FORAGERS = int(L*L*FORAGER_DENSITY) # number of foragers placed in the arena
FOOD_BR = 0.002 # food birth rate, per unit area
FOOD_DR = 0.1 # food decay rate, per food item
FOOD_SIZE = 1 # food item size (radius)
ELASTIC_COLLISIONS = False # if True, colliding foragers are deflected instead of obeying their action

DT=0.05 # integration step
DIR_RESOL = 10 # resolution/number of movement directions
N_ACTIONS = DIR_RESOL # one discrete action per movement direction
FORAGER_VISRESOL = 11 # resolution of the perceptive field (odd number, so there is a centre cell)

# REINFORCEMENT LEARNING PARAMETERS
BATCH_SIZE = 100 # number of experiences sampled per training step
TRAINING_STEPS = 10000 # number of training steps
TRAINING_START = 1000 # start training after some iterations
TRAINING_INTERVAL = 100 # run a training step every ... game iterations
MEMORY_SIZE = TRAINING_INTERVAL*N_FORAGERS # replay memory size
COPY_STEPS = 1 # copy the critic to the actor every ... training steps
DISCOUNT_RATE = 0.95 # discount applied to the best next-state Q-value in the target
EPSILON = 0.01 # probability of a random action under e-greedy exploration
INIT_LEARNING_RATE = 0.001 # critic DQN's initial learning rate
LEARNING_RATE_LIFETIME = 500 # learning steps between halving the learning rate
EXPLORATION_STRATEGY =  'e-greedy' # 'e-greedy' selects e-greedy; any other value selects the Boltzmann (softmax) policy


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from math import floor, ceil

def getPaddingForSAMEConv(L_in, kernel_size, stride, dilation):
	"""Return the per-side padding that yields 'SAME'-style convolution output size.

	The total padding is the dilated kernel extent minus the part of the input
	already covered by the stride (``stride`` itself when ``L_in`` is a multiple
	of it, otherwise ``L_in % stride``), floored at zero.

	Args:
		L_in: input width (number of sites along one axis).
		kernel_size: convolution kernel width.
		stride: convolution stride.
		dilation: kernel dilation.

	Returns:
		The padding (int) to apply on each side.

	Raises:
		ValueError: if the total padding is odd, i.e. it cannot be split evenly
			between the two sides. (Previously this printed a message and
			called ``exit(-1)``, which terminates the notebook kernel.)
	"""
	#https://pytorch.org/docs/stable/nn.html
	effective_kernel = dilation*(kernel_size - 1) + 1
	remainder = L_in % stride
	covered = stride if remainder == 0 else remainder
	doublepadding = max(effective_kernel - covered, 0)
	if doublepadding % 2 != 0:
		raise ValueError('Proper SAME padding is not possible with these L_in, kernelsize, stride, dilation parameters.')
	# integer division keeps the result an int without a float round-trip
	return doublepadding // 2

def getLoutForSAMEConv(L_in, stride):
	"""Output width of a 'SAME'-padded convolution: the input width divided by the stride, rounded up."""
	#https://pytorch.org/docs/stable/nn.html
	ratio = float(L_in) / float(stride)
	return ceil(ratio)

def getLoutForVALIDConv(L_in, padding, kernel_size, stride, dilation):
	"""Output width of a convolution with explicit padding.

	Evaluates floor((L_in + 2*padding - dilation*(kernel_size-1) - 1) / stride + 1).
	"""
	#https://pytorch.org/docs/stable/nn.html
	usable_width = float(L_in) + 2 * padding - dilation * (kernel_size - 1) - 1
	steps = usable_width / float(stride) + 1
	return floor(steps)

# CNN attributes
# These module-level values are read by q_network.__init__ when the network is built.
# image
image_width = FORAGER_VISRESOL # the width of the (square) input layer (visual information)
image_channels = 2 # number of features (one value for each site): food and other foragers
# convolutional layers
C_outs = [32, 64, 64] # number of extracted features in subsequent convolutional layers
C_ins = [image_channels]+C_outs[0:-1] # number of input channels (features): each layer consumes the previous layer's outputs
dilation=1 # dilation shared by all convolutional layers
conv_kernel_sizes = [3, 3, 3] # convolutional kernel widths in subsequent convolutional layers
conv_strides = [1, 1, 1] # convolution strides in subsequent convolutional layers
L_ins=[] # input width of each convolutional layer
L_outs=[] # output width of each convolutional layer
conv_paddings=[] # per-side SAME padding of each convolutional layer
for i in range(len(conv_kernel_sizes)):
	# the first layer sees the image; every later layer sees the previous layer's output
	if i==0:
		L_ins.append(image_width)
	else:
		L_ins.append(L_outs[i-1])
	conv_paddings.append(getPaddingForSAMEConv(L_ins[i], conv_kernel_sizes[i], conv_strides[i], dilation))
	L_outs.append(getLoutForSAMEConv(L_ins[i], conv_strides[i]))
# fully connected layers
n_hidden_ins = L_outs[-1]*L_outs[-1]*C_outs[-1] # number of inputs in the first fully connected layer (flattened last conv output)
n_hidden = 512 # number of cells in the first fully connected layer
n_outputs=DIR_RESOL # number of cells in the second fully connected layer (outputs): one Q-value per movement direction

class q_network(nn.Module):
	"""Convolutional Q-network: three conv layers, one hidden dense layer, linear Q-value head.

	Layer sizes come from the module-level CNN attributes (C_ins, C_outs,
	conv_kernel_sizes, conv_strides, conv_paddings, n_hidden_ins, n_hidden,
	n_outputs), which must be defined before an instance is created.
	"""

	def __init__(self):
		super(q_network, self).__init__()
		self.conv1_layer = nn.Conv2d(in_channels = C_ins[0], out_channels = C_outs[0], kernel_size=conv_kernel_sizes[0], stride=conv_strides[0], padding=conv_paddings[0])
		self.conv2_layer = nn.Conv2d(in_channels = C_ins[1], out_channels = C_outs[1], kernel_size=conv_kernel_sizes[1], stride=conv_strides[1], padding=conv_paddings[1])
		self.conv3_layer = nn.Conv2d(in_channels = C_ins[2], out_channels = C_outs[2], kernel_size=conv_kernel_sizes[2], stride=conv_strides[2], padding=conv_paddings[2])
		self.hidden = nn.Linear(n_hidden_ins, n_hidden)
		self.Q_outputs = nn.Linear(n_hidden, n_outputs)

	def forward(self, x):
		"""Map a batch of visual inputs (batch, channels, H, W) to Q-values (batch, n_outputs)."""
		x = F.relu(self.conv1_layer(x))
		x = F.relu(self.conv2_layer(x))
		x = F.relu(self.conv3_layer(x))
		# flatten everything except the batch dimension
		x = x.view(x.size(0), -1)
		x = F.relu(self.hidden(x))
		# The head stays linear: rewards can be negative (collision penalty), so
		# Q-values must be free to go below zero. A ReLU here would clamp them at 0
		# and zero the gradient for every clamped output.
		return self.Q_outputs(x)

	def num_flat_features(self, x):
		"""Return the number of features per sample, i.e. the product of all non-batch dimensions."""
		num_features = 1
		for s in x.size()[1:]:  # all dimensions except the batch dimension
			num_features *= s
		return num_features

In [10]:
# Build one network instance to inspect its layer layout.
q = q_network()
# NOTE: `q.parameters` is passed without being called, so this prints the bound
# method's repr (which embeds the module summary) rather than the parameter tensors.
print(q.parameters)

<bound method Module.parameters of q_network(
  (conv1_layer): Conv2d(2, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_layer): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_layer): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (hidden): Linear(in_features=7744, out_features=512, bias=True)
  (Q_outputs): Linear(in_features=512, out_features=10, bias=True)
)>


In [3]:
import pygame as pg
import pygame.gfxdraw
from pygame.math import Vector2 as vec

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [6]:
import itertools
import random
from math import ceil, pi, sin, cos, atan2

import numpy as np


def Dist2(x1,y1,x2,y2):
	"""Squared distance between (x1, y1) and (x2, y2) on the periodic arena of side L.

	Along each axis the shorter of the direct gap and the wrap-around gap is used.
	"""
	def periodic_gap(a, b):
		gap = abs(a - b)
		return min(gap, L - gap)

	return periodic_gap(x1, x2)**2 + periodic_gap(y1, y2)**2

def SignedDirs(x1,y1,x2,y2):
	"""Signed (xd, yd) displacement from (x1, y1) to (x2, y2) on the periodic arena of side L.

	Per axis, the direct difference is returned when it is strictly shorter than
	the wrap-around route; otherwise the difference is shifted by +L or -L so the
	displacement points across the boundary.
	"""
	def signed_gap(start, end):
		direct = end - start
		if abs(direct) < L - abs(direct):
			return direct
		if end < start:
			return direct + L
		return direct - L

	yd = signed_gap(y1, y2)
	xd = signed_gap(x1, x2)
	return (xd, yd)

class Forager(pg.sprite.Sprite):
	"""A foraging agent living on the periodic arena, also usable as a pygame sprite.

	Class attributes hold the model parameters shared by all foragers
	(`size`, `R`, `Rsq`, `speed`) and the precomputed `interaction_filter`.
	Note that instances set their own `self.size` (sprite side length in pixels),
	which shadows the class-level `size` (collision radius); model code that
	needs the collision radius reads `Forager.size`.
	"""
	size=FORAGER_SIZE #size radius
	R=FORAGER_INTERACTION_RADIUS #interaction radius
	Rsq=R**2 #squared interaction radius
	speed=FORAGER_SPEED #movement speed (was a hard-coded 10.0, the same value)
	# Eye colour of the sprite. The original code referenced undefined names
	# `red`/`green` (NameError); (red, red, green) with red=0, green=255 is used here.
	eye_color=(0, 0, 255)
	# interaction_filter[c][i][j] is 1 when perception cell (i, j) lies within R of
	# the centre cell and 0 otherwise; both channels carry the same mask.
	interaction_filter=np.zeros((2, FORAGER_VISRESOL, FORAGER_VISRESOL))
	m=int(FORAGER_VISRESOL/2) #index of the centre perception cell
	cellsize=2*R/FORAGER_VISRESOL #side length of one perception cell
	for i in range(FORAGER_VISRESOL):
		for j in range(FORAGER_VISRESOL):
			# per-axis distance from the forager to the nearest edge of cell (i, j)
			di=abs(i-m)
			dj=abs(j-m)
			if di!=0:
				di=(0.5+di-1)*cellsize
			if dj!=0:
				dj=(0.5+dj-1)*cellsize
			if di**2+dj**2<=Rsq:
				interaction_filter[:, i, j]=1

	def __init__(self, size):
		"""Place a forager at a uniformly random position in a square of side `size`."""
		super().__init__()
		self.x=random.random()*size
		self.y=random.random()*size
		self.direction=random.randrange(DIR_RESOL)*2*pi/DIR_RESOL #movement direction in radians
		self.newdirection=None
		self.dx=None
		self.dy=None
		self.gridcell=None
		self.reward=None
		self.oldcolliders=[]
		self.pos = vec(self.x, self.y)

		# sprite side length in pixels, forced to be odd so the circle has a centre pixel
		self.size = 5
		if self.size % 2 == 0:
			self.size += 1
		self.radius = (self.size - 1) // 2

		# required to draw the creature, the variable name must be "image" for pygame sprite.
		self.image = pg.Surface((self.size, self.size), pg.SRCALPHA)
		self.orig_image = self.image
		self.rect = self.image.get_rect(center=self.pos)

		# this is for drawing something similar to eyes
		self.eye_radius = self.radius // 3

		self.color = pg.Color('green')

		self.vel = vec(0.0, 0.0)
		self.acc = vec(0.0, 0.0)
		self.desired = vec(0.0, 0.0)  # drawing purposes

		# Paint the sprite once; without this the surface stays fully transparent
		# and the forager is invisible when the sprite group is drawn.
		self.draw_image()

	def draw_image(self):
		"""Paint the body and eyes onto `orig_image`, then rotate it to face `vel`."""
		self.color = (100, 155, 35)

		pg.gfxdraw.filled_circle(self.orig_image, self.radius, self.radius, self.radius,
									self.color)
		eye_x = self.size - self.eye_radius
		eye_size = abs(self.eye_radius - 2)
		for eye_y in (abs(self.radius - self.eye_radius - 2), self.radius + self.eye_radius + 2):
			pg.gfxdraw.filled_circle(self.orig_image, eye_x, eye_y, eye_size, Forager.eye_color)

		# rotate image towards where the creature is going
		_, angle = self.vel.as_polar()  # get direction angle
		# transform with the negative angle
		self.image = pg.transform.rotate(self.orig_image, -angle)
		# update rect position
		self.rect = self.image.get_rect(center=self.rect.center)

	@staticmethod
	def _visual_cell(offset):
		"""Convert a signed offset from the forager into a perception-grid index.

		The result is clamped to the valid index range: floating-point rounding
		at the edge of the interaction radius could otherwise produce an index
		equal to FORAGER_VISRESOL.
		"""
		index = int((Forager.R + offset) / Forager.cellsize)
		return min(max(index, 0), FORAGER_VISRESOL - 1)

	#required shape is (features, Lh, Lv)
	def GetVI(self, grid):
		"""Return this forager's visual input as an array of shape (2, VISRESOL, VISRESOL).

		Cells outside the interaction radius are -1, empty cells inside it are 0,
		and cells containing a food item (channel 0) or another forager
		(channel 1) are 1.

		Args:
			grid: object providing GetNearbyFooditems(forager) and
				GetNearbyForagers(forager).
		"""
		# subtracting 1 creates a fresh array, so the shared filter is never modified
		m=Forager.interaction_filter-1
		perceived = (grid.GetNearbyFooditems(self), grid.GetNearbyForagers(self))
		for channel, neighbours in enumerate(perceived):
			for other in neighbours:
				xd, yd=SignedDirs(self.x, self.y, other.x, other.y)
				i=Forager._visual_cell(xd) # distance from left / cellsize
				j=Forager._visual_cell(yd) # distance from bottom / cellsize
				m[channel][i][j]=1
		return m

class FoodItem(pg.sprite.Sprite):
	"""A food item at a random arena position, drawn as a small filled circle."""
	birthRate=FOOD_BR
	decayRate=FOOD_DR
	size=FOOD_SIZE

	def __init__(self, sidelength):
		"""Create a food item uniformly at random inside a square of side `sidelength`."""
		super().__init__()
		start_x=random.random()*sidelength
		start_y=random.random()*sidelength
		self.x, self.y = start_x, start_y
		self.gridcell=None
		self.pos = vec(start_x, start_y)

		# sprite side length in pixels; an even value is bumped to the next odd one
		diameter = 2
		diameter += 1 - diameter % 2
		self.radius = (diameter - 1) // 2
		self.color = (50, 50, 255)

		# the sprite is painted once here, since food never changes appearance
		self.image = pg.Surface((diameter, diameter), pg.SRCALPHA)
		self.rect = self.image.get_rect(center=self.pos)
		pg.gfxdraw.filled_circle(self.image, self.radius, self.radius, self.radius, self.color)



class GridCell():
	"""One cell of the spatial grid: its (i, j) index plus the foragers and food items inside it."""
	def __init__(self, i, j):
		self.i, self.j = i, j
		self.foragers, self.fooditems = [], []
	
class Grid():
	"""Uniform grid over the periodic arena, used to find nearby foragers and food items.

	The arena of side L is split into `resolution` x `resolution` cells, each at
	most 2*FORAGER_INTERACTION_RADIUS wide. Neighbour queries only inspect the
	3x3 block of cells (with wrap-around) around the querying forager.
	"""
	def __init__(self):
		self.resolution=ceil(L/(2*FORAGER_INTERACTION_RADIUS))
		self.cellsize=L/self.resolution
		self.gridcells=[[GridCell(i,j) for j in range(self.resolution)] for i in range(self.resolution)]

	def _cell_at(self, x, y):
		"""Return the grid cell containing (x, y).

		Indices wrap around: a coordinate equal to the arena side (which a
		floating-point modulo can produce) maps to cell 0 instead of raising
		IndexError.
		"""
		i = int(x // self.cellsize) % self.resolution
		j = int(y // self.cellsize) % self.resolution
		return self.gridcells[i][j]

	@staticmethod
	def _sorted_by_distance(distances, items):
		"""Return `items` ordered by the matching entries of `distances`.

		Only the distances are compared, so equal distances keep their original
		order. Sorting (distance, item) tuples instead would fall back to
		comparing the items themselves on a tie and raise TypeError.
		"""
		order = sorted(range(len(items)), key=distances.__getitem__)
		return [items[k] for k in order]

	def AttachForagers(self, foragers):
		"""Register each forager in the cell that contains its position."""
		for forager in foragers:
			cell = self._cell_at(forager.x, forager.y)
			forager.gridcell = cell
			cell.foragers.append(forager)

	def AttachFooditems(self, fooditems):
		"""Register each food item in the cell that contains its position."""
		for fooditem in fooditems:
			cell = self._cell_at(fooditem.x, fooditem.y)
			fooditem.gridcell = cell
			cell.fooditems.append(fooditem)

	def UpdateForager(self, forager):
		"""Move a forager to its new cell if its position left the current one."""
		cell = self._cell_at(forager.x, forager.y)
		if forager.gridcell != cell:
			forager.gridcell.foragers.remove(forager)
			forager.gridcell = cell
			cell.foragers.append(forager)

	def RemoveFoodItem(self, fooditem):
		"""Unregister a food item from the cell it is attached to."""
		fooditem.gridcell.fooditems.remove(fooditem)

	def getNeighbourCells(self, gridcell):
		"""Return the cells of the wrap-around 3x3 block centred on `gridcell`.

		Each cell appears once; on grids narrower than three cells the wrapped
		offsets coincide and would otherwise list the same cell repeatedly.
		"""
		templist=[]
		seen=set()
		for i, j in itertools.product((-1,0,1),(-1,0,1)):
			ni=(gridcell.i+i) % self.resolution
			nj=(gridcell.j+j) % self.resolution
			if (ni, nj) in seen:
				continue
			seen.add((ni, nj))
			templist.append(self.gridcells[ni][nj])
		return templist

	def GetNearbyFooditems(self, forager):
		"""Return food items strictly within the interaction radius of `forager`, nearest first."""
		fooditems=[]
		distances=[]
		for ncell in self.getNeighbourCells(forager.gridcell):
			for fooditem in ncell.fooditems:
				distance2=Dist2(forager.x, forager.y, fooditem.x, fooditem.y)
				if distance2<Forager.Rsq:
					fooditems.append(fooditem)
					distances.append(distance2)
		return self._sorted_by_distance(distances, fooditems)

	def GetNearbyForagers(self, forager):
		"""Return the other foragers strictly within the interaction radius of `forager`, nearest first."""
		foragers=[]
		distances=[]
		for ncell in self.getNeighbourCells(forager.gridcell):
			for nforager in ncell.foragers:
				if nforager==forager:
					continue
				distance2=Dist2(forager.x, forager.y, nforager.x, nforager.y)
				if distance2<Forager.Rsq:
					foragers.append(nforager)
					distances.append(distance2)
		return self._sorted_by_distance(distances, foragers)

class ForagingModel():
	"""The foraging simulation: foragers and food items on a periodic arena, drawn with pygame.

	`Update` advances the model by one time step of length DT given one action
	(movement-direction index) per forager; `GetVIs` and `GetRewards` expose the
	per-forager observations and rewards.
	"""
	def __init__(self):
		random.seed()
		# pygame must be initialised before a display surface is requested
		pg.init()
		self.Reset()
		self.screen = pg.display.set_mode((WIN_WIDTH, WIN_HEIGHT), pg.SRCALPHA)
		self.clock = pg.time.Clock()
		self.running = True

	def Reset(self):
		"""(Re)create the foragers, the food items, their sprite groups and the spatial grid."""
		self.L=L # arena side length
		self.A=self.L**2 # arena area
		self.dT=DT

		self.all_sprites = pg.sprite.Group()
		self.all_creatures = pg.sprite.Group()
		self.all_foods = pg.sprite.Group()

		# foragers
		self.foragers=[Forager(self.L) for i in range(N_FORAGERS)]
		for forager in self.foragers:
			self.all_sprites.add(forager)
			self.all_creatures.add(forager)

		# food items, started at the birth/decay equilibrium number
		equilibrium_number=int(self.A*FoodItem.birthRate/FoodItem.decayRate)
		self.fooditems=[FoodItem(self.L) for i in range(equilibrium_number)]
		for fooditem in self.fooditems:
			self.all_sprites.add(fooditem)
			self.all_foods.add(fooditem)

		# grid
		self.grid=Grid()
		self.grid.AttachForagers(self.foragers)
		self.grid.AttachFooditems(self.fooditems)

	def Update(self, action_list):
		"""Advance the model by one time step.

		Args:
			action_list: one movement-direction index per forager, in the same
				order as `self.foragers`.
		"""
		self._resolve_collisions()
		self._apply_actions(action_list)
		self._consume_fooditems()
		self._update_fooditems()
		self._move_foragers()
		self._render()

	def _resolve_collisions(self):
		"""Detect forager collisions, set collision penalties and post-collision directions."""
		for forager in self.foragers:
			interactingForagers=self.grid.GetNearbyForagers(forager)
			collidedForagers=[fo for fo in interactingForagers if Dist2(forager.x, forager.y, fo.x, fo.y)<(2*Forager.size)**2]
			# only collisions that started this step are penalised
			newcolliders=[fo for fo in collidedForagers if fo not in forager.oldcolliders]
			if ELASTIC_COLLISIONS and len(newcolliders)>0:
				otherforager=newcolliders[0]
				x_dist, y_dist = SignedDirs(forager.x, forager.y, otherforager.x, otherforager.y)
				theta=atan2(y_dist, x_dist) # direction of the line joining the two foragers
				alpha1=forager.direction
				alpha2=otherforager.direction
				vx=cos(alpha2-theta)*cos(theta)+sin(alpha1-theta)*cos(theta+pi/2)
				vy=cos(alpha2-theta)*sin(theta)+sin(alpha1-theta)*sin(theta+pi/2)
				forager.newdirection=atan2(vy, vx)
			else:
				forager.newdirection=forager.direction
			forager.reward=-1*len(newcolliders)
			forager.oldcolliders=collidedForagers

	def _apply_actions(self, action_list):
		"""Turn each forager's action (or collision deflection) into a velocity."""
		for i, forager in enumerate(self.foragers):
			if len(forager.oldcolliders)==0 or not ELASTIC_COLLISIONS:
				forager.direction=action_list[i]*2*pi/DIR_RESOL
			else:
				forager.direction=forager.newdirection
			forager.dx=Forager.speed*cos(forager.direction)
			forager.dy=Forager.speed*sin(forager.direction)

	def _add_fooditem(self, fooditem):
		"""Register a new food item with the model list, the sprite groups and the grid."""
		self.fooditems.append(fooditem)
		self.all_sprites.add(fooditem)
		self.all_foods.add(fooditem)
		self.grid.AttachFooditems([fooditem])

	def _remove_fooditem(self, fooditem):
		"""Remove a food item from the grid, the model list and every sprite group."""
		self.grid.RemoveFoodItem(fooditem)
		self.fooditems.remove(fooditem)
		# kill() detaches the sprite from all groups, so it is no longer drawn or kept alive
		fooditem.kill()

	def _consume_fooditems(self):
		"""Let each forager eat the food items it touches, adding one reward per item."""
		for forager in self.foragers:
			interactingFooditems=self.grid.GetNearbyFooditems(forager)
			consumedFooditems=[fi for fi in interactingFooditems if Dist2(forager.x, forager.y, fi.x, fi.y)<Forager.size**2]
			forager.reward+=len(consumedFooditems)
			for fooditem in consumedFooditems:
				self._remove_fooditem(fooditem)

	def _update_fooditems(self):
		"""Stochastic food birth (at most one per step) and per-item decay."""
		if FoodItem.birthRate*self.A*self.dT>1 or FoodItem.decayRate*self.dT>1:
			print('fooditem rates are too high')
		if random.random()<FoodItem.birthRate*self.A*self.dT:
			self._add_fooditem(FoodItem(self.L))
		decayedFooditems=[f for f in self.fooditems if random.random()<FoodItem.decayRate*self.dT]
		for fooditem in decayedFooditems:
			self._remove_fooditem(fooditem)

	def _move_foragers(self):
		"""Integrate forager positions (periodic boundaries) and keep grid and sprites in sync."""
		for forager in self.foragers:
			forager.x=(forager.x+forager.dx*self.dT) % self.L
			forager.y=(forager.y+forager.dy*self.dT) % self.L
			self.grid.UpdateForager(forager)
			# the sprite is drawn at rect, so it has to follow the model position
			forager.pos = vec(forager.x, forager.y)
			forager.rect.center = (int(forager.x), int(forager.y))

	def _render(self):
		"""Process window events and, when DRAWING is set, draw and present the current frame."""
		# draining the event queue keeps the window responsive during long runs
		for event in pg.event.get():
			if event.type == pg.QUIT:
				self.running = False
		if not DRAWING:
			return
		self.screen.fill((114, 115, 116))
		self.all_sprites.draw(self.screen)
		# without a flip the drawn frame never reaches the window
		pg.display.flip()

	#returns the visual information of all foragers
	#required shape is (batchsize, features, Lh, Lv)
	def GetVIs(self):
		"""Return the stacked visual inputs of all foragers, shape (n_foragers, features, Lh, Lv)."""
		return np.stack([forager.GetVI(self.grid) for forager in self.foragers], axis=0)

	def GetRewards(self):
		"""Return the reward each forager collected in the last `Update`, in forager order."""
		return [forager.reward for forager in self.foragers]

In [None]:
# Scratch check: construct a single forager with a position range of 100.
f = Forager(100)

In [5]:
from collections import deque
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from collections import deque

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Two networks with identical architecture: the critic is the one being trained,
# the actor starts as an exact copy of it and is kept in evaluation mode.
critic_DQN = q_network().to(device)
actor_DQN = q_network().to(device)
actor_DQN.load_state_dict(critic_DQN.state_dict())
actor_DQN.eval()

# Mean-squared error between predicted and target Q-values; only the critic's
# parameters are handed to the optimizer.
loss_function = nn.MSELoss()
optimizer = optim.Adam(critic_DQN.parameters(), lr=INIT_LEARNING_RATE)

def GetActionsEGreedy(q_values):
	"""Pick one action per row of `q_values` with epsilon-greedy exploration.

	With probability EPSILON a uniformly random action index is chosen,
	otherwise the index of the largest Q-value in that row.

	Returns:
		An int64 NumPy array with one action index per row.
	"""
	n_agents = len(q_values)
	chosen = np.empty(n_agents, dtype=np.int64)
	for agent in range(n_agents):
		explore = np.random.rand() < EPSILON
		# random action when exploring, optimal action otherwise
		chosen[agent] = np.random.randint(N_ACTIONS) if explore else np.argmax(q_values[agent])
	return chosen

def GetActionsBoltzmann(q_values, temperature=0.1):
	"""Sample one action per row of `q_values` from a Boltzmann (softmax) distribution.

	Args:
		q_values: 2-D tensor (or array-like) with one row of Q-values per agent
			and one column per action.
		temperature: softmax temperature; lower values make the choice greedier.
			Defaults to the previously hard-coded 0.1.

	Returns:
		An int64 NumPy array with one sampled action index per row.
	"""
	# work on a detached CPU copy so `.numpy()` is valid for any input tensor
	q_values = torch.as_tensor(q_values).detach().cpu()
	n_agents = q_values.shape[0]
	# the number of actions is the row length, so the probabilities always match the action list
	actionlist = np.arange(q_values.shape[-1])
	actions = np.empty(n_agents, dtype=np.int64)
	# iterate over agents (rows); the old loop ran over range(N_ACTIONS), which only
	# worked while the number of agents happened to equal the number of actions
	for i in range(n_agents):
		# double precision keeps the probabilities summing to 1 within NumPy's tolerance
		softmax_distribution = F.softmax(q_values[i].double()/temperature, dim=0).numpy()
		actions[i] = np.random.choice(actionlist, p=softmax_distribution)
	return actions

# Bind the exploration policy once: 'e-greedy' selects the epsilon-greedy policy,
# any other setting selects the Boltzmann (softmax) policy.
GetActions = GetActionsEGreedy if EXPLORATION_STRATEGY=='e-greedy' else GetActionsBoltzmann

def AdjustLearningRate(optimizer, training_step):
	"""Set the optimizer's learning rate for the given training step.

	The rate starts at INIT_LEARNING_RATE and is halved once for every
	LEARNING_RATE_LIFETIME completed training steps; it is written to every
	parameter group of `optimizer`.
	"""
	completed_halvings = int(training_step / LEARNING_RATE_LIFETIME)
	decayed_rate = INIT_LEARNING_RATE / 2**completed_halvings
	for group in optimizer.param_groups:
		group['lr'] = decayed_rate


# pygame window settings. WIN_WIDTH and WIN_HEIGHT are read by ForagingModel.__init__,
# so this cell has to be executed before a model is created.
WIN_WIDTH = 1280 # window width passed to pg.display.set_mode
WIN_HEIGHT = 860 # window height passed to pg.display.set_mode
FPS = 40 # NOTE(review): not referenced by any live code in this notebook
BACKGROUND_COLOR = (117, 117, 117) # NOTE(review): only referenced by commented-out drawing code


class ReinforcementLearning():
	"""Deep Q-learning driver: plays the model with the actor network and trains the critic.

	Experiences (state, action, reward, next_state) of all foragers are stored
	in a bounded replay memory, from which training batches are sampled.
	"""
	def __init__(self, model):
		"""
		Args:
			model: the simulation; must provide GetVIs(), Update(actions) and GetRewards().
		"""
		self.model=model
		self.reward_queue = deque([], maxlen=MEMORY_SIZE) #queue list of rewards for performance measurement
		self.replay_memory = deque([], maxlen=MEMORY_SIZE) #replay memory of (state, action, reward, next_state)

	def GetMemorySample(self, batch_size):
		"""Draw up to `batch_size` distinct experiences from the replay memory.

		Returns:
			A tuple (states, actions, rewards, next_states) of NumPy arrays whose
			first dimension is the number of sampled experiences.
		"""
		indices = np.random.permutation(len(self.replay_memory))[:batch_size]
		batch = [self.replay_memory[idx] for idx in indices]
		# regroup the experience tuples column-wise: state, action, reward, next_state
		cols = [np.array([experience[k] for experience in batch]) for k in range(4)]
		return (cols[0], cols[1], cols[2], cols[3])

	def SetCanvas(self, modelcanvas):
		"""Remember the canvas object used by an external GUI."""
		self.modelcanvas=modelcanvas

	@staticmethod
	def _clip_gradients(parameters, limit=1.0):
		"""Clamp every available gradient to [-limit, limit] in place.

		`clamp_` (with the trailing underscore) modifies the gradient itself;
		the out-of-place `clamp` returns a new tensor and leaves it unchanged.
		"""
		for param in parameters:
			if param.grad is not None:
				param.grad.clamp_(-limit, limit)

	def _log_progress(self, training_step):
		"""Append the training step and current mean reward to data.txt and print them."""
		mean_reward = np.mean(self.reward_queue)
		with open('data.txt','a') as log_file:
			log_file.write(str(training_step)+' '+str(mean_reward)+'\n')
		print(training_step, mean_reward)

	def _train_critic(self, training_step):
		"""Run one optimisation step of the critic on a batch sampled from the replay memory."""
		old_VIs, actions, rewards, next_VIs = self.GetMemorySample(BATCH_SIZE)

		# targets come from the actor network and must not receive gradients
		with torch.no_grad():
			next_Q_values = actor_DQN(torch.from_numpy(next_VIs).float().to(device))
		max_next_Q_values = torch.max(next_Q_values, dim=1, keepdim=True)[0] #we need only the first element of the resulting tuple
		reward_column = torch.as_tensor(rewards, dtype=torch.float32, device=device).view(-1, 1)
		target_Q_values = reward_column + DISCOUNT_RATE * max_next_Q_values #expected_state_action_values

		# Q-value the critic assigns to the action that was actually taken
		action_column = torch.as_tensor(actions, dtype=torch.int64, device=device).view(-1, 1)
		Q_values_for_actions = critic_DQN(torch.from_numpy(old_VIs).float().to(device)).gather(1, action_column)

		loss = loss_function(Q_values_for_actions, target_Q_values)

		AdjustLearningRate(optimizer, training_step)
		optimizer.zero_grad()
		loss.backward()
		self._clip_gradients(critic_DQN.parameters())
		optimizer.step()

	def StartLearning(self):
		"""Play the model and train the critic until all training steps are done.

		The first TRAINING_START iterations only fill the replay memory; after
		that a training step runs every TRAINING_INTERVAL iterations. pygame is
		shut down when the loop ends, including when it is interrupted.
		"""
		training_step=0
		try:
			for iteration in range(TRAINING_START+TRAINING_STEPS*TRAINING_INTERVAL):
				# Actor evaluates action (inputs go to the networks' device, results back to the CPU)
				states=self.model.GetVIs()
				with torch.no_grad():
					Q_values = actor_DQN(torch.from_numpy(states).float().to(device)).cpu()
				actions = GetActions(Q_values)

				# Actor plays
				self.model.Update(actions)
				rewards = self.model.GetRewards()
				next_states=self.model.GetVIs()

				# Append to reward list for performance measurement
				self.reward_queue.extend(rewards)

				# Add experience to replay_memory
				self.replay_memory.extend(zip(states, actions, rewards, next_states))

				# Check the condition for learning step
				if iteration < TRAINING_START or (iteration-TRAINING_START) % TRAINING_INTERVAL != 0:
					continue

				self._log_progress(training_step)

				# Critic learning step
				self._train_critic(training_step)
				training_step+=1

				# Regularly copy critic to actor
				if training_step % COPY_STEPS == 0:
					actor_DQN.load_state_dict(critic_DQN.state_dict())
		finally:
			pg.quit()


In [None]:
# import sys
# import wx
# from wx import glcanvas
# from OpenGL.GL import *
# from OpenGL.GLU import gluNewQuadric, gluDisk
# from OpenGL.GLUT import *
# from math import radians, degrees, sin, cos, pi
# from ForagingModel import *
# from ReinforcementLearning import *
# from parameters import *

# class MyFrame(wx.Frame):
# 	def __init__(self, parent, rl):
# 		super(MyFrame, self).__init__(parent, title="2D Foraging Model", style=wx.DEFAULT_FRAME_STYLE, size=(400, 400))
# 		self.rl=rl
# 		self.model=rl.model
# 		self.panel=MainPanel(self, self.rl, self.model)
# 		self.rl.SetCanvas(self.panel.modelCanvas)
# 		self.Show(True)
# 		wx.CallLater(1000, self.rl.StartLearning)
		
# class MyCanvasBase(glcanvas.GLCanvas):
# 	def __init__(self, parent):
# 		glcanvas.GLCanvas.__init__(self, parent, -1)
# 		self.init = False
# 		self.context = glcanvas.GLContext(self)
		
# 		self.size = None
# 		self.Bind(wx.EVT_ERASE_BACKGROUND, self.OnEraseBackground)
# 		self.Bind(wx.EVT_SIZE, self.OnSize)
# 		self.Bind(wx.EVT_PAINT, self.OnPaint)

# 	def OnEraseBackground(self, event):
# 		pass # Do nothing, to avoid flashing on MSW.

# 	def OnSize(self, event):
# 		wx.CallAfter(self.DoSetViewport)
# 		event.Skip()

# 	def DoSetViewport(self):
# 		size = self.size = self.GetClientSize()
# 		self.SetCurrent(self.context)
# 		glViewport(0, 0, size.width, size.height)
		
# 	def OnPaint(self, event):
# 		dc = wx.PaintDC(self)
# 		self.SetCurrent(self.context)
# 		if not self.init:
# 			self.InitGL()
# 			self.init = True
# 		self.OnDraw()

# class ModelCanvas(MyCanvasBase):
# 	def __init__(self, parent, model):
# 		MyCanvasBase.__init__(self, parent)
# 		self.model=model
# 		self.drawing=DRAWING
# 		self.Bind(wx.EVT_LEFT_UP, self.OnLeftUp)

# 	def InitGL(self):
# 		# set viewing projection
# 		glMatrixMode(GL_PROJECTION)
# 		glLoadIdentity()
# 		glOrtho(0, self.model.L, 0, self.model.L, -1, 1)
# 		self.quadricObject=gluNewQuadric()

# 	def OnLeftUp(self, e):
# 		if self.drawing:
# 			self.drawing=False
# 		else:
# 			self.drawing=True

# 	def OnDraw(self):
# 		if not self.drawing:
# 			return
# 		# clear color and depth buffers
# 		glClear(GL_COLOR_BUFFER_BIT)

# 		posx=50.0
# 		posy=50.0
# 		direction=45
# 		size=10
		
# 		#draw food
# 		glColor3f(0.0, 0.0, 1.0);
# 		for food in self.model.fooditems:
# 			# position viewer
# 			glMatrixMode(GL_MODELVIEW)
# 			glLoadIdentity()

# 			# position object
# 			glTranslatef(food.x, food.y, 0) #translation third
# 			glScalef(food.size, food.size, food.size) #scale second
		
# 			gluDisk(self.quadricObject, 0, 0.5, 16,1)

# 		#draw animals
# 		glColor3f(1.0, 1.0, 1.0);
# 		for forager in self.model.foragers:
# 			# position viewer
# 			glMatrixMode(GL_MODELVIEW)
# 			glLoadIdentity()

# 			# position object
# 			glTranslatef(forager.x, forager.y, 0) #translation third
# 			glScalef(forager.size, forager.size, forager.size) #scale second
# 			#glRotatef(degrees(particle.rhoRad), 0, 0, 1) #rotation first
		
# 			gluDisk(self.quadricObject, 0, 0.5, 16,1)
			
# 		self.SwapBuffers()

# class MainPanel(wx.Panel):
# 	def __init__(self, parent, rl, model):
# 		wx.Panel.__init__(self, parent, -1)
# 		self.rl=rl
# 		self.model=model
		
# 		self.modelCanvas = ModelCanvas(self, model)
# 		self.modelCanvas.SetMinSize((200, 200))

# 		boxsizer=wx.BoxSizer(wx.HORIZONTAL)
# 		boxsizer.Add(self.modelCanvas, 3, wx.SHAPED | wx.ALIGN_CENTER, 0)
		
# 		self.SetSizer(boxsizer)
# 		self.SetAutoLayout(True)

In [7]:

# from parameters import *
# from ForagingModel import *
# from ReinforcementLearning import *
# if WITHGUI:
# 	import wx
# 	from GUIPanels import *

# if WITHGUI:
# 	class MyApp(wx.App):
# 		def __init__(self):
# 			wx.App.__init__(self, redirect=False)

# 		def OnInit(self):
# 			self.model=ForagingModel()
# 			self.rl=ReinforcementLearning(self.model)
# 			self.frame = MyFrame(None, rl=self.rl)
# 			return True
		
# 		def OnExitApp(self, evt):
# 			self.frame.Close(True)

# 	app = MyApp()
# 	app.MainLoop()
# else:
# Build the simulation, wrap it in the learner and run the training loop.
# StartLearning() blocks until all iterations are done (or the cell is interrupted).
model=ForagingModel()
rl=ReinforcementLearning(model)
rl.StartLearning()

0 0.023
1 0.017


KeyboardInterrupt: 

In [8]:
# Shut pygame down manually, e.g. after the training cell above was interrupted.
pg.quit()