In [2]:
import pygame
import time
from random import *
import numpy as np
import scipy.special

# Define screen dimensions
screen_width = 800
screen_height = 600

pygame 2.6.0 (SDL 2.28.4, Python 3.11.7)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [3]:
#Neural network with Python 
#now let us try a four layer network

class neuralNetwork:
    
    #initialize the neural network
    def __init__(self, inputnodes, hiddennodes1, hiddennodes2, outputnodes, learningrate):
        #set number of nodes in each input, hidden, output layers
        self.inodes=inputnodes
        self.hnodes1=hiddennodes1
        self.hnodes2=hiddennodes2
        self.onodes=outputnodes
        
        #link weight matrices, wih (weights from input layer to hidden layer 1) and who (weights from hidden 2 to output)
        # whh (weights from hidden layer 1 to hidden layer 2
        # weights inside the arrays are w_i_j, where link is from node i to node j in the next layer
        # w11 w21
        # w12 w22 etc
        # you would have code like this:
        #self.wih = (np.random.rand(self.hnodes,self.inodes) - 0.5)
        #self.who = (np.random.rand(self.onodes,self.hnodes) - 0.5)
        # However, more popular initial weights are normal probability distribution around zero, with standard deviation related to the number of incoming links to a node.
        # Now the new code is
        self.wih = np.random.normal(0.0, pow(self.inodes, -0.5),(self.hnodes1, self.inodes))
        self.whh = np.random.normal(0.0, pow(self.hnodes1, -0.5),(self.hnodes2, self.hnodes1))
        self.who = np.random.normal(0.0, pow(self.hnodes2, -0.5),(self.onodes, self.hnodes2))
        
        
        #set learning rate
        self.lr=learningrate
        
    
        # activation function is the sigmoid function
        self.activation_function =  lambda x:scipy.special.expit(x)
        
        pass
    
    # train the neural network
    def train(self, inputs_list, targets_list,track_loss):
        # convert inputs list and targets list to 2d arrays
        inputs = np.array(inputs_list, ndmin=2).T
        targets = np.array(targets_list, ndmin =2).T
        
        #calculate signals into the hidden layer 1
        hidden_inputs1 = np.dot(self.wih, inputs)
        
        #calculate the signals emerging from the hidden layer 1
        hidden_outputs1 = self.activation_function(hidden_inputs1)
        
        #calculate signals into the hidden layer 2
        hidden_inputs2 = np.dot(self.whh, hidden_outputs1)
        
        #calculate the signals emerging from the hidden layer 2
        hidden_outputs2 = self.activation_function(hidden_inputs2)
        
        #calculate signals into final output layer
        final_inputs = np.dot(self.who, hidden_outputs2)
        
        #calculate the singals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)
        
        #output layer error is the (target -  actual)
        output_errors = targets - final_outputs
        sum_errors = np.sum(np.square(output_errors))
        #print(sum_errors)
        track_loss.append(sum_errors)
                                
        #hidden layer 2 error is the output_errors, split by weights, recombined at the hidden layer 2 nodes
        hidden_errors2 =  np.dot(self.who.T, output_errors)
        
        #hidden layer 1 error is the hidden layer 2 errors, split by weights, recombined at the hidden layer 1 nodes
        hidden_errors1 =  np.dot(self.whh.T, hidden_errors2)
        
        #update the weights for the link between hidden layer 2 and output layers
        self.who += self.lr*np.dot((output_errors * final_outputs*(1.0-final_outputs)),np.transpose(hidden_outputs2))
        
        #update the weights for the links between the input and hidden layers
        self.whh += self.lr*np.dot((hidden_errors2*hidden_outputs2*(1.0 - hidden_outputs2)),np.transpose(hidden_outputs1))
        
        #update the weights for the links between the input and hidden layers
        self.wih += self.lr*np.dot((hidden_errors1*hidden_outputs1*(1.0 - hidden_outputs1)),np.transpose(inputs))
        
        pass
    
    #stack the layers to construct neural network
    def construct(self, inputs):
        #calculate signals into the hidden layer 1
        hidden_inputs1 = np.dot(self.wih, inputs)
        
        #calculate the signals emerging from the hidden layer 1
        hidden_outputs1 = self.activation_function(hidden_inputs1)
        
        #calculate signals into the hidden layer 2
        hidden_inputs2 = np.dot(self.whh, hidden_outputs1)
        
        #calculate the signals emerging from the hidden layer 2
        hidden_outputs2 = self.activation_function(hidden_inputs2)
        
        #calculate signals into final output layer
        final_inputs = np.dot(self.who, hidden_outputs2)
        
        #calculate the singals emerging from final output layer
        final_outputs = self.activation_function(final_inputs)
        
        return final_outputs

'''class neuralNetwork:
    def __init__(self, inputN, hiddenN, outputN, lr):
        self.inum = inputN
        self.hnum = hiddenN
        self.onum = outputN
        self.lr = lr

        # wih is Weights from Input to Hidden
        self.wih = np.array(np.random.rand(self.hnum, self.inum) - 0.5)
        # who is Weights from Hidden to Output
        self.who = np.array(np.random.rand(self.onum, self.hnum) - 0.5)

        # AF is activation function
        self.AF = lambda x:scipy.special.expit(x)

    def train(self, inputList, targetList):
        # Calculate each layer
        inputs = np.array([inputList]).T
        targets = np.array([targetList]).T
        hiddenInputs = np.dot(self.wih, inputs)
        hiddenOutputs = self.AF(hiddenInputs)
        finalInputs = np.dot(self.who, hiddenOutputs)
        finalOutputs = self.AF(finalInputs)

        # Calculate the error of Outputs
        outputErrors = targets - finalOutputs
        # Back Propagate to Hidden Layer
        hiddenErrors = np.dot(self.who.T, outputErrors)

        # delta Wjk = 2 * alpha * (Tk - Ok) * Ok * (1 - Ok)* Oj'
        # Update who, Weights from Hidden to Output
        self.who += self.lr * np.dot((outputErrors * finalOutputs * (1.0 - finalOutputs)), hiddenOutputs.T)
        # Update wih, Weights from Inputs to Hidden
        self.wih += self.lr * np.dot((hiddenErrors * hiddenOutputs * (1.0 - hiddenOutputs)), inputs.T)

    def construct(self, inputList):
        inputs = np.array([inputList]).T
        hiddenInputs = np.dot(self.wih, inputs)
        hiddenOutputs = self.AF(hiddenInputs)
        finalInputs = np.dot(self.who, hiddenOutputs)
        finalOutputs = self.AF(finalInputs)

        return finalOutputs'''

"class neuralNetwork:\n    def __init__(self, inputN, hiddenN, outputN, lr):\n        self.inum = inputN\n        self.hnum = hiddenN\n        self.onum = outputN\n        self.lr = lr\n\n        # wih is Weights from Input to Hidden\n        self.wih = np.array(np.random.rand(self.hnum, self.inum) - 0.5)\n        # who is Weights from Hidden to Output\n        self.who = np.array(np.random.rand(self.onum, self.hnum) - 0.5)\n\n        # AF is activation function\n        self.AF = lambda x:scipy.special.expit(x)\n\n    def train(self, inputList, targetList):\n        # Calculate each layer\n        inputs = np.array([inputList]).T\n        targets = np.array([targetList]).T\n        hiddenInputs = np.dot(self.wih, inputs)\n        hiddenOutputs = self.AF(hiddenInputs)\n        finalInputs = np.dot(self.who, hiddenOutputs)\n        finalOutputs = self.AF(finalInputs)\n\n        # Calculate the error of Outputs\n        outputErrors = targets - finalOutputs\n        # Back Propagate to H

In [6]:
# Initialize Pygame
pygame.init()

class Game:
    def __init__(self):
        self.screen = pygame.display.set_mode((screen_width, screen_height))
        pygame.display.set_caption("Platformer")
        self.clock = pygame.time.Clock()
        self.running = True

        # Create sprite groups
        self.all_sprites = pygame.sprite.Group()
        self.platforms = pygame.sprite.Group()
        self.goals = pygame.sprite.Group()
        self.traps = pygame.sprite.Group()

        # Create player
        self.player = Player()
        self.all_sprites.add(self.player)

        # Create platforms
        self.platform0 = Platform(0, 590, screen_width, 20, move=False)
        self.platform1 = Platform(300, 500, 400, 20, move=False)
        self.platform2 = Platform(randrange(0, screen_width, 50), 400, 200, 20)
        self.platform3 = Platform(randrange(0, screen_width, 50), 300, 300, 20)
        self.platform4 = Platform(randrange(0, screen_width, 50), 200, 250, 20)
        self.platform5 = Platform(randrange(0, screen_width, 50), 100, 125, 20)
        self.platformList = [self.platform0, self.platform1, self.platform2, self.platform3, self.platform4, self.platform5]
        self.platforms.add(self.platform0, self.platform1, self.platform2, self.platform3, self.platform4, self.platform5)
        self.all_sprites.add(self.platform0, self.platform1, self.platform2, self.platform3, self.platform4, self.platform5)

        # Create goal
        goal = Goal(screen_width - 100, 0)
        self.goals.add(goal)
        self.all_sprites.add(goal)

        # Create traps
        trap1 = Trap(0, 589, 300, 21)
        trap2 = Trap(500, 589, 300, 21)
        self.traps.add(trap1, trap2)
        self.all_sprites.add(trap1, trap2)

    def handle_events(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                self.running = False
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_UP:
                    self.player.jump()

    def update(self):
        self.all_sprites.update()

        # Collision detection with platforms
        hits = pygame.sprite.spritecollide(self.player, self.platforms, False)
        if hits:
            # Land on top of the platform
            self.player.rect.bottom = hits[0].rect.top
            self.player.velocity.y = 0
            self.player.on_ground = True
        else:
            self.player.on_ground = False

        # Collision detection with traps
        if self.hitTrap():
            self.running = False

        # Collision detection with goal
        if self.hitGoal():
            self.running = False

    def hitGoal(self):
        return pygame.sprite.spritecollide(self.player, self.goals, False)

    def hitTrap(self):
        return pygame.sprite.spritecollide(self.player, self.traps, False)
        
    def getObservations(self):
        obs = []
        for platform in self.platformList:
            obs.append(platform.rect.x)
            obs.append(platform.rect.y)
            obs.append(platform.width)
            obs.append(int(platform.direction))
        obs.append(self.player.rect.x)
        obs.append(self.player.rect.y)
        return obs
    
    def draw(self):
        self.screen.fill((0, 0, 0))
        self.all_sprites.draw(self.screen)
        pygame.display.flip()

# Define Player class
class Player(pygame.sprite.Sprite):
    def __init__(self):
        super().__init__()
        self.image = pygame.Surface((50, 50))
        self.image.fill((255, 255, 255))
        self.rect = self.image.get_rect()
        self.rect.center = (screen_width // 2, screen_height)
        self.velocity = pygame.math.Vector2(0, 0)
        self.gravity = 0.5
        self.jump_strength = -10
        self.on_ground = False

    def update(self):
        self.velocity.y += self.gravity
        self.rect.y += self.velocity.y

        # Prevent player from falling below the screen
        if self.rect.bottom >= screen_height:
            self.rect.bottom = screen_height
            self.velocity.y = 0
            self.on_ground = True

        # Move player left and right
        keys = pygame.key.get_pressed()
        if keys[pygame.K_LEFT]:
            self.moveLeft()
        if keys[pygame.K_RIGHT]:
            self.moveRight()

        # Prevent player from moving off the screen
        if self.rect.left < 0:
            self.rect.left = 0
        if self.rect.right > screen_width:
            self.rect.right = screen_width

    # Jump
    def jump(self):
        if self.on_ground:
            self.velocity.y = self.jump_strength
            self.on_ground = False
            
    def moveLeft(self):
        self.rect.x -= 5
        
    def moveRight(self):
        self.rect.x += 5
        

class Platform(pygame.sprite.Sprite):
    def __init__(self, x, y, width, height, move=True):
        super().__init__()
        self.image = pygame.Surface((width, height))
        self.image.fill((0, 255, 0))
        self.rect = self.image.get_rect()
        self.rect.topleft = (x, y)
        self.width = width
        
        self.move = move
        # Direction as True means move right
        self.direction = True

    def update(self):
        if self.move:
            if self.direction:
                self.rect.x += 5
            else:
                self.rect.x -= 5
        if self.rect.left < 0:
            self.rect.left = 0
            self.direction = True
        if self.rect.right > screen_width:
            self.rect.right = screen_width
            self.direction = False

class Goal(pygame.sprite.Sprite):
    def __init__(self, x, y):
        super().__init__()
        self.image = pygame.Surface((100, 100))
        self.image.fill((0, 0, 255))
        self.rect = self.image.get_rect()
        self.rect.topleft = (x, y)

class Trap(pygame.sprite.Sprite):
    def __init__(self, x, y, width, height):
        super().__init__()
        self.image = pygame.Surface((width, height))
        self.image.fill((255, 0, 0))  # Red color for traps
        self.rect = self.image.get_rect()
        self.rect.topleft = (x, y)

In [8]:
def run():
    game = Game()
    while game.running:
        game.handle_events()
        game.update()
        game.draw()
        game.clock.tick(60)
    pygame.quit()

In [22]:
def trainBot(eps, decay, lr, discount, qNeuralNet, polNeuralNet, epochs, batchSize):
    rewardTotalList = []
    trackLoss = []
    for epoch in range(epochs):
        eps *= decay
        game = Game()
        totalReward = 0
        t = 0
        while game.running:
            t += 1
            # Action of 0 is left, action of 1 is right, action of 2 is jump, and action of 3 is stay still
            obs = game.getObservations()
            # print(len(obs))
            # print(qNeuralNet.hiddenOutputs).shape()
            if random() < eps:
                action = choice([0, 1, 2, 3])
            else:
                output = list(polNeuralNet.construct(obs))
                action = output.index(max(output))
                
            if action == 0:
                game.player.moveLeft()
            elif action == 1:
                game.player.moveRight()
            elif action == 2:
                game.player.jump()
                
            game.handle_events()
            game.update()
            game.draw()

            # We want to train the q neural network based off what happened
            # Observations after this move
            newObs = game.getObservations()

            reward = 0.1
            y = game.player.rect.y
            x = game.player.rect.x
            distance = ((y ** 2) + ((screen_width - x) ** 2)) / 1000000 # Furthest is 1
            reward += (0.5 - distance)/10
            reward *= scipy.special.expit(500 - t)
            if game.hitGoal():
                # print("Did It!")
                reward += 50
                # reward += 15000/t
            if game.hitTrap():
                reward -= 50
            # The output that the q net used to give
            oldOutput = qNeuralNet.construct(obs)
            targetOutput = [0, 0, 0, 0]
            for i in range(4):
                thing = qNeuralNet.construct(newObs)[i]
                # print(oldOutput[i][0])
                targetOutput[i] = oldOutput[i] + (lr * (reward + (discount * thing) - oldOutput[i]))
                targetOutput[i] = oldOutput[i] + (lr * (reward + (discount * thing) - oldOutput[i]))
            # print("Shape of targetOutput: " + str(np.shape(targetOutput)))
            # Train Q neural network
            qNeuralNet.train(obs, targetOutput, trackLoss)
            totalReward += reward
            
            # game.clock.tick(60)
        pygame.quit()
        rewardTotalList.append(totalReward)
        # print(f"Reward earned in Epoch {epoch}: {totalReward}")
        # Update the Policy Network every 5 games
        if epoch % batchSize == 0:
            polNeuralNet.wih = qNeuralNet.wih
            polNeuralNet.whh = qNeuralNet.whh
            polNeuralNet.who = qNeuralNet.who
    return [qNeuralNet, polNeuralNet]

In [24]:
def runBot(polNeuralNet):
    game = Game()
    while game.running:
        # Action of 0 is left, action of 1 is right, action of 2 is jump, and action of 3 is stay still
        obs = game.getObservations()
        output = list(polNeuralNet.construct(obs))
        action = output.index(max(output))
            
        if action == 0:
            game.player.moveLeft()
        elif action == 1:
            game.player.moveRight()
        elif action == 2:
            game.player.jump()
            
        game.handle_events()
        game.update()
        game.draw()
        if game.hitGoal():
            print("You Reached the Goal!")
        if game.hitTrap():
            print("You Died!")
        game.clock.tick(60)
    pygame.quit()

def testBot(polNeuralNet):
    game = Game()
    while game.running:
        # Action of 0 is left, action of 1 is right, action of 2 is jump, and action of 3 is stay still
        obs = game.getObservations()
        output = list(polNeuralNet.construct(obs))
        action = output.index(max(output))
            
        if action == 0:
            game.player.moveLeft()
        elif action == 1:
            game.player.moveRight()
        elif action == 2:
            game.player.jump()
            
        game.handle_events()
        game.update()
        game.draw()
        if game.hitGoal():
            return True
        if game.hitTrap():
            return False
    pygame.quit()

In [26]:
# Save the current neural net to a file
def saveTo(neuralNet, fileName):
    with open(fileName, "w") as f:
        f.write(str(neuralNet.lr) + "\n")
        vals = list(neuralNet.wih)
        for li in vals:
            for num in li:
                f.write(str(num) + " ")
        f.write("\n")
        vals = list(neuralNet.whh)
        for li in vals:
            for num in li:
                f.write(str(num) + " ")
        f.write("\n")
        vals = list(neuralNet.who)
        for li in vals:
            for num in li:
                f.write(str(num) + " ")
    return

# Load a neural net from a file
def loadFrom(fileName):
    neuralNet = neuralNetwork(26, 256, 256, 4, 0)
    with open(fileName) as f:
        neuralNet.lr = float(next(f))
        
        vals = [float(x) for x in next(f).split()]
        vals = np.array(vals)
        neuralNet.wih = vals.reshape((256, 26))

        vals = [float(x) for x in next(f).split()]
        vals = np.array(vals)
        neuralNet.whh = vals.reshape((256, 256))
        
        vals = [float(x) for x in next(f).split()]
        vals = np.array(vals)
        neuralNet.who = vals.reshape((4, 256))
    return neuralNet
        

In [14]:
testingNet = loadFrom("workingBot.txt")

In [32]:
# Here so that the bot's progress does not reset
eps = 1
decay = 0.995
lr = 0.005
discount = 0.9
epochs = 1000
batchSize = 20
'''
# Q is 27 because each of the 6 platforms have 4 inputs, the player has 2 inputs, and the last input is the action
qNeuralNet = neuralNetwork(26, 256, 256, 4, lr)
polNeuralNet = neuralNetwork(26, 256, 256, 4, lr)
qNeuralNet, polNeuralNet, rewardTotalList = trainBot(eps, decay, lr, discount, qNeuralNet, polNeuralNet, 1000, 20)
print(rewardTotalList)
'''

'\n# Q is 27 because each of the 6 platforms have 4 inputs, the player has 2 inputs, and the last input is the action\nqNeuralNet = neuralNetwork(26, 256, 256, 4, lr)\npolNeuralNet = neuralNetwork(26, 256, 256, 4, lr)\nqNeuralNet, polNeuralNet, rewardTotalList = trainBot(eps, decay, lr, discount, qNeuralNet, polNeuralNet, 1000, 20)\nprint(rewardTotalList)\n'

In [16]:
def successPercent(neuralNet, batchSize):
    success = 0
    for game in range(batchSize):
        if testBot(testingNet):
            success += 1
    return (success / batchSize)

In [345]:
baseSuccess = successPercent(testingNet)
print(baseSuccess)

0.58


In [355]:
testingQNet = loadFrom("workingBot.txt")
testingPNet = loadFrom("workingBot.txt")
successList = [baseSuccess]
iteration = 0
for i in range(3):
    print(i)
    iteration += 1
    testingQNet, testingPNet = trainBot(eps, decay, lr, discount, testingQNet, testingPNet, epochs, batchSize)
    grade = successPercent(testingPNet)
    successList.append(grade)
    fileName = "workingBot" + str(iteration * 1000) + ".txt"
    saveTo(testingPNet, fileName)
    

0
1
2


In [357]:
print(successList)

[0.58, 0.49, 0.48, 0.58]


In [361]:
bot1000 = loadFrom("workingBot1000.txt")

In [43]:
testingQNet = loadFrom("workingBot.txt")
testingPNet = loadFrom("workingBot.txt")
testingQNet, testingPNet = trainBot(eps, decay, lr, discount, testingQNet, testingPNet, epochs, batchSize)

In [375]:
print(successPercent(testingPNet))

0.54


In [379]:
runBot(testingPNet)

You Died!


In [383]:
runBot(loadFrom("workingBot.txt"))

You Reached the Goal!


In [45]:
runBot(testingPNet)

You Died!


In [32]:
run()

In [28]:
def graphSuccess(neuralNet, intervalSize, trainAmount):
    successPercentList = []
    for i in range(trainAmount):
        testingQNet, neuralNet = trainBot(eps, decay, lr, discount, neuralNet, neuralNet, 1, 1)
        num = i + 1
        if num % intervalSize == 0:
            successPercentList.append(successPercent(neuralNet, 100))
    return successPercentList

In [34]:
neuralNet = neuralNetwork(26, 256, 256, 4, 0.005)
successList = graphSuccess(neuralNet, 50, 1000)

In [36]:
successList

[0.46,
 0.58,
 0.51,
 0.51,
 0.44,
 0.51,
 0.58,
 0.56,
 0.49,
 0.57,
 0.48,
 0.45,
 0.51,
 0.61,
 0.53,
 0.61,
 0.52,
 0.61,
 0.55,
 0.53]