## Task 3: Game bot (50%)
Choose one of the games in OpenAI gym and make a bot that play it using GA.

Beside the code, add a link to a video of your bot playing.
##### Hint: There are many ways to do this, one of which is making a simple ANN and instead of using gradient decent use GA mutation and cross over to update your network.

In [1]:
import random
import time
from typing import List

#making bot for Open AI gym game using ANN and GA to update the weights of network
import gym as gym
import numpy as np
import tensorflow as tf

from gym import Env
from gym.spaces import Discrete, Box
import cv2
import matplotlib.pyplot as plt

In [2]:
#!pip install gym[atari]
#!pip install gym[accept-rom-license]

In [3]:
env = gym.make('ALE/AirRaid-v5', obs_type="grayscale",
               frameskip=4)

env.metadata['video.frames_per_second'] = 120

  logger.warn(


In [4]:
# CNN model for deciding what action to take according to observation
# output = 0 -> noop
# output = 1 -> fire
# output = 2 -> right
# output = 3 -> left
# output = 4 -> rightfire
# output = 5 -> leftfire

model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(8, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Conv2D(16, (3, 3), activation='relu'),
    tf.keras.layers.MaxPool2D((2, 2)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(12, activation='relu'),
    tf.keras.layers.Dense(6, activation='softmax')
])


In [5]:
model.build(input_shape=(None, 70, 70, 1))

In [7]:
number_of_weights = model.count_params()
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 68, 68, 8)         80        
                                                                 
 max_pooling2d (MaxPooling2D  (None, 34, 34, 8)        0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 16)        1168      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 16, 16, 16)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 14, 14, 16)        2320      
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 7, 7, 16)         0

In [8]:
# model.layers[7].get_weights()[0].shape, model.layers[7].get_weights()[1].shape

# LEARNING PROCESS

In [9]:
#range of values for weights
# Initial range was [-1, 1].
# After some testing and learning the problem,
# I found that [0, 1] is better range, just to make learning faster.
LOW = -1
UP = 1

In [10]:
def create_weight() -> float:
    return random.uniform(LOW, UP)

In [11]:
def create_individual() -> List[float]:
    return [create_weight() for _ in range(number_of_weights)]


def create_population() -> List[List[float]]:
    return [create_individual() for _ in range(population_size)]

In [12]:
def crossover(parent1: List[float], parent2: List[float]) -> (List[float], List[float]):
    crossover_point = random.randint(0, number_of_weights - 1)
    parent1 = parent1.copy()
    parent2 = parent2.copy()
    if random.random() < p_crossover:
        child1 = parent1[:crossover_point] + parent2[crossover_point:]
        child2 = parent2[:crossover_point] + parent1[crossover_point:]
        return child1, child2
    else:
        return parent1, parent2

In [14]:
def selection(population: List[List[float]]) -> (List[float], List[float]):
    return random.choices(
        population=population,
        k=2)

In [15]:
def mutation(individual) -> List[float]:
    for i in range(number_of_weights):
        if random.random() < p_mutation:
            individual[i] = create_weight()
    return individual

In [16]:
import copy
def run_ga(pop_size: int, gen_limit: int, p_crossover: float, p_mutation: float) -> List[float]:
    print("Creating initial population...")
    population_main = create_population()
    print("Evaluating initial population...")
    population_main.sort(key=fitness, reverse=True)
    print("Main population sorted.")
    print(f"Best fitness, GEN INIT : {fitness(population_main[0])}")

    for generation in range(gen_limit):
        population_next = copy.deepcopy(population_main[:2])

        print("Creating next population...")
        for _ in range(pop_size // 2 - 1):
            parent1, parent2 = selection(population_main)
            child1, child2 = crossover(parent1, parent2)
            child1 = mutation(child1)
            child2 = mutation(child2)
            population_next.append(child1)
            population_next.append(child2)
        population_main = copy.deepcopy(population_next)
        print("Evaluating next population...")
        population_main.sort(key=fitness, reverse=True)
        print("Next population sorted.")
        best_f = fitness(population_main[0])
        if best_f == 500:
            print(f"Best fitness, GEN {generation} : {best_f}")
            return population_main[0]
        else:
            print(f"Best fitness, GEN {generation} : {best_f}")

    population_main.sort(key=fitness, reverse=True)

    return population_main[0]

In [17]:
# Fitness is equal to total reward in our case,
# so to determine reward we need to play the game each time
# actions are decided by NN
def fitness(individual: List[float]) -> float:
    inserted = 0
    for i, layer in enumerate(model.layers):
        if i in [1, 3, 5, 6]:
            continue

        layer_weights_shape = layer.get_weights()[0].shape
        layer_biases_shape = layer.get_weights()[1].shape

        # total number of weights in layer = product of all dimensions
        total_n_weights = np.prod(layer_weights_shape)
        total_n_biases = np.prod(layer_biases_shape)

        # get weights and biases from individual
        layer_weights = np.array(individual[inserted:inserted + total_n_weights]).reshape(layer_weights_shape)
        layer_biases = np.array(
            individual[inserted + total_n_weights:inserted + total_n_weights + total_n_biases]).reshape(
            layer_biases_shape)

        # set weights and biases in model
        layer.set_weights([layer_weights, layer_biases])

        # update inserted
        inserted += total_n_weights + total_n_biases

    observation = env.reset()[0]
    done = False
    action_count = 0
    total_reward = 0
    while not done and action_count < 100 and total_reward < 400:
        observation = observation[35:170, 20:]
        observation = cv2.resize(observation, dsize=(70, 70), interpolation=cv2.INTER_CUBIC)
        action = model.predict(np.array([observation]), verbose=0)

        # action = number with the highest probability
        action = np.argmax(action)
        #print(action)

        observation, reward, truncted, terminated, info = env.step(action)
        action_count += 1
        done = terminated or truncted
        total_reward += reward

    return total_reward

In [18]:
#start learning and rendering the game
population_size = 10
p_crossover = 0.7
p_mutation = 0.6
best_weights = run_ga(pop_size=population_size, gen_limit=10,
                      p_crossover=p_crossover, p_mutation=p_mutation)


Creating initial population...
Evaluating initial population...
Main population sorted.
Best fitness, GEN INIT : 50.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 0 : 0.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 1 : 175.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 2 : 150.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 3 : 175.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 4 : 200.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 5 : 175.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 6 : 150.0
Creating next population...
Evaluating next population...
Next population sorted.
Best fitness, GEN 7 : 250.0
Crea

In [19]:
#show the game with best weights
best_weights

[0.7943981737311367,
 -0.9025703797331566,
 -0.4866276447790834,
 0.13488009018940939,
 0.9763249719892328,
 0.9032795831284517,
 -0.24176450309669417,
 -0.39377868541726224,
 0.29315482705121587,
 -0.7639277210890134,
 -0.02567766768340296,
 -0.9777078618932242,
 -0.5747513394167003,
 -0.3756231719222334,
 0.7089013801152966,
 -0.8098922733592782,
 0.43874634996014605,
 -0.02694349776848104,
 -0.041676614755352226,
 0.5896107496187517,
 -0.537246291241801,
 -0.8625106066584103,
 -0.7880095355496095,
 0.16109900956638512,
 0.7013094396481623,
 0.6284497130065567,
 -0.040880370757030216,
 -0.2477811479733567,
 0.5705750879550475,
 -0.9670162945776322,
 -0.5736106787365753,
 -0.9540969958592072,
 0.975685190537114,
 -0.9290937826229135,
 0.6524556577003295,
 -0.20601070477211803,
 -0.6289551702629299,
 -0.9687328941863851,
 -0.9914662284017484,
 0.4141349224124524,
 -0.14011969625661025,
 -0.17719054129717593,
 0.6512361878600903,
 -0.874201083801112,
 -0.9915408550788691,
 -0.0183753441

In [20]:
env.close()