In [8]:
# Importing all the libraries
import json
import pygame as pg
import sys
import threading
import numpy as np
import rect
import copy
import random



In [5]:
# Seed every random number generator the notebook draws from, so that a
# Restart & Run All reproduces the same results.
SEED = 42
np.random.seed(SEED)  # NumPy global RNG (random fish populations)
random.seed(SEED)  # stdlib RNG (epsilon-greedy action selection)


# Variables for simulation window
# Width and height of the main simulation window
WIDTH = 1100
HEIGHT = 700

# Width and height of the simulation grid drawn inside the main window.
# Must be <= WIDTH and HEIGHT respectively.
GRID_WIDTH = WIDTH - 400
GRID_HEIGHT = HEIGHT
assert GRID_WIDTH <= WIDTH and GRID_HEIGHT <= HEIGHT, "grid must fit inside the window"
FPS = 100

In [6]:
# A class for storing the Q-values for each grid cell
class GridCell:
    """Q-value storage for a single grid cell.

    ``qvals`` holds one value per action index (0-4), all starting at zero.
    """

    def __init__(self):
        # A fresh list per instance, so cells never share Q-values
        self.qvals = [0] * 5

In [2]:
# A class to create the ocean environment for the simulation

class OceanEnvironment:
    """One cell of the ocean grid together with its fish population.

    Args:
        row: Row index of this cell.
        col: Column index of this cell.
        no_row: Total number of rows in the grid.
        no_col: Total number of columns in the grid.
        mode: ``0`` selects the distance-based gradient population; any other
            value selects a random population.
    """

    def __init__(self, row, col, no_row, no_col, mode):
        self.current_row = row
        self.current_col = col

        # Largest valid row / column index in the grid
        self.max_rows = no_row - 1
        self.max_cols = no_col - 1

        self.color = 0

        if mode == 0:
            self.fish_population = self.gradient_fish_generator()
        else:
            self.fish_population = self.random_fish_generator()
        self.environment_val = 0

        self.population_history = []

    def gradient_fish_generator(self):
        """Return a population that grows with distance from the last cell.

        The distance is the Euclidean distance (in cells) between this cell
        and the cell at ``(max_rows, max_cols)``, scaled so the farthest cell
        gets roughly 255. The result is truncated to an int and is never
        lower than 1.

        Returns:
            int: The initial fish population for this cell.
        """
        dist_from_shore_y = abs(self.max_rows - self.current_row)
        dist_from_shore_x = abs(self.max_cols - self.current_col)

        diag_dist = pow(pow(dist_from_shore_x, 2) + pow(dist_from_shore_y, 2), 0.5)
        max_dist = pow(pow(self.max_rows, 2) + pow(self.max_cols, 2), 0.5)
        if max_dist == 0:
            # A 1x1 grid has no distance to scale by; use the minimum
            # population instead of dividing by zero.
            return 1

        fish_pop = int((255 / max_dist) * diag_dist)
        if fish_pop == 0:
            return 1
        return fish_pop

    def random_fish_generator(self):
        """Return a random integer population in ``[5, 100)`` from NumPy's global RNG."""
        return np.random.randint(5, 100)

In [9]:
# A class to create and handle the actor for Q-learning
class Boat:
    """The Q-learning agent: a boat that moves over the grid and fishes.

    ``pos`` is ``[x, y]`` where ``x`` is the column index and ``y`` is the row
    index, so the cell under the boat is ``grid[pos[1]][pos[0]]``.

    Args:
        grid: 2-D list of cells indexed as ``grid[row][col]``.
    """

    def __init__(self, grid):
        self.grid = grid

        # Start (and reset) position: middle column of the last row.
        # The column count is used for x so non-square grids work too;
        # on a square grid this is the same position as before.
        self.reset_pos = (self._n_cols() // 2, len(grid) - 1)

        self.pos = list(self.reset_pos)

        self.fuel_used = 0

    def _n_cols(self):
        """Return the number of columns in the grid (0 for an empty grid)."""
        return len(self.grid[0]) if self.grid else 0

    def move_up(self):
        """Move one row up; return True if the boat moved, False at the edge."""
        if self.pos[1] > 0:
            self.pos[1] -= 1
            self.render()
            return True
        return False

    def move_down(self):
        """Move one row down; return True if the boat moved, False at the edge."""
        if self.pos[1] < len(self.grid) - 1:
            self.pos[1] += 1
            self.render()
            return True
        return False

    def move_left(self):
        """Move one column left; return True if the boat moved, False at the edge."""
        if self.pos[0] > 0:
            self.pos[0] -= 1
            self.render()
            return True
        return False

    def move_right(self):
        """Move one column right; return True if the boat moved, False at the edge."""
        # x is a column index, so it is bounded by the column count
        if self.pos[0] < self._n_cols() - 1:
            self.pos[0] += 1
            self.render()
            return True
        return False

    def fish(self):
        """Reduce the fish population of the cell under the boat by 10."""
        decline = 10
        # grid is indexed [row][col], i.e. [y][x]
        self.grid[self.pos[1]][self.pos[0]].fish_population -= decline

    def render(self):
        """Draw the boat's current cell and refresh that region of the display.

        Relies on module-level ``screen`` and ``white`` objects that are not
        defined in this part of the notebook -- they must exist before any
        successful move is made.
        """
        cell_width = GRID_WIDTH / len(self.grid[0])
        cell_height = GRID_HEIGHT / len(self.grid)
        # Named cell_rect so it does not shadow the imported ``rect`` module
        cell_rect = (cell_width * self.pos[0], cell_height * self.pos[1], cell_width, cell_height)
        pg.draw.rect(screen, white, cell_rect)
        pg.display.update(pg.Rect(cell_rect))

# The Q-learning Algorithm

Action Space

0 -> move up<br>
1 -> move down<br>
2 -> move left<br>
3 -> move right<br>
4 -> fish

In [11]:


# Epsilon greedy policy for choosing the action for Q-learning
def epsilon_greedy_policy(Qtable, state, epsilon):
    """Pick an action index for ``state`` with an epsilon-greedy rule.

    Args:
        Qtable: 2-D list indexed as ``Qtable[row][col]`` whose cells expose a
            ``qvals`` list (one Q-value per action).
        state: ``[x, y]`` position, i.e. ``[col, row]``.
        epsilon: Probability-like threshold; a uniform draw in [0, 1] that is
            greater than ``epsilon`` exploits, otherwise a random action is
            chosen.

    Returns:
        int: Index of the chosen action.
    """
    # state is [x, y] but the table is stored row-major, so index [y][x]
    # (the same order the training loop uses when it updates the table).
    qvals = Qtable[state[1]][state[0]].qvals

    if random.uniform(0, 1) > epsilon:
        # Exploit: first action with the highest Q-value
        return qvals.index(max(qvals))
    # Explore: any action, uniformly
    return random.randint(0, len(qvals) - 1)

In [12]:
# A function to return the rewards for each action taken

def take_step(boat, action, environment_grid, avg_population):
    """Apply ``action`` to ``boat`` and return the reward for it.

    Actions 0-3 try to move the boat (up, down, left, right). A successful
    move costs -2, -1.5, -1 and -1 respectively; a move that is refused by
    the boat costs -100. Action 4 fishes at the boat's cell: the reward is
    the cell's fish population divided by ``70 * 255``, negated when the
    population is below ``avg_population``. Any other action returns None.

    Args:
        boat: Object exposing ``move_up/move_down/move_left/move_right`` and
            a ``pos`` of the form ``[x, y]``.
        action: Action index.
        environment_grid: 2-D list indexed ``[row][col]`` of cells with a
            ``fish_population`` attribute.
        avg_population: Threshold below which fishing is penalised.
    """
    blocked_penalty = -100
    # action index -> (name of the boat method, reward when the move succeeds)
    movement = {
        0: ("move_up", -2),
        1: ("move_down", -1.5),
        2: ("move_left", -1),
        3: ("move_right", -1),
    }

    if action in movement:
        method_name, step_reward = movement[action]
        moved = getattr(boat, method_name)()
        return step_reward if moved else blocked_penalty

    if action == 4:
        scale = 70
        col, row = boat.pos[0], boat.pos[1]
        stock = environment_grid[row][col].fish_population
        if stock < avg_population:
            return -1 * stock / (scale * 255)
        return (stock / scale) / 255

In [14]:
# The actual Q-learning algorithm

learning_rate = 0.5
gamma = 0.95


def train(training_episodes, decay_rate, max_steps, Qtable, environment_grid, boat, avg_population):
    """Run tabular Q-learning, updating ``Qtable`` in place.

    Each episode resets the boat to ``boat.reset_pos`` and works on a deep
    copy of ``environment_grid``, so the caller's grid is never modified.
    Epsilon decays exponentially with the episode number from 1.0 towards
    0.05.

    Args:
        training_episodes: Number of episodes to run.
        decay_rate: Exponential decay rate applied to epsilon per episode.
        max_steps: Number of steps taken in every episode.
        Qtable: 2-D list indexed ``[row][col]`` of cells with a ``qvals`` list.
        environment_grid: 2-D list indexed ``[row][col]`` of cells with a
            ``fish_population`` attribute.
        boat: The agent; must expose ``reset_pos``, ``pos`` and the move
            methods used by ``take_step``.
        avg_population: Threshold passed through to ``take_step``.
    """
    max_epsilon = 1.0
    min_epsilon = 0.05

    for episode in range(training_episodes):

        epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
        # Reset the environment
        boat.pos = list(boat.reset_pos)
        copy_env_grid = copy.deepcopy(list(environment_grid))
        # Snapshot the position: the boat's move methods mutate boat.pos in
        # place, so sharing that list would make `state` silently follow the
        # boat and the update below would hit the wrong cell.
        state = list(boat.pos)

        # repeat
        for _ in range(max_steps):

            action = epsilon_greedy_policy(Qtable, state, epsilon)
            reward = take_step(boat, action, copy_env_grid, avg_population)
            new_state = list(boat.pos)

            # Q(s, a) <- Q(s, a) + lr * (r + gamma * max_a' Q(s', a') - Q(s, a))
            state_val = Qtable[state[1]][state[0]].qvals
            best_next = max(Qtable[new_state[1]][new_state[0]].qvals)
            state_val[action] = state_val[action] + learning_rate * (
                        reward + gamma * best_next - state_val[action])

            if action == 4:
                # Fishing depletes the cell for the rest of this episode
                copy_env_grid[state[1]][state[0]].fish_population -= 100

            # Our state is the new state
            state = new_state


In [15]:
# Function for generating the Q-table
def create_qtable(rows, columns):
    """Build a ``rows`` x ``columns`` table of fresh ``GridCell`` objects.

    The result is a list of row lists, indexed as ``table[row][col]``.
    """
    table = []
    for _ in range(rows):
        row_cells = []
        for _ in range(columns):
            row_cells.append(GridCell())
        table.append(row_cells)
    return table

In [16]:
# Function for generating the environment grid
def create_env(rows, columns, mode):
    """Build a ``rows`` x ``columns`` grid of ``OceanEnvironment`` cells.

    Cells are created row by row, left to right, and each one is given its
    own ``(row, col)`` index together with the grid size and ``mode``.
    The result is indexed as ``grid[row][col]``.
    """
    environment = []
    for row_idx in range(rows):
        row_cells = []
        for col_idx in range(columns):
            row_cells.append(OceanEnvironment(row_idx, col_idx, rows, columns, mode))
        environment.append(row_cells)
    return environment