# Reinforcement Learning - Dynamic environment

In [1]:
## Libraries
import numpy as np
import matplotlib.pyplot as plt
import random
from scipy.special import softmax


## Parameters

In [None]:
# Environment configuration
SIZE = 12  # grid side length: Player coordinates stay in [0, SIZE - 1] and the Q-table is SIZE x SIZE
HM_EPISODES = 600  # presumably the number of training episodes ("how many") -- confirm against the training loop
TIME = 2 * SIZE * SIZE  # presumably the per-episode step budget -- confirm against the training loop

# Model parameters
beta = 0.1  # NOTE(review): likely a softmax temperature-style factor (scipy's softmax is imported) -- confirm
epsilon = 0.1  # randomness (presumably the exploration rate of an epsilon-greedy policy -- confirm)
EPS_DECAY = 0.99999999  # intended per-episode multiplicative decay: epsilon <- epsilon * EPS_DECAY
LEARNING_RATE = 0.2  # presumably the Q-learning step size -- confirm against the update rule
DISCOUNT = 0.95  # presumably the discount factor applied to future rewards -- confirm
proba_pitfall = 0.7  # NOTE(review): a pitfall-related probability; exact meaning not visible here
fire_prob_spread = 0.3  # NOTE(review): presumably the chance that fire spreads to a neighbour -- confirm

# Rewards (penalties are stored as positive magnitudes; presumably the
# environment subtracts them -- confirm where they are applied)
MOVE_PENALTY = 1
WALL_PENALTY = 20000
FIRE_PENALTY = 20
PITFALL_PENALTY = 10
SAVING_REWARD = 50
ESCAPE_REWARD = 100

# Elements of the maze
entrances = [(0,0)]  # list of (x, y) cells; presumably where the player starts -- confirm
exits = [(9,9)]  # list of (x, y) cells; presumably the goal cells -- confirm
# (dx, dy) offsets; the index into this list is the move id used by
# Player.move / Player.possible_moves and by the last axis of Player.Q.
allowed_moves = [(0, 1), (1, 0), (-1, 0), (0, -1)]

## Player class

In [None]:
class Player:
    """Agent that moves on the SIZE x SIZE grid and keeps reward statistics.

    Attributes:
        x, y: current grid coordinates.
        cumulative_reward: running reward total; zeroed by `reset` (this
            class itself never increments it).
        Q: table of zeros shaped (SIZE, SIZE, len(allowed_moves)), i.e. one
            value per cell and move.
        BestQ: copy of the table saved by `reset` for the best run so far
            (None until a run has been recorded).
        best_reward: highest `cumulative_reward` seen by `reset` so far.
    """

    def __init__(self, coord):
        """Place the player at `coord` (an (x, y) pair) with empty statistics."""
        self.x = coord[0]
        self.y = coord[1]
        self.cumulative_reward = 0
        self.Q = np.zeros((SIZE, SIZE, len(allowed_moves)))
        self.BestQ = None
        self.best_reward = -np.inf

    def get_coord(self):
        """Return the current position as an (x, y) tuple."""
        return (self.x, self.y)

    def set_coord(self, coord):
        """Move the player directly to `coord`, an (x, y) pair."""
        self.x, self.y = coord

    def reset(self, coord, Q=None):
        """Record the finished run if it is the best so far, then restart.

        Args:
            coord: (x, y) position to place the player at.
            Q: table to snapshot into `BestQ` when the run set a new best
                reward. When omitted, the player's own `self.Q` is used.
        """
        if self.best_reward < self.cumulative_reward:
            self.best_reward = self.cumulative_reward
            # Compare against None explicitly: the truth value of a
            # multi-element ndarray is ambiguous and raises ValueError, and
            # copying None would store a useless 0-d object array.
            self.BestQ = np.copy(self.Q if Q is None else Q)

        self.x = coord[0]
        self.y = coord[1]
        self.cumulative_reward = 0

    def possible_moves(self):
        """Return the indices of `allowed_moves` that keep the player on the grid."""
        return [
            idx
            for idx, (dx, dy) in enumerate(allowed_moves)
            if 0 <= self.x + dx < SIZE and 0 <= self.y + dy < SIZE
        ]

    def move(self, num):
        """Apply move number `num`, clamping the result to the grid.

        Args:
            num: index into `allowed_moves`.
        """
        dx, dy = allowed_moves[num]
        # An out-of-bounds step leaves the player on the border cell.
        self.x = min(max(self.x + dx, 0), SIZE - 1)
        self.y = min(max(self.y + dy, 0), SIZE - 1)

## Environment