##### Reinforcement Learning

*A small custome environment with 4 states and 2 actions has been created.*

*The model tries to predict the next action agent to should take inorder to increase the rewards.*

In [20]:
#importing necessary libraries
import gym #library for environment ccreation
from gym import spaces
import numpy as np

In [21]:
#We create a new customized environment and name it as CustomEnvironment
class CustomEnvironment(gym.Env):
    
    #defining a discrete observation space with four possible states (0,1,2,3) 
    #and a discrete action space with two possible actions (0,1). 
    #We also initialize the state variable to None.
    def __init__(self):
        
        self.observation_space=spaces.Discrete(4)  #Three possible states: 0,1,2,3
        self.action_space=spaces.Discrete(2)  #Two possible actions: 0,1
        self.state = None        
        
    
    #reset method is used to reset the environment to its initial state. Here, 
    #we randomly sample an initial state from the observation space and set it as the current state. 
    #We then return the current state as the initial observation.
    def reset(self):
        self.state=self.observation_space.sample()
        return self.state
    
    
    #The step method is responsible for executing an action in the environment and returning the new state, 
    #reward, done flag, and additional information. 
    def step(self, action):
        #Perform the given action and update the state
        if action==0:
            self.state=(self.state + 1) % 3  #Move to the next state
        else:
            self.state=(self.state - 1) % 3  #Move to the previous state

        #Calculate the reward
        #compute the reward based on the current state
        reward=self._calculate_reward()

        #Check if the episode is done
        done=False

        #Additional information (optional)
        info={}

        return self.state, reward, done, info

    def _calculate_reward(self):
        #Define the reward function based on the current state
        if self.state==0:
            return 0.5
        elif self.state==1:
            return 1.0
        elif self.state==2:
            return -0.5
        else:
            return -1.0

In [22]:

#Create an instance of the custom environment and assign it to the env variable. 
#This instance represents our custom environment.
env = CustomEnvironment()

In [23]:

#Reset the environment and get the initial observation
#Store the returned initial observation in the observation variable.
observation = env.reset()

In [24]:

#Run the interaction loop for a 10 steps
for _ in range(10):

    #Choose a random action from the given actions 
    action = env.action_space.sample()

    #Perform the action in the environment
    next_observation, reward, done, _ = env.step(action)

    #Print the current state, action, reward, and next state
    print(f"Current state: {observation}, Action: {action}, Reward: {reward}, Next state: {next_observation}")

    #Update the current observation
    observation = next_observation

#Close the environment
#Free up any resources associated with the environment
env.close()


Current state: 3, Action: 1, Reward: -0.5, Next state: 2
Current state: 2, Action: 0, Reward: 0.5, Next state: 0
Current state: 0, Action: 1, Reward: -0.5, Next state: 2
Current state: 2, Action: 1, Reward: 1.0, Next state: 1
Current state: 1, Action: 0, Reward: -0.5, Next state: 2
Current state: 2, Action: 1, Reward: 1.0, Next state: 1
Current state: 1, Action: 1, Reward: 0.5, Next state: 0
Current state: 0, Action: 0, Reward: 1.0, Next state: 1
Current state: 1, Action: 0, Reward: -0.5, Next state: 2
Current state: 2, Action: 0, Reward: 0.5, Next state: 0
