In [1]:
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from IPython.display import HTML
from matplotlib import animation
import matplotlib.pyplot as plt
from gym import Env, spaces
import numpy as np
import random
import math
import cv2
import io

In [15]:
class TC_environment(Env):
    """Toy environment: a car driving along the line x = 0 and a pedestrian on a straight path.

    The observation is a list of ``self.observations`` consecutive ``(x, y)``
    pedestrian positions. The car is described by ``self.car_state``, a
    two-element list ``[y_position, velocity]``; its x coordinate is always 0.

    Actions (``spaces.Discrete(2)``):
        0 -- increase the car velocity by ``VELOCITY_STEP``
        1 -- decrease the car velocity by ``VELOCITY_STEP``

    ``reset()`` must be called before ``step()``: ``episode_path`` starts empty
    and ``step()`` indexes into it.
    """

    # Sprites drawn by custom_render().
    CAR_SPRITE = 'https://github.com/JHermosillaD/Reinforcement_learning/blob/main/environment_simulator_V1/imgs/car.png?raw=true'
    PEDESTRIAN_SPRITE = 'https://github.com/JHermosillaD/Reinforcement_learning/blob/main/environment_simulator_V1/imgs/pedestrian.png?raw=true'

    # Size (inches) of every rendered frame; fixed so that all frames share one shape.
    FIGSIZE = (5, 5)

    # Velocity change applied by a single action.
    VELOCITY_STEP = 0.1

    # Lower bound used in the reward so that a zero distance cannot produce inf.
    MIN_DISTANCE = 1e-6

    # Decoded sprite arrays keyed by path/URL. Deliberately shared by all
    # instances: the sprites never change and downloading them is slow.
    _image_cache = {}

    def __init__(self):
        self.observations = 4       # number of pedestrian positions per observation
        self.max_step_size = 4      # upper bound of the pedestrian step length drawn in reset()
        self.deltaT = 3             # factor applied to the velocity when the car position is advanced
        self.width = 50             # x extent of the pedestrian spawn area, centred on 0
        self.hight = 50             # y extent of the pedestrian spawn area, centred on 0 (attribute name kept as-is)
        self.episode_path = []      # current observation, filled by reset()/step()
        self.car_init_pos = 2       # car y position at the start of every episode
        self.car_init_vel = 1       # car velocity at the start of every episode
        self.car_state = [self.car_init_pos, self.car_init_vel]
        self.action_space = spaces.Discrete(2) # A = {v+, v-}

    @staticmethod
    def _straight_path(start, increment, n_points):
        """Return ``n_points`` positions starting at ``start``, each offset by ``increment`` from the previous one."""
        path = [tuple(start)]
        for _ in range(n_points - 1):
            prev_x, prev_y = path[-1]
            path.append((prev_x + increment[0], prev_y + increment[1]))
        return path

    def reset(self):
        """Start a new episode and return the initial pedestrian path.

        The car is put back to its initial position and velocity. The
        pedestrian gets a random start inside the spawn area, a random step
        length in ``[2, max_step_size]`` and a random heading. Because the
        cosine and sine of the heading are rounded, each component of the
        direction is -1, 0 or 1.

        Returns:
            list[tuple]: ``self.observations`` consecutive ``(x, y)`` positions.
        """
        # Restore the whole car state; otherwise the position reached in the
        # previous episode would leak into the new one.
        self.car_state[0] = self.car_init_pos
        self.car_state[1] = self.car_init_vel

        step_size = random.randint(2, self.max_step_size)
        x_random = random.randint(round(-self.width/2), round(self.width/2))
        y_random = random.randint(round(-self.hight/2), round(self.hight/2))

        # Nudge the pedestrian off the car's y coordinate.
        if y_random == self.car_state[0]:
            y_random += 1

        theta = random.uniform(0, 2*np.pi)
        increment = (step_size*round(np.cos(theta)), step_size*round(np.sin(theta)))

        self.episode_path = self._straight_path((x_random, y_random), increment, self.observations)
        return self.episode_path

    def select_action(self, action):
        """Apply ``action`` to the car velocity (0: speed up, 1: slow down); other values are ignored."""
        if action == 0:
            self.car_state[1] += self.VELOCITY_STEP
        elif action == 1:
            self.car_state[1] -= self.VELOCITY_STEP

    def step(self, action):
        """Advance the pedestrian and the car by one step.

        The pedestrian keeps walking with the average per-interval displacement
        of the current path; the new path starts one increment past the current
        last point.

        Args:
            action: 0 or 1, see ``select_action``.

        Returns:
            tuple: ``(path, reward, done)`` where ``path`` is the new list of
            pedestrian positions, ``reward`` is the inverse of the distance
            between the last path point and the car, and ``done`` is True when
            that distance is at most 1.
        """
        first, last = self.episode_path[0], self.episode_path[-1]
        n_intervals = self.observations - 1
        increment = ((last[0] - first[0]) / n_intervals,
                     (last[1] - first[1]) / n_intervals)
        new_start = (last[0] + increment[0], last[1] + increment[1])

        self.episode_path = self._straight_path(new_start, increment, self.observations)

        # NOTE(review): the distance is measured against the car position
        # *before* this step's action moves the car -- confirm this is intended.
        target_x, target_y = self.episode_path[-1]
        distance = np.sqrt(target_x**2 + (target_y - self.car_state[0])**2)

        self.select_action(action)
        self.car_state[0] = self.car_state[0] + self.car_state[1]*self.deltaT

        # Floor the divisor so a zero distance gives a large finite reward
        # instead of inf plus a RuntimeWarning.
        reward = 1 / max(distance, self.MIN_DISTANCE)
        done = bool(distance <= 1)
        return self.episode_path, reward, done

    @staticmethod
    def _read_png(path):
        """Decode a PNG from a local path or an http(s) URL into an array."""
        if path.startswith(("http://", "https://")):
            # Recent Matplotlib releases refuse URLs in imread(), so fetch the
            # bytes ourselves and decode them from memory.
            from urllib.request import urlopen
            with urlopen(path) as response:
                return plt.imread(io.BytesIO(response.read()), format="png")
        return plt.imread(path, format="png")

    def getImage(self,path):
        """Return a fresh ``OffsetImage`` (zoom 0.15) for the PNG at ``path``.

        String paths/URLs are decoded once and cached; a new ``OffsetImage`` is
        still created on every call so each artist is independent.
        """
        if not isinstance(path, str):
            # File objects and other inputs are read directly, without caching.
            return OffsetImage(plt.imread(path, format="png"), zoom=0.15)
        if path not in self._image_cache:
            self._image_cache[path] = self._read_png(path)
        return OffsetImage(self._image_cache[path], zoom=0.15)

    def getFig_toArray(self, fig, dpi=250):
        """Rasterise ``fig`` as a PNG at ``dpi`` and return it as an RGB image array."""
        with io.BytesIO() as buf:
            fig.savefig(buf, format="png", dpi=dpi)
            encoded = np.frombuffer(buf.getvalue(), dtype=np.uint8)
        bgr = cv2.imdecode(encoded, cv2.IMREAD_COLOR)
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def custom_render(self, path):
        """Draw the car, the road line and the pedestrian ``path``; return the frame as an image array.

        Args:
            path: iterable of ``(x, y)`` pedestrian positions.
        """
        # Pass the size explicitly: setting rcParams after the figure exists
        # does not resize it and would change global state for later figures.
        fig, ax = plt.subplots(figsize=self.FIGSIZE)
        fig.subplots_adjust(0,0,1,1)
        ax.axis('off')
        ax.plot([0,0], [-100,100], linestyle='--', linewidth=3, color='black')
        ax.set_xlim([-100, 100])
        ax.set_ylim([-100, 100])

        car = AnnotationBbox(self.getImage(self.CAR_SPRITE), (0, self.car_state[0]), frameon=False)
        ax.add_artist(car)

        xs = []
        ys = []
        for point in path:
            pedestrian = AnnotationBbox(self.getImage(self.PEDESTRIAN_SPRITE), point, frameon=False)
            xs.append(point[0])
            ys.append(point[1])
            ax.add_artist(pedestrian)
        ax.plot(xs, ys, linestyle='--', linewidth=1, color='black')

        plot_array = self.getFig_toArray(fig)
        # Close this specific figure so frames do not accumulate in memory.
        plt.close(fig)
        return plot_array

In [16]:
# Create the environment instance used by the rollout and animation cells below.
env = TC_environment()

In [17]:
# Roll out one episode with random actions and collect one rendered frame per step.
N_STEPS = 10  # maximum number of environment steps to record

fig, ax = plt.subplots()
fig.subplots_adjust(0,0,1,1)
ax.axis('off')

state = env.reset()
episode_images = []
for n_step in range(N_STEPS):
    im = ax.imshow(env.custom_render(state), aspect='auto')
    episode_images.append([im])
    action = env.action_space.sample()
    new_state, reward, done = env.step(action)
    state = new_state
    if done:
        # The episode has terminated; stepping further would record frames
        # of a finished episode.
        break
# Close the host figure explicitly so the notebook does not also show it as a static image.
plt.close(fig)

In [18]:
# Turn the collected frames into a looping animation (500 ms per frame) and embed it as HTML5 video.
# `repeat` is a boolean flag; use `repeat_delay` (milliseconds) if a pause between loops is wanted.
experiment = animation.ArtistAnimation(fig, episode_images, interval=500, repeat=True, blit=True)
HTML(experiment.to_html5_video())