In [2]:
!pip install pygame



In [78]:
from cmath import sin
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
import math
import pygame
from pygame import gfxdraw
from pandas import array
from typing import Optional
from os import path

class BallBalancerEnv(Env):
    """Gym environment in which an agent tilts a stick to keep a ball on it.

    The observation is a 1-element array holding the ball position along the
    stick. The action is one of three discrete values that rotate the stick by
    -1, 0 or +1 degree per step. The ball accelerates along the stick
    proportionally to the sine of the stick angle.

    An episode ends when the ball position reaches ``+/- max_state`` (the ball
    falls off) or after ``max_steps`` calls to :meth:`step`.

    Args:
        max_steps: Number of actions allowed per episode (default 50).
        verbose: When True (default), :meth:`step` prints the new state.
    """

    def __init__(self, max_steps=50, verbose=True):
        #Defining our action space, can have three possible actions: clockwise turn, counterclockwise turn, and do not move
        self.action_space = Discrete(3)
        #max position of the ball before it falls off stick
        self.max_state = 20
        #Ball possible positions (low, high)
        self.observation_space = Box(low=np.array([-self.max_state]), high=np.array([self.max_state]))
        #number of actions allowed per episode
        self.max_steps = max_steps
        #whether step() prints the state after every action
        self.verbose = verbose
        #Ball starting position (sampled from the inner half of the stick)
        self.state = self.observation_space.sample()/2
        #how many actions are left in the current episode
        self.balancing_actions = self.max_steps
        #the angle of the stick in degrees; -90, 90 for completely vertical
        self.stick_angle = 0
        #acceleration adds to velocity
        self.ball_velocity = 0
        #for rendering environment
        self.screen = None
        #screen dimensions (1000 by 1000 square)
        self.screen_dim = 1000

    def step(self, action):
        """Apply one action and advance the simulation by one step.

        Args:
            action: 0, 1 or 2; the stick rotates by ``action - 1`` degrees.

        Returns:
            Tuple ``(state, reward, done, info)``. ``reward`` is 1 while the
            ball is within 5 units of the stick centre and -1 otherwise.
            ``done`` is True once the ball has reached ``+/- max_state`` or
            the episode has run out of actions. ``info`` is an empty dict.
        """
        self.balancing_actions -= 1

        #lets say for each action, the stick rotates 1 degree
        self.stick_angle += action - 1

        #acceleration adds to velocity
        acc = -math.sin(math.radians(self.stick_angle)) * 9.81
        self.ball_velocity += acc

        # assuming each step is 1 sec, then self.state increments by velocity.
        # Rebind instead of using `+=`: an in-place add would mutate the array
        # object previously handed out by reset()/step(), so every observation
        # a caller kept would silently change to the newest state.
        self.state = self.state + self.ball_velocity
        if self.verbose:
            print(self.state)

        #reward system
        done = False
        if self.state >= -5 and self.state <= 5:
            reward = 1
        elif self.state >= self.max_state or self.state <= -self.max_state:
            # ball fell off either end of the stick
            reward = -1
            done = True
        else:
            reward = -1

        if self.balancing_actions <= 0:
            done = True

        # Set placeholder for info
        info = {}

        # Return step information
        return self.state, reward, done, info

    def render(self):
        """Draw the stick, the ball and the current state in a pygame window.

        The window is created on first use. The pygame event queue is pumped
        on every call; if the user closes the window, pygame is shut down and
        ``SystemExit`` is raised.

        Returns:
            True after the frame has been flipped to the display.
        """
        #defining variables
        stateToWidthRatio = 20
        stickWidth = self.max_state * stateToWidthRatio * 2
        stickHeight = stickWidth / 5
        stickX = self.screen_dim / 2
        stickY = 2 * self.screen_dim / 3
        ballRadius = 100.0
        angleRad = math.radians(self.stick_angle)
        sinA = math.sin(angleRad)
        cosA = math.cos(angleRad)
        #point above the stick centre, one ball radius off the stick surface
        disStickToBall = stickHeight/2+ballRadius
        newX = stickX + disStickToBall * sinA
        newY = stickY - disStickToBall * cosA
        #slide that point along the (rotated) stick by the ball position
        ballX = newX - self.state * stateToWidthRatio * cosA
        ballY = newY - self.state * stateToWidthRatio * sinA

        l, r, t, b = (
            -stickWidth / 2,
            stickWidth / 2,
            stickHeight / 2,
            -stickHeight / 2,
        )

        #rotate stick by stick_angle
        stickCoords = []
        for coord in [(l, b), (l, t), (r, t), (r, b)]:
            coord = pygame.math.Vector2(coord).rotate_rad(angleRad)
            stickCoords.append(coord)

        #add stickX and stickY to stickCoords
        stickCoords = [(c[0] + stickX, c[1] + stickY) for c in stickCoords]

        #rendering image of environment, starting screen
        if self.screen is None:
            pygame.init()
            self.screen = pygame.display.set_mode((self.screen_dim, self.screen_dim))

        # Fill background
        background = pygame.Surface(self.screen.get_size())
        background = background.convert()
        background.fill((200, 200, 200))

        # Display some text
        font = pygame.font.Font(None, 36)
        string = "State: " + str(self.state)
        text = font.render(string, 1, (10, 10, 10))
        textpos = text.get_rect()
        textpos.centerx = background.get_rect().centerx
        background.blit(text, textpos)

        # Draw Stick
        gfxdraw.aapolygon(background, stickCoords, (202, 152, 101))
        gfxdraw.filled_polygon(background, stickCoords, (202, 152, 101))

        # Draw Ball
        gfxdraw.aacircle(
            background,
            int(ballX),
            int(ballY),
            int(ballRadius),
            (129, 132, 203),
        )
        gfxdraw.filled_circle(
            background,
            int(ballX),
            int(ballY),
            int(ballRadius),
            (129, 132, 203),
        )

        #This will pump the event queue and close the window and program
        #if the user clicks the close button of the window
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                # Drop the dead surface so a later render() re-creates the
                # window instead of drawing on a destroyed display.
                self.screen = None
                raise SystemExit

        # Blit everything to the screen
        self.screen.blit(background, (0, 0))
        pygame.display.flip()
        return True

    def close(self):
        """Shut down the pygame window if render() ever opened one."""
        if self.screen is not None:
            pygame.display.quit()
            pygame.quit()
            self.screen = None

    def reset(self):
        """Start a new episode and return the initial observation.

        The ball is placed at a new random position in the inner half of the
        stick, and the step budget, ball velocity and stick angle are reset.

        Returns:
            The initial state array.
        """
        #reseting enviroment
        #ball placed at new random position
        self.state = self.observation_space.sample()/2
        #reseting balancing time
        self.balancing_actions = self.max_steps
        #reseting velocity
        self.ball_velocity = 0
        #the angle of the stick -90, 90 for completely vertical
        self.stick_angle = 0
        return self.state

In [79]:
# Runs a random-action agent for a few episodes to exercise the environment.
# BallBalancerEnv and pygame come from the previous cell.
import math
import random
import time
env = BallBalancerEnv()

episodes = 10
try:
    for episode in range(1, episodes+1):
        state = env.reset()
        done = False
        score = 0

        while not done:
            env.render()
            action = env.action_space.sample()
            n_state, reward, done, info = env.step(action)
            score+=reward

            #help make it visual
            time.sleep(0.05)
            print(done)
        print('Episode:{} Score:{}'.format(episode, score))
finally:
    # Always tear the window down, even if the loop is interrupted or raises;
    # otherwise an orphaned pygame window is left behind. pygame.quit() is
    # safe to call more than once.
    pygame.display.quit()
    pygame.quit()

[-0.11305562]
False
[-0.28426373]
False
[-0.7978359]
False
[-1.8248239]
False
[-3.3652275]
False
[-5.589942]
False
[-8.328073]
False
[-11.408567]
False
[-14.66027]
False
[-18.254337]
False
[-22.190767]
True
Episode:1 Score:-1
[4.4443564]
False
[4.2731485]
False
[4.2731485]
False
[4.2731485]
False
[4.2731485]
False
[4.1019406]
False
[3.7595243]
False
[3.074744]
False
[2.2187557]
False
[1.1915592]
False
[0.16436267]
False
[-1.0340419]
False
[-2.5748105]
False
[-4.628995]
False
[-7.025543]
False
[-9.935507]
False
[-13.529782]
False
[-17.808369]
False
[-22.771267]
True
Episode:2 Score:9
[1.6590328]
False
[1.4878247]
False
[1.3166165]
False
[1.1454084]
False
[0.97420025]
False
[0.97420025]
False
[0.97420025]
False
[0.8029921]
False
[0.28941995]
False
[-0.73756796]
False
[-2.10692]
False
[-3.9896877]
False
[-6.2148194]
False
[-8.953367]
False
[-12.034279]
False
[-15.457555]
False
[-19.394245]
False
[-24.015247]
True
Episode:3 Score:6
[7.250821]
False
[7.5932374]
False
[7.9356537]
False
[8.27