In [1]:
import numpy as np
import pygame as pg


In [4]:
"""pong table dimensions"""
WIDTH = 1
HEIGHT = 1

# paddle dimensions: width and height
P_W = 0.2
P_H = 0.02

# y positions of paddle 0 and paddle 1
Y0 = 0.9
Y1 = 0.1

# ball attributes: radius, x velocity, y velocity
BALL_R = 0.04
BALL_VX = 0
BALL_VY = 1

# state vector indices:
#   X0   -> x position of paddle 0
#   X1   -> x position of paddle 1
#   X_B  -> x position of ball
#   Y_B  -> y position of ball
#   VX_B -> vx of ball
#   VY_B -> vy of ball
X0, X1, X_B, Y_B, VX_B, VY_B = range(6)

# simulation time step
dt = 0.01

# presumably the discrete paddle actions (move left / stay / move right);
# not used by any cell visible here -- TODO confirm
A = [-1, 0, 1]

In [5]:
# frame rate cap passed to the pygame clock
FPS = 40

# colours
BG_COLOR = pg.Color(200, 200, 200)
CURVE_COLOR = pg.Color(70, 70, 70)
BALL_COLOR = pg.Color(200, 70, 70)

# world -> screen mapping
SCALE = 500  # pixels per world unit
PAD = 200  # total padding in pixels (half of it on each side)

# world-coordinate extent that is drawn
X_MIN = -1.2
X_MAX = 0.5
Y_MIN = 0
Y_MAX = 1

# Window size in pixels.
# NOTE(review): these rebind the WIDTH / HEIGHT names that the pong-constants
# cell sets to the table size (1), so the values seen by other code depend on
# which cell ran last.
WIDTH = int(SCALE * (X_MAX - X_MIN) + PAD)
HEIGHT = int(SCALE * (Y_MAX - Y_MIN) + PAD)


def transform(x, y):
    """Map a world-space (x, y) coordinate to pygame screen coordinates.

    The world x axis is shifted so that ``X_MIN`` lands ``PAD / 2`` pixels from
    the left edge, and the y axis is flipped so that ``Y_MAX`` lands ``PAD / 2``
    pixels from the top (pygame's y axis grows downwards).

    Parameters
    ----------
    x, y : float or numpy array
        World coordinates.

    Returns
    -------
    tuple
        ``(screen_x, screen_y)`` in pixels.
    """
    half_pad = PAD / 2
    # use the configured extent instead of repeating its values as literals,
    # so the mapping stays in sync with the window size derived from it
    screen_x = (x - X_MIN) * SCALE + half_pad
    screen_y = (Y_MAX - y) * SCALE + half_pad
    return screen_x, screen_y

def curve(x):
    """Height of the track at horizontal position ``x``.

    Evaluates ``0.45 * sin(3 * x) + 0.55``; works on scalars and numpy arrays.
    """
    amplitude, frequency, offset = 0.45, 3, 0.55
    return amplitude * np.sin(frequency * x) + offset


class PgCar:
    """Pygame viewer that animates a marker moving along ``curve``."""

    def __init__(self):
        self.screen, self.bg = self.init()
        # screen-space points of the track; built on first draw and reused
        self._curve_points = None

    def init(self):
        """Start pygame, open the window and paint the background.

        Returns
        -------
        tuple
            ``(screen, bg)``: the display surface and a background surface
            filled with ``BG_COLOR``.
        """
        pg.init()  # initialize pygame
        # Derive the window size from the drawing extent rather than from the
        # WIDTH / HEIGHT globals: those names are also bound to the pong table
        # size (1) by another cell, so their value depends on cell run order.
        window_size = (
            int((X_MAX - X_MIN) * SCALE + PAD),
            int((Y_MAX - Y_MIN) * SCALE + PAD),
        )
        screen = pg.display.set_mode(window_size)  # set up the screen
        pg.display.set_caption("Mohamed Martini")  # add a caption
        bg = pg.Surface(screen.get_size()).convert()  # background surface
        bg.fill(BG_COLOR)
        screen.blit(bg, (0, 0))
        return screen, bg

    def draw_curve(self):
        """Draw the track as a chain of short, thick line segments."""
        points = getattr(self, "_curve_points", None)
        if points is None:
            # the track never changes, so sample and transform it only once
            xs = np.arange(X_MIN, X_MAX, 0.001)
            points = np.column_stack(transform(xs, curve(xs))).tolist()
            self._curve_points = points
        # pair every point with its successor; no exception handling is needed
        # to skip a missing first start point, so real drawing errors surface
        for start, end in zip(points, points[1:]):
            pg.draw.line(self.screen, CURVE_COLOR, start, end, width=7)

    def render(self):
        """Push the current frame to the window."""
        # a full flip is enough; a following display.update() would only
        # repeat the same full-screen refresh
        pg.display.flip()

    def reset_screen(self):
        """Clear the frame to the background colour and redraw the track."""
        self.screen.fill(BG_COLOR)
        self.draw_curve()

    def animate(self, X):
        """Animate a marker moving along the track.

        Parameters
        ----------
        X : sequence of float
            World-space x positions, one per frame. The marker is drawn at
            ``(X[i], curve(X[i]))``.

        Frames are shown at up to ``FPS`` frames per second, starting with
        ``X[0]``. After the last position the final frame stays on screen until
        the window is closed. An empty ``X`` shows only the track. pygame is
        shut down when the loop ends, even if drawing a frame raises.
        """
        clock = pg.time.Clock()
        radius = 20
        num_steps = len(X)
        i = 0
        run = True
        try:
            # show the track immediately, also when there is nothing to animate
            self.reset_screen()
            self.render()
            while run:
                clock.tick(FPS)
                for event in pg.event.get():
                    if event.type == pg.QUIT:
                        run = False
                if i >= num_steps:
                    # trajectory finished: keep the last frame, keep polling events
                    continue
                center = transform(X[i], curve(X[i]))
                self.reset_screen()
                # NOTE(review): the original referenced CAR_COLOR, which is not
                # defined in any visible cell; BALL_COLOR is the only marker
                # colour defined, so it is used here -- confirm the intended colour.
                pg.draw.circle(self.screen, BALL_COLOR, center, radius, width=radius)
                self.render()
                i += 1
        finally:
            pg.quit()

# X = episode()      
# PgCar().animate(X)


In [44]:
def pong_transition(s, a, table_width=1.0):
    """
    Advance the pong simulation by one time step ``dt``.

    Parameters
    ----------
    s : array-like of length 6
        State vector <x_{p0}, x_{p1}, x_{ball}, y_{ball}, v_x_{ball}, v_y_{ball}>.
        Not modified.
    a : array-like of length 2
        Action vector <v_x_{p0}, v_x_{p1}> (paddle x velocities). Not modified.
    table_width : float, optional
        Width of the pong table, used for the paddle limits and the side walls.
        Defaults to 1. It is a parameter rather than the global ``WIDTH``
        because the rendering cell rebinds ``WIDTH`` to a window size in pixels.

    Returns
    -------
    s_p : numpy.ndarray
        Next state: paddles and ball moved by their velocities times ``dt``,
        with the ball velocity updated by any paddle or wall bounce.
    r : numpy.ndarray of shape (2,)
        Reward per paddle; ``-1`` for the paddle that missed the ball, else ``0``.
    terminal : bool
        True when a paddle missed the ball.
    """
    s = np.asarray(s, dtype=float)
    # private float copy: the effective action is zeroed below and the caller's
    # array must not be changed by that
    a = np.array(a, dtype=float)
    s_p = s.copy()

    # get the paddles' next positions
    # if an action would take a paddle off the table, its effective velocity is 0
    p_trans = s[:X_B] + a * dt
    a[(p_trans < P_W / 2) | (p_trans > table_width - P_W / 2)] = 0
    s_p[:X_B] += a * dt

    r = np.zeros(2)
    terminal = False

    # A paddle line only matters while the ball is moving towards it; otherwise
    # a ball that has just bounced but still overlaps the line would have its
    # velocity flipped again on the next step.
    if s[Y_B] - BALL_R <= Y1 and s[VY_B] < 0:
        # ball has reached the line of paddle 1
        if s[X1] - P_W / 2 <= s[X_B] <= s[X1] + P_W / 2:
            # ball is on the paddle:
            # flip y velocity, and add paddle x velocity to ball x velocity
            s_p[VY_B] *= -1
            s_p[VX_B] += a[1]
        else:
            r[1] = -1
            terminal = True

    elif s[Y_B] + BALL_R >= Y0 and s[VY_B] > 0:
        # ball has reached the line of paddle 0
        if s[X0] - P_W / 2 <= s[X_B] <= s[X0] + P_W / 2:
            # ball is on the paddle:
            # flip y velocity, and add paddle x velocity to ball x velocity
            s_p[VY_B] *= -1
            s_p[VX_B] += a[0]
        else:
            r[0] = -1
            terminal = True

    # if the ball touches a side wall while moving into it, reverse its x velocity
    hits_left = s[X_B] <= BALL_R and s_p[VX_B] < 0
    hits_right = s[X_B] >= table_width - BALL_R and s_p[VX_B] > 0
    if hits_left or hits_right:
        s_p[VX_B] *= -1

    # transition ball according to its (possibly updated) velocity
    s_p[X_B:VX_B] += s_p[VX_B:] * dt

    return s_p, r, terminal
    

In [45]:
# Smoke test of a single transition. The state is laid out as
# <x_p0, x_p1, x_ball, y_ball, vx_ball, vy_ball>; both paddles are pushed with velocity 1.
s = np.array((0.9, 0.9, 0.2, 0.9, 0.5, 0.5))
a = np.ones(2, dtype=int)
pong_transition(s, a)

(array([0.9  , 0.9  , 0.205, 0.905, 0.5  , 0.5  ]), array([-1.,  0.]), True)