In [1]:
import sys
sys.path.append('C:\Program Files\Python37\Lib\site-packages')

import pygame
import math
import random

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision

import matplotlib.pyplot as plt

import pong

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 5, 3)
        self.pool = nn.MaxPool2d(2, 2)
        width = (960//2//2//2//2 - 2)//2
        height = (540//2//2//2//2 - 2)//2
        self.fc1 = nn.Linear(width*height*5, 50)
        self.fc2 = nn.Linear(50, 10)
        self.fc3 = nn.Linear(10, 1)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [3]:
def epsilonGreedy(Qnet, game, epsilon):
    moves = ['up', 'down', 'stay']
    random.shuffle(moves)
    # random move
    if np.random.uniform() < epsilon:
        move = moves[random.sample(range(len(moves)), 1)[0]]
        game.makeMove(move, 'left')
        Q = Qnet(game.draw())
        # Undo the move
        if move == 'up':
            game.makeMove('down', 'left')
        elif move == 'down':
            game.makeMove('up', 'left')
    # greedy move
    else:
        qs = []
        for m in moves:
            game.makeMove(m, 'left')
            qs.append(Qnet(game.draw()))
            # Undo the move
            if m == 'up':
                game.makeMove('down', 'left')
            elif m == 'down':
                game.makeMove('up', 'left')
        move = moves[np.argmax(qs)]
        Q = np.max(qs)
    return move, Q

In [4]:
def randomMover(state):
    return random.choice(['up', 'down', 'stay'])

In [5]:
def policyMover(game):
    return game.bestMove()

In [6]:
def trainQnet(Qnet, optim, lossFunc, nReps, nIterations, epsilon, epsilonDecayFactor, game, opponentStrategyF):
    outcomes = np.zeros(nReps)
    quickWinRatio = np.zeros(nReps)

    for rep in range(nReps):
        if rep > 0:
            epsilon *= epsilonDecayFactor
        step = 0
        done = False
        
        states = torch.tensor([])
        Qs = torch.tensor([])
        
        # Don't let the game bug out
        game.clock.tick()
        
        # Pick our initial move
        move, Qnext = epsilonGreedy(Qnet, game, epsilon)
        while not done:
            
            # Neural network makes move
            game.makeMove(move, 'left')
            
            # Opponent makes move
            if not done:
                muv = opponentStrategyF(game)
                game.makeMove(muv, 'right')
                
            # Game updates ball position and checks if victory occurred
            done, Qgame = game.update()
            
            # Game returns its pixel values
            image = game.draw()
            
            # Game draws to screen
            game.display()
                
            if not done:
                # Figure out how we should next move and what the new score is
                move, Qnext = epsilonGreedy(Qnet, game, epsilon)
                Qgame = torch.clamp(Qnext + Qgame, -1, 1)
            else:
                # Record and print this outcome
                outcomes[rep] = Qgame
                print('Repetition:', rep, "Epsilon:", epsilon)
                print("Win ratio:", sum(outcomes == 1)/(rep+1))
                back = rep - 49 if rep > 49 else 0
                bot = 50 if rep > 49 else rep + 1
                quickWinRatio[rep] = sum(outcomes[back:rep+1] == 1)/bot
                print("Win ratio past 50:", quickWinRatio[rep])
               
            # Store the states and Qs so we can train at end
            states = torch.cat((states, image), dim=0)
            Qs = torch.cat((Qs, Qgame), dim=0)
            
        for i in range(nIterations):
            optim.zero_grad()
            out = network(states)
            loss = lossFunc(out, Qs)
            loss.backward(retain_graph=True)
            optimizer.step()

    print('DONE')
    return Qnet, outcomes, quickWinRatio

In [7]:
#network = CNN()#torch.load('pongSave.pt')
network = torch.load('pongSave.pt')
optimizer = optim.SGD(network.parameters(), lr=0.0005)
loss = nn.MSELoss()

nReps = 100000
nIterations = 1
epsilon = 0.3
epsilonDecayFactor = 0.999 

strat = policyMover

pygame.init()
pongGame = pong.Pong()

Qnet, outcome, steps, samples, quickWinRatio = trainQnet(network, optimizer, loss, nReps, nIterations, epsilon, epsilonDecayFactor, pongGame, strat)

Repetition: 0 Epsilon: 0.3
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 1 Epsilon: 0.29969999999999997
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 2 Epsilon: 0.29940029999999995
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 3 Epsilon: 0.2991008997
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 4 Epsilon: 0.2988017988003
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 5 Epsilon: 0.2985029970014997
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 6 Epsilon: 0.29820449400449817
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 7 Epsilon: 0.2979062895104937
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 8 Epsilon: 0.2976083832209832
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 9 Epsilon: 0.29731077483776225
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 10 Epsilon: 0.2970134640629245
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 11 Epsilon: 0.2967164505988616
Win ratio: 0.0
Win ratio past 50: 0.0
Repetition: 12 Epsilon: 0.2964197341482627
Win ratio: 0.0


KeyboardInterrupt: 

In [9]:
torch.save(network, 'pongSave.pt')

In [10]:
print(network.width)

AttributeError: 'CNN' object has no attribute 'width'