<a href="https://colab.research.google.com/github/Fatkhi/Neural-networks/blob/master/tictactoe.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Copied from https://github.com/neilslater/game_playing_scripts

'''
   Copyright 2017 Neil Slater

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
'''

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import numpy as np
import csv
import random
from itertools import groupby

class TicTacToeGame():
    """A game of tic-tac-toe whose board is a 9-character string.

    Cells are stored row by row; each one is 'X', 'O' or ' ' (empty).
    A move is expressed as the complete board string that results from it.

    Attributes:
        state: current board string.
        player: mark of the player to move, or None once somebody has won.
        winner: 'X' or 'O' once a line is completed, otherwise None.
    """

    def __init__(self):
        self.state = ' ' * 9
        self.player = 'X'
        self.winner = None

    def allowed_moves(self):
        """Return every board reachable by the current player in one move."""
        board = self.state
        return [
            board[:pos] + self.player + board[pos + 1:]
            for pos, cell in enumerate(board)
            if cell == ' '
        ]

    def make_move(self, next_state):
        """Advance the game to `next_state` and hand the turn over.

        Raises:
            Exception: if the game already has a winner, or `next_state` is
                not one of `allowed_moves()`.
        """
        if self.winner:
            raise Exception("Game already completed, cannot make another move!")
        if not self.__valid_move(next_state):
            message = "Cannot make move {} to {} for player {}".format(
                self.state, next_state, self.player)
            raise Exception(message)

        self.state = next_state
        self.winner = self.predict_winner(next_state)
        if self.winner:
            # Nobody is to move once the game has been won.
            self.player = None
        else:
            self.player = 'O' if self.player == 'X' else 'X'

    def playable(self):
        """Return True while there is no winner and an empty cell remains."""
        if self.winner:
            return False
        return any(self.allowed_moves())

    def predict_winner(self, state):
        """Return the mark owning a completed line in `state`, or None.

        All eight lines are scanned; if several are complete, the last one
        in scan order decides the result.
        """
        triples = (
            (0, 1, 2), (3, 4, 5), (6, 7, 8),   # rows
            (0, 3, 6), (1, 4, 7), (2, 5, 8),   # columns
            (0, 4, 8), (2, 4, 6),              # diagonals
        )
        found = None
        for a, b, c in triples:
            marks = state[a] + state[b] + state[c]
            if marks in ('XXX', 'OOO'):
                found = marks[0]
        return found

    def __valid_move(self, next_state):
        """Return True if `next_state` is reachable from the current board."""
        return next_state in self.allowed_moves()

    def print_board(self):
        """Print the board as a 3x3 grid on standard output."""
        cells = self.state
        row_template = '     {} | {} | {} '
        divider = '    -----------'
        for start in (0, 3, 6):
            if start:
                print(divider)
            print(row_template.format(cells[start], cells[start + 1], cells[start + 2]))


In [0]:
class Net(nn.Module):
    """Small fully connected network mapping a 9-cell board to one scalar value.

    Layers are 9 -> 128 -> 128 -> 1 with tanh after the two hidden layers and
    a linear output.  The module owns its SGD optimizer (`self.optim`) and
    keeps the most recent loss tensor in `self._loss`.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(9, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 1)
        self.optim = optim.SGD(self.parameters(), lr=.001)

    def forward(self, x):
        """Score one board or a batch of boards.

        Args:
            x: a sequence of 9 numbers, a sequence of such sequences, or a
                tensor whose element count is a multiple of 9.

        Returns:
            Tensor of shape (batch, 1) holding one value per board.
        """
        # as_tensor (rather than torch.tensor) accepts existing tensors without
        # the copy-construct warning; casting to the layer's dtype/device lets
        # integer encodings through instead of failing inside nn.Linear.
        x = torch.as_tensor(x, dtype=self.fc1.weight.dtype,
                            device=self.fc1.weight.device)
        # reshape also copes with non-contiguous tensor inputs.
        x = x.reshape(-1, 9)
        x = torch.tanh(self.fc1(x))
        x = torch.tanh(self.fc2(x))
        x = self.fc3(x)
        return x

    def loss(self, output, target, **kwargs):
        """Return the MSE between `output` and `target`, remembering it in `self._loss`.

        Extra keyword arguments are forwarded to `F.mse_loss`.
        """
        self._loss = F.mse_loss(output, target, **kwargs)
        return self._loss

In [0]:
class Agent():
    """Tic-tac-toe agent that learns position values through self-play.

    A `Net` scores the board resulting from each candidate move.  Scores are
    taken from the point of view of `player_mark`: when that player is to
    move the highest-scoring board is preferred, otherwise the lowest.

    Args:
        game_class: zero-argument callable returning a fresh game object that
            exposes `state`, `player`, `winner`, `allowed_moves()`,
            `make_move()`, `playable()` and `print_board()`.
        epsilon: probability, while learning, of playing a uniformly random
            allowed move instead of the best-scoring one.
        player_mark: mark whose victory is rewarded with +1.
        gamma: discount factor applied to the value of the following position.
    """

    def __init__(self, game_class, epsilon=0.1, player_mark='X', gamma=0.5):
        self.net = Net()
        self.NewGame = game_class
        self.epsilon = epsilon
        self.gamma = gamma
        self.player_mark = player_mark

    def transform(self, move):
        """Encode a board string as a list of floats: 'X' -> 1, 'O' -> -1, other -> 0."""
        encoding = {'X': 1., 'O': -1.}
        return [encoding.get(char, 0.) for char in move]

    def learn_game(self, num_episodes=1000):
        """Run `num_episodes` self-play training episodes."""
        for episode in range(num_episodes):
            self.learn_from_episode()

    def learn_from_episode(self):
        """Play one self-play game to the end, updating the network after every move."""
        game = self.NewGame()
        _, move = self.learn_select_move(game)
        while move:
            move = self.learn_from_move(game, move)

    def learn_from_move(self, game, move):
        """Apply `move`, then take one gradient step on its predicted value.

        The regression target is the reward after the move plus `gamma` times
        the network's value of the best follow-up board (0 when the game is
        over).

        Returns:
            The move chosen for the next turn, or None if the game has ended.
        """
        game.make_move(move)
        reward = self.__reward(game)
        selected_next_move = None
        next_state_value = 0.0

        if game.playable():
            best_move, selected_next_move = self.learn_select_move(game)
            # The bootstrap value is a fixed target: no gradient flows through it.
            with torch.no_grad():
                next_state_value = self.net(self.transform(best_move))

        target = torch.tensor(reward).view(-1, 1) + self.gamma * next_state_value

        self.net.optim.zero_grad()
        curr_value = self.net(self.transform(move))
        loss = self.net.loss(curr_value, target)
        loss.backward()
        self.net.optim.step()

        return selected_next_move

    def _rank_moves(self, game):
        """Return `(allowed_moves, best_move)` for the player to move in `game`."""
        moves = game.allowed_moves()
        with torch.no_grad():
            moves_value = self.net([self.transform(m) for m in moves])
        # Values are from player_mark's perspective, so the opponent minimises.
        pick = torch.argmax if game.player == self.player_mark else torch.argmin
        return moves, moves[int(pick(moves_value))]

    def learn_select_move(self, game):
        """Return `(best_move, selected_move)` using epsilon-greedy exploration.

        `selected_move` equals `best_move` unless, with probability
        `epsilon`, a uniformly random allowed move is drawn instead.
        """
        moves, best_move = self._rank_moves(game)

        selected_next_move = best_move
        if random.random() < self.epsilon:
            selected_next_move = random.choice(moves)

        return (best_move, selected_next_move)

    def play_select_move(self, game):
        """Return the best-scoring move for the player to move (no exploration)."""
        return self._rank_moves(game)[1]

    def _show_turn(self, game, turn):
        """Print the turn header followed by the current board."""
        print(" \nTurn {}\n".format(turn))
        game.print_board()

    def _finish(self, game, turn, verbose):
        """Optionally print the final board and outcome; return the winner mark or '-'."""
        if verbose:
            self._show_turn(game, turn)
        if game.winner:
            if verbose:
                print("\n{} is the winner!".format(game.winner))
            return game.winner
        if verbose:
            print("\nIt's a draw!")
        return '-'

    def demo_game(self, verbose=False):
        """Let the agent play both sides greedily; return 'X', 'O' or '-' (draw)."""
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                self._show_turn(game, t)
            game.make_move(self.play_select_move(game))
            t += 1
        return self._finish(game, t, verbose)

    def random_move(self, game):
        """Return a uniformly random allowed move."""
        return random.choice(game.allowed_moves())

    def random_game(self, agent_player='X', verbose=False):
        """Play the agent (as `agent_player`) against a random opponent.

        Returns:
            'X' or 'O' for the winner, '-' for a draw.
        """
        game = self.NewGame()
        t = 0
        while game.playable():
            if verbose:
                self._show_turn(game, t)
            if game.player == agent_player:
                move = self.play_select_move(game)
            else:
                move = self.random_move(game)
            game.make_move(move)
            t += 1
        return self._finish(game, t, verbose)

    def interactive_game(self, agent_player='X'):
        """Play the agent (as `agent_player`) against a human typing moves on stdin.

        Returns:
            'X' or 'O' for the winner, '-' for a draw.
        """
        game = self.NewGame()
        t = 0
        while game.playable():
            self._show_turn(game, t)
            if game.player == agent_player:
                move = self.play_select_move(game)
            else:
                move = self.__request_human_move(game)
            game.make_move(move)
            t += 1
        return self._finish(game, t, True)

    def __reward(self, game):
        """Return +1 if `player_mark` has won, -1 if the other player has, else 0."""
        if game.winner == self.player_mark:
            return 1.0
        elif game.winner:
            return -1.0
        else:
            return 0.0

    def __request_human_move(self, game):
        """Prompt until the human enters the 1-based number of an empty cell.

        Non-numeric input and numbers of occupied or out-of-range cells are
        rejected and the prompt is repeated.

        Returns:
            The board string resulting from the chosen move.
        """
        allowed_moves = [i+1 for i in range(9) if game.state[i] == ' ']
        while True:
            answer = input('Choose move for {}, from {} : '.format(game.player, allowed_moves))
            try:
                idx = int(answer)
            except ValueError:
                # A typo should not abort the whole game.
                print("'{}' is not a number, please try again.".format(answer))
                continue
            if idx in allowed_moves:
                return game.state[:idx-1] + game.player + game.state[idx:]

In [0]:
def demo_game_stats(agent, agent_player='X', num_games=10000):
    """Play the agent against a random opponent and report outcome percentages.

    Args:
        agent: object providing `random_game(agent_player)`, which returns
            'X', 'O' or '-' for each game played.
        agent_player: mark the agent plays as.
        num_games: number of games to play (must be positive).

    Returns:
        Dict mapping 'X', 'O' and '-' to the percentage of games with that
        result.  The same summary is also printed.

    Raises:
        ValueError: if `num_games` is not positive.
    """
    if num_games <= 0:
        raise ValueError("num_games must be a positive integer")
    results = [agent.random_game(agent_player) for _ in range(num_games)]
    # Derive the percentage from num_games so the two cannot drift apart.
    game_stats = {k: 100 * results.count(k) / num_games for k in ['X', 'O', '-']}
    print("For {} percentage results: {}".format(agent_player, game_stats))
    return game_stats

In [0]:
# Build the agent to train on TicTacToeGame. epsilon=0.5 is the probability of
# playing a random move while learning; gamma=0.9 discounts the next position's value.
agent = Agent(TicTacToeGame, epsilon = 0.5, gamma = 0.9)

In [6]:
# Train in rounds of `games_for_iteration` self-play episodes (at most 1000
# rounds).  After each round show one greedy self-play result and measure the
# agent against a random opponent.
games_for_iteration = 1000
for i in range(1, 1001):
    agent.learn_game(games_for_iteration)
    print('{} games learned: '.format(i * games_for_iteration))
    print('Self play result:')
    print(agent.demo_game())
    # Only evaluate the agent as 'O' once it no longer loses as 'X'.
    if demo_game_stats(agent)["O"] != 0.0:
        continue
    # Stop training as soon as it also never loses when playing 'O'.
    if demo_game_stats(agent, agent_player="O")["X"] == 0.0:
        break

1000 games learned: 
Self play result:
X
For X percentage results: {'X': 89.94, 'O': 6.05, '-': 4.01}
2000 games learned: 
Self play result:
X
For X percentage results: {'X': 80.63, 'O': 17.4, '-': 1.97}
3000 games learned: 
Self play result:
X
For X percentage results: {'X': 90.44, 'O': 5.82, '-': 3.74}
4000 games learned: 
Self play result:
-
For X percentage results: {'X': 96.97, 'O': 1.3, '-': 1.73}
5000 games learned: 
Self play result:
-
For X percentage results: {'X': 94.17, 'O': 2.65, '-': 3.18}
6000 games learned: 
Self play result:
X
For X percentage results: {'X': 95.91, 'O': 2.82, '-': 1.27}
7000 games learned: 
Self play result:
-
For X percentage results: {'X': 97.51, 'O': 0.0, '-': 2.49}
For O percentage results: {'X': 8.01, 'O': 79.62, '-': 12.37}
8000 games learned: 
Self play result:
-
For X percentage results: {'X': 98.38, 'O': 0.0, '-': 1.62}
For O percentage results: {'X': 8.33, 'O': 82.58, '-': 9.09}
9000 games learned: 
Self play result:
X
For X percentage result

In [7]:
# Play one verbose self-play game with greedy moves, printing every turn;
# the returned result ('X', 'O' or '-') is shown as the cell output.
agent.demo_game(True)

 
Turn 0

       |   |   
    -----------
       |   |   
    -----------
       |   |   
 
Turn 1

       |   |   
    -----------
       | X |   
    -----------
       |   |   
 
Turn 2

       |   |   
    -----------
       | X |   
    -----------
       |   | O 
 
Turn 3

       |   |   
    -----------
       | X |   
    -----------
       | X | O 
 
Turn 4

       | O |   
    -----------
       | X |   
    -----------
       | X | O 
 
Turn 5

       | O | X 
    -----------
       | X |   
    -----------
       | X | O 
 
Turn 6

       | O | X 
    -----------
       | X |   
    -----------
     O | X | O 
 
Turn 7

       | O | X 
    -----------
     X | X |   
    -----------
     O | X | O 
 
Turn 8

       | O | X 
    -----------
     X | X | O 
    -----------
     O | X | O 
 
Turn 9

     X | O | X 
    -----------
     X | X | O 
    -----------
     O | X | O 

It's a draw!


'-'