-
Notifications
You must be signed in to change notification settings - Fork 0
/
TicTacToeBot.py
63 lines (52 loc) · 2.49 KB
/
TicTacToeBot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Created on Fri Oct 23 16:33:00 2015
@author: Ralf Engelken, Franziska Neu
"""
from World_Model import World_Model
import Bot_Random
from Bot_RL_MLP_SARSA import Bot_RL_MLP
import numpy as np
class TicTacToeBot:
    """Tic-tac-toe player backed by a ``Bot_RL_MLP`` learner.

    The instance owns a ``World_Model`` (``self.world``) and a learning bot
    (``self.bot_2``). ``train`` plays games against a ``Bot_Random_Static``
    opponent, ``load_data`` restores a stored bot state and ``get_action``
    asks the learner for a move on an externally supplied board.
    """

    def __init__(self):
        """Set the game / learning parameters and create bot and world."""
        # Game parameters
        self.size_x = 3  # the x-dimension of the board
        self.size_y = 3  # the y-dimension of the board
        self.size_win = 3  # the number of stones in a row necessary to win
        # True: only the x-position is used, the stones always fall down to
        # the bottom-most empty field
        self.gravity = False
        self.initial_stones = 0  # the number of stones already on the field
        # One entry per board cell (derived from the dimensions instead of a
        # hard-coded 9 so it stays consistent with size_x / size_y).
        self.initial_field = [0] * (self.size_x * self.size_y)

        # RL and MLP parameters
        self.rl_reward = [0.0, 1.0, -1.0]  # rewards for: draw, win, lose
        self.rl_beta = 2  # passed to Bot_RL_MLP; roughly 1 - 5
        self.mlp_hidden = 10  # number of hidden neurons
        self.mlp_learning_rate = 0.1  # learning rate of the MLP

        # Choose bots
        self.bot_2 = Bot_RL_MLP(self.size_x, self.size_y, self.rl_beta,
                                self.mlp_hidden, self.mlp_learning_rate,
                                self.rl_reward)
        self.world = World_Model(self.size_x, self.size_y, self.size_win,
                                 self.gravity,
                                 initial_stones=self.initial_stones)

    def load_data(self, filename):
        """Load the learner state from *filename*.

        Delegates to ``self.bot_2.load_data`` and afterwards copies the
        bot's ``initial_field`` into ``self.initial_field``.
        """
        self.bot_2.load_data(filename)
        self.initial_field = self.bot_2.initial_field

    def train(self, runs):
        """Play *runs* games of the learner against a random opponent.

        Player 1 is a ``Bot_Random_Static``; every other active player value
        is handled by ``self.bot_2``. Each bot evaluates its own action right
        after it has been performed on the world.
        """
        # Uses the configured board dimensions rather than the literal 3, 3
        # (identical values with the defaults set in __init__).
        bot_1 = Bot_Random.Bot_Random_Static(self.size_x, self.size_y)
        for _ in range(runs):
            # Play a game
            self.world.new_init(initial_stones=self.initial_stones)
            # Make a move until the game ends (get_winner() leaves -1)
            while self.world.get_winner() == -1:
                if self.world.active_player == 1:
                    (x, y) = bot_1.get_action(self.world)
                    self.world.perform_action(x, y)
                    bot_1.evaluate_action(self.world)
                else:
                    (x, y) = self.bot_2.get_action(self.world)
                    self.world.perform_action(x, y)
                    self.bot_2.evaluate_action(self.world)

    def get_action(self, sensor_info):
        """Return the learner's move ``(x, y)`` for the given board.

        *sensor_info* is an indexable sequence of cell values. Its entries
        are copied into a fresh zero-filled array with one slot per board
        cell (``size_x * size_y``); cells not covered by *sensor_info* stay
        0. Supplying more entries than cells raises ``IndexError``.

        Side effect: ``self.world.field`` is replaced by the new array. No
        other world attribute is touched here.
        """
        # Sized from the board dimensions instead of a hard-coded 9.
        field = np.zeros(self.size_x * self.size_y)
        for index, value in enumerate(sensor_info):
            field[index] = value
        self.world.field = field
        return self.bot_2.get_action(self.world)