In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
class Bot(object):
    # our states can be either "ROCK, PAPER or SCISSORS"
    state_space = 3
    # three actions by our player
    action_space = 3
    q_table = np.random.uniform(low = -2, high = 5, size = (3, 3))
    total_reward, reward = 0,0
    total_wincount,wincount=0,0
    total_tiecount,tiecount=0,0
    total_lostcount,lostcount=0,0
    avg_rewards_list = []
    avg_reward = 0
    result = 'DRAW'
    tags = ["ROCK", "PAPER", "SCISSORS"]
    # looses to map
    loses_to = {
       "0": 1, # rock loses to paper
       "1": 2,  # paper loses to scissor
       "2": 0  # scissor loses to rock
    }
    
    def __init__(self, alpha=0.5, gamma=0.2, epsilon=0.8, min_eps=0, episodes=50):
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.min_eps = min_eps
        self.episodes = episodes
        # Calculate episodic reduction in epsilon
        self.reduction = (epsilon - min_eps) / episodes
        
    # either explore or exploit, any which ways return the next action
    def bot_move(self, player_move):
        action = 0
        # Determine next action - epsilon greedy strategy
        if np.random.random() < 1 - self.epsilon:
            print("Exploiting....")
            action = np.argmax(self.q_table[player_move])
        else:
            print("Exploring.....")
            action = np.random.randint(0, self.action_space)
        # Decay epsilon
        if self.epsilon > self.min_eps:
            self.epsilon -= self.reduction
        print("choose ",self.tags[action])
        return action
    
    def get_reward(self, player, bot):
        reward = 0
        wincount=0
        tiecount,lostcount=0,0
        if self.get_result(player, bot) == 'WIN':
            reward = 1
            wincount=1
        elif self.get_result(player, bot) == 'LOSE':
            reward = 0
            lostcount=1
        else:
            # Draw case
            reward = -1
            tiecount=1
        return reward,wincount,tiecount,lostcount
    
    # update q_table
    def update_experience(self, state, action, reward):
        delta = self.alpha * (reward + self.gamma * np.max(self.q_table[action]) - self.q_table[state,action])
        self.q_table[state,action] += delta
    
    def print_stats(self, player, bot, reward):
        print("Player move : {0}, bot: {1}, reward: {2}, result: {3}, total_reward: {4}".format(self.tags[player], self.tags[bot], reward, self.result, self.total_reward))
        print(self.q_table)
        print("Total Win count:",self.total_wincount,"||","Win rate:",(self.total_wincount/201)*100)
        print("Total Tie count:",self.total_tiecount,"||","Tie rate:",(self.total_tiecount/201)*100)
        print("Total Lost count:",self.total_lostcount,"||","Lost rate:",(self.total_lostcount/201)*100)
        pass
    
    # returns either a WIN, LOSE or a DRAW to indicate the same.
    def get_result(self, player_move, bot_move):
        if bot_move == player_move:
            self.result = 'DRAW'
        elif self.loses_to[str(bot_move)] == player_move :
            self.result = 'LOSE'
        else:
            self.result = 'WIN'
            
        return self.result    
            
    def get_avg_rewards(self):
        return self.avg_rewards_list
    
    def play(self, player_move):
        # add reward
        bot_move = self.bot_move(player_move)
        reward,wincount,tiecount,lostcount = self.get_reward(player_move, bot_move)
        self.total_reward += reward
        self.total_wincount+=wincount
        self.total_tiecount+=tiecount
        self.total_lostcount+=lostcount
        self.avg_rewards_list.append(reward)
        # update experience
        self.update_experience(player_move, bot_move, reward)
        self.print_stats(player_move, bot_move, reward)

class Game(object):

    def __init__(self, bot, episodes=200):
        self.bot = bot
        self.episodes = episodes
    
    def begin(self):
        player_moves=[1,2]*100
        for idx in range(0, self.episodes):
            #player_moves=int(input("Enter your move (0- Rock, 1 - paper, 2- scissors): "))
            self.bot.play(player_moves[idx])
        # Plot Rewards
#         plt.plot(np.arange(len(self.bot.get_avg_rewards())) + 1)
#         plt.savefig('rewards.png')     
#         plt.close()

    def generate_fake_moves(self, fake_moves):
        player_moves = []
        for i in range(0, self.episodes):
            if fake_moves:
                player_moves.append(np.random.randint(0, 3))
            else:
                self.read_player_move()
        return player_moves

    def read_player_move(self):
        return int(input("Enter your move (0- Rock, 1 - paper, 2- scissors): "))

game = Game(Bot())
game.begin()

Exploring.....
choose  ROCK
Player move : PAPER, bot: ROCK, reward: 0, result: LOSE, total_reward: 0
[[ 3.34011095  3.62087154 -0.95268142]
 [ 2.70270459  2.92050945  3.06939249]
 [-1.75427671  1.81077368  2.58072783]]
Total Win count: 0 || Win rate: 0.0
Total Tie count: 0 || Tie rate: 0.0
Total Lost count: 1 || Lost rate: 0.4975124378109453
Exploring.....
choose  SCISSORS
Player move : SCISSORS, bot: SCISSORS, reward: -1, result: DRAW, total_reward: -1
[[ 3.34011095  3.62087154 -0.95268142]
 [ 2.70270459  2.92050945  3.06939249]
 [-1.75427671  1.81077368  1.0484367 ]]
Total Win count: 0 || Win rate: 0.0
Total Tie count: 1 || Tie rate: 0.4975124378109453
Total Lost count: 1 || Lost rate: 0.4975124378109453
Exploring.....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 0
[[ 3.34011095  3.62087154 -0.95268142]
 [ 2.70270459  2.92050945  2.21577361]
 [-1.75427671  1.81077368  1.0484367 ]]
Total Win count: 1 || Win rate: 0.4975124378109453
Total T

Total Lost count: 7 || Lost rate: 3.482587064676617
Exploring.....
choose  SCISSORS
Player move : SCISSORS, bot: SCISSORS, reward: -1, result: DRAW, total_reward: 7
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.60091264  1.34586983]
 [ 1.71738046  0.52100395 -0.44140869]]
Total Win count: 17 || Win rate: 8.45771144278607
Total Tie count: 10 || Tie rate: 4.975124378109453
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 8
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.60091264  1.34467296]
 [ 1.71738046  0.52100395 -0.44140869]]
Total Win count: 18 || Win rate: 8.955223880597014
Total Tie count: 10 || Tie rate: 4.975124378109453
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploring.....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 9
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.60091264  1.34467296]
 [ 

Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 43
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483481]
 [ 1.7241743   0.52100395 -0.44140869]]
Total Win count: 54 || Win rate: 26.865671641791046
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 44
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483484]
 [ 1.7241743   0.52100395 -0.44140869]]
Total Win count: 55 || Win rate: 27.363184079601986
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 45
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483484]
 [ 1.72

[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 88 || Win rate: 43.78109452736319
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 78
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 89 || Win rate: 44.27860696517413
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 79
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 90 || Win rate: 44.776119402985074
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 ||

Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 109
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 120 || Win rate: 59.70149253731343
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 110
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 121 || Win rate: 60.19900497512438
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 111
[[ 3.34011095  3.62087154 -0.95

Exploiting....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 140
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 151 || Win rate: 75.12437810945273
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  ROCK
Player move : SCISSORS, bot: ROCK, reward: 1, result: WIN, total_reward: 141
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win count: 152 || Win rate: 75.62189054726367
Total Tie count: 11 || Tie rate: 5.472636815920398
Total Lost count: 7 || Lost rate: 3.482587064676617
Exploiting....
choose  SCISSORS
Player move : PAPER, bot: SCISSORS, reward: 1, result: WIN, total_reward: 142
[[ 3.34011095  3.62087154 -0.95268142]
 [ 1.21880688 -0.66601085  1.34483486]
 [ 1.72417431  0.52100395 -0.44140869]]
Total Win c