In [32]:
import numpy as np

# Define the RNN model using Keras
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Softmax

In [33]:
# Define the premade algorithm's function
def premade_algorithm():
    return np.random.choice(['rock', 'rock', 'rock', 'paper', 'paper', 'scissors'])

In [34]:
model = Sequential()
model.add(SimpleRNN(units=32, input_shape=(3, 3)))  # Input shape: [previous_move, opponent_move, result]
model.add(Dense(units=3, activation='softmax'))  # Output layer: probabilities of choosing rock, paper, or scissors

In [35]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy')

In [36]:
# Define the game rules
def play_round(player_move, opponent_move):
    if player_move == opponent_move:
        return 0  # Tie
    elif (player_move == 'rock' and opponent_move == 'scissors') or \
         (player_move == 'paper' and opponent_move == 'rock') or \
         (player_move == 'scissors' and opponent_move == 'paper'):
        return 1  # Win
    else:
        return -1  # Loss

In [41]:
# Training loop
num_episodes = 50
epsilon = 0.1  # Exploration rate

for episode in range(num_episodes):
    # Initialize game state
    player_move = 'rock'  # Start with a random move
    opponent_move = premade_algorithm()
    total_reward = 0
    
    for turn in range(30):  # Play 100 turns per episode
        # Encode game state
        if opponent_move == 'rock':
            opponent_encoded = [1, 0, 0]
        elif opponent_move == 'paper':
            opponent_encoded = [0, 1, 0]
        else:
            opponent_encoded = [0, 0, 1]
        
        # Play the game
        result = play_round(player_move, opponent_move)
        
        # Update input state
        if player_move == 'rock':
            player_encoded = [1, 0, 0]
        elif player_move == 'paper':
            player_encoded = [0, 1, 0]
        else:
            player_encoded = [0, 0, 1]
        
        input_state = np.array([[0, 0, 0], opponent_encoded, [result, 0, 0]])  # Previous move and result
        input_state[0] = player_encoded
        
        # Choose action
        if np.random.rand() < epsilon:
            # Explore: choose a random action
            action = np.random.randint(0, 3)
        else:
            # Exploit: choose action with highest probability from model
            action_probs = model.predict(np.expand_dims(input_state, axis=0))[0]
            action = np.argmax(action_probs)
        
        # Convert action to move
        if action == 0:
            player_move = 'rock'
        elif action == 1:
            player_move = 'paper'
        else:
            player_move = 'scissors'
        
        # Reward
        total_reward += result
        
        # Train the model
        target = np.zeros((1, 3))
        target[0, action] = total_reward
        
        
        model.fit(np.expand_dims(input_state, axis=0), target, verbose=0)
        
        # Update opponent's move for next turn
        opponent_move = premade_algorithm()
    print(target)        


[[0. 0. 0.]]
[[0. 0. 8.]]


KeyboardInterrupt: 

In [38]:
def decode_move(move):
    print(move)
    index = move.argmax(axis=0)
    if index == 0:
        return 'rock'
    elif index == 1:
        return 'paper'
    else:
        return 'scissors'

In [39]:
# Once trained, you can use the model to play against the premade algorithm
def play_against_premade(model):
    player_score = 0
    opponent_score = 0
    for _ in range(100):  # Play 100 rounds
        player_move = model.predict(np.expand_dims(input_state, axis=0))[0]
        opponent_move = premade_algorithm()
        
        player_move = decode_move(player_move)
        print("TURN")
        print(player_move + ' vs ' + opponent_move)

        result = play_round(player_move, opponent_move)
        if result == 1:
            player_score += 1
        elif result == -1:
            opponent_score += 1
    print("Player's score:", player_score)
    print("Opponent's score:", opponent_score)

# Test the trained model against the premade algorithm
play_against_premade(model)

[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs scissors
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs scissors
[0.26811978 0.4929738  0.23890638]
TURN
paper vs scissors
[0.26811978 0.4929738  0.23890638]
TURN
paper vs scissors
[0.26811978 0.4929738  0.23890638]
TURN
paper vs paper
[0.26811978 0.4929738  0.23890638]
TURN
paper vs paper
[0.26811978 0.4929738  0.23890638]
TURN
paper vs paper
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs paper
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811978 0.4929738  0.23890638]
TURN
paper vs rock
[0.26811