In [31]:
import numpy as np

# Define the RNN model using Keras
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Softmax

In [32]:
# String labels for the two moves available to a player in each round.
ACCEPT, DEFECT = 'accept', 'defect'

In [33]:
# Define the premade algorithm's function
def premade_algorithm(last_opponent_move):
    """Tit-for-tat strategy: open with ACCEPT, then copy the other side's last move.

    Args:
        last_opponent_move: The move the other player made in the previous
            round, or None when there is no previous round.

    Returns:
        ACCEPT when ``last_opponent_move`` is None, otherwise
        ``last_opponent_move`` unchanged.
    """
    # Identity check: None is a singleton, and `is` cannot be fooled by
    # objects that override __eq__.
    if last_opponent_move is None:
        return ACCEPT
    return last_opponent_move

In [34]:
# Recurrent network: each sample is a sequence of 3 steps with 2 features per step.
# The training loop feeds rows [player one-hot, opponent one-hot, [result, 0]].
model = Sequential([
    SimpleRNN(units=32, input_shape=(3, 2)),
    # Softmax over the two moves (index 0 = accept, index 1 = defect)
    Dense(units=2, activation='softmax'),
])

In [35]:
# Configure training: categorical cross-entropy loss minimised with Adam
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
)

In [36]:
# Payoff returned by play_round for each outcome, from the scoring player's
# point of view. The "win" outcome has the smallest value, "lose" the largest.
(
    win_amt,              # player defects, opponent accepts
    cooperate_amt,        # both accept
    mutual_betrayal_amt,  # both defect
    lose_amt,             # player accepts, opponent defects
) = (0, 1, 2, 3)

In [37]:
# Define the game rules
def play_round(player_move, opponent_move):
    """Return the payoff of one round from ``player_move``'s point of view.

    Args:
        player_move: ACCEPT or DEFECT, the move of the player being scored.
        opponent_move: ACCEPT or DEFECT, the move of the other player.

    Returns:
        One of ``win_amt``, ``cooperate_amt``, ``mutual_betrayal_amt`` or
        ``lose_amt`` depending on the pair of moves.

    Raises:
        ValueError: If either move is neither ACCEPT nor DEFECT.
    """
    if player_move == DEFECT:
        if opponent_move == DEFECT:
            return mutual_betrayal_amt
        if opponent_move == ACCEPT:
            return win_amt
    elif player_move == ACCEPT:
        if opponent_move == DEFECT:
            return lose_amt
        if opponent_move == ACCEPT:
            return cooperate_amt
    # str() keeps the message identical for string moves while ensuring a
    # non-string move (e.g. None) still surfaces as ValueError, not TypeError.
    raise ValueError(
        'Player Move: ' + str(player_move) + ' Opponent Move: ' + str(opponent_move)
    )

In [38]:
# Opening move the learning player uses at the start of every training episode.
initial_move = ACCEPT
# Alternative opening: uncomment to start each episode by defecting instead.
# initial_move = DEFECT

In [39]:
def encode_move(move):
    """One-hot encode a move as a two-element list.

    Args:
        move: ACCEPT or DEFECT.

    Returns:
        ``[1, 0]`` for ACCEPT, ``[0, 1]`` for DEFECT.

    Raises:
        ValueError: If ``move`` is neither ACCEPT nor DEFECT.
    """
    if move == ACCEPT:
        return [1, 0]
    if move == DEFECT:
        return [0, 1]
    # str() keeps the message identical for string moves while ensuring a
    # non-string move (e.g. None) still surfaces as ValueError, not TypeError.
    raise ValueError('Move was neither accept nor defect. Move was ' + str(move))

In [40]:
# Training loop: the model plays repeated games against premade_algorithm and is
# fitted after every turn on the observation it just acted on.
num_episodes = 50
epsilon = 0.1  # Exploration rate: probability of picking a random action

for episode in range(num_episodes):
    # Initialize game state
    player_move = initial_move  # Every episode opens with the configured initial_move (not random)
    opponent_move = premade_algorithm(None)  # No history yet, so the opponent opens with ACCEPT
    total_reward = 0  # Running sum of this player's payoffs within the episode
    
    for turn in range(30):  # Play 30 turns per episode
        # Remember the move being played this turn; the opponent will copy it next turn
        last_player_move = player_move

        # One-hot encode the opponent's move for this turn
        opponent_encoded = encode_move(opponent_move)
        
        # One-hot encode the player's move for this turn
        player_encoded = encode_move(player_move)
        
        # Score the current pair of moves from the player's point of view
        result = play_round(player_move, opponent_move)

        input_state = np.array([player_encoded, opponent_encoded, [result, 0]])  # 3x2 observation: player one-hot, opponent one-hot, [payoff, 0 padding]
        
        # Choose the action for the NEXT turn (epsilon-greedy)
        if np.random.rand() < epsilon:
            # Explore: choose a random action (0 or 1)
            action = np.random.randint(0, 2)
        else:
            # Exploit: pick the action with the LOWEST model output (argmin).
            # NOTE(review): decode_move selects with argmax instead — confirm which is intended.
            action_probs = model.predict(np.expand_dims(input_state, axis=0))[0]
            action = np.argmin(action_probs)
        
        # Convert action index to move (0 -> ACCEPT, 1 -> DEFECT)
        if action == 0:
            player_move = ACCEPT
        elif action == 1:
            player_move = DEFECT
        
        # Accumulate the payoff of the round that was just scored
        total_reward += result
        
        # Train the model: the target is all zeros except the chosen action's slot,
        # which holds the running episode total (not the single-turn payoff).
        # NOTE(review): this target is not a probability distribution although the
        # model is compiled with categorical cross-entropy — confirm this is intended.
        target = np.zeros((1, 2))
        target[0, action] = total_reward
        
        
        # Fit on this single observation/target pair
        model.fit(np.expand_dims(input_state, axis=0), target, verbose=0)
        
        # Update opponent's move for next turn: it copies the move the player made this turn
        opponent_move = premade_algorithm(last_player_move)
    # Print the target from the final turn of the episode
    print(target)        


[[41.  0.]]
[[ 0. 44.]]
[[ 0. 36.]]
[[46.  0.]]
[[41.  0.]]
[[ 0. 47.]]
[[ 0. 47.]]
[[ 0. 46.]]
[[ 0. 45.]]
[[42.  0.]]
[[41.  0.]]
[[45.  0.]]
[[44.  0.]]
[[ 0. 42.]]
[[41.  0.]]
[[ 0. 43.]]
[[ 0. 41.]]
[[47.  0.]]
[[47.  0.]]
[[47.  0.]]
[[39.  0.]]
[[43.  0.]]
[[47.  0.]]
[[42.  0.]]
[[ 0. 45.]]
[[ 0. 36.]]
[[37.  0.]]
[[47.  0.]]
[[ 0. 42.]]
[[45.  0.]]
[[44.  0.]]
[[ 0. 48.]]
[[ 0. 45.]]
[[42.  0.]]
[[ 0. 47.]]
[[40.  0.]]
[[ 0. 41.]]
[[ 0. 38.]]
[[47.  0.]]
[[ 0. 47.]]
[[46.  0.]]
[[ 0. 48.]]
[[ 0. 46.]]
[[ 0. 39.]]
[[ 0. 44.]]
[[40.  0.]]
[[44.  0.]]
[[ 0. 46.]]
[[38.  0.]]
[[ 0. 42.]]


In [41]:
def decode_move(move):
    """Translate a model output vector into a move label.

    Prints the raw vector, then returns ACCEPT when its largest entry sits at
    position 0 and DEFECT when it sits at position 1. Any other position
    yields None.
    """
    print(move)
    best = move.argmax(axis=0)
    if best == 0:
        return ACCEPT
    if best == 1:
        return DEFECT
    return None

In [43]:
# Once trained, you can use the model to play against the premade algorithm
def play_against_premade(model, num_rounds=100):
    """Play the trained model against ``premade_algorithm`` and print both totals.

    Each round the model is shown the previous round's observation, built the
    same way as in the training loop: player one-hot, opponent one-hot and
    ``[payoff, 0]``. The model's output is turned into a move by
    ``decode_move``. The opponent answers with ``premade_algorithm`` applied to
    the player's previous move.

    Args:
        model: Object with a Keras-style ``predict`` method that accepts a
            ``(1, 3, 2)`` array and returns one 2-element output row.
        num_rounds: Number of rounds to play. Defaults to 100.

    Returns:
        None. Per-round moves and the final scores are printed.
    """
    player_score = 0
    opponent_score = 0

    # Notional "previous round" used for the first observation: both sides
    # accepted, so the opponent's first move is still ACCEPT.
    prev_player_move = ACCEPT
    prev_opponent_move = premade_algorithm(None)

    for _ in range(num_rounds):
        # Rebuild the observation every round. Relying on a leftover global
        # `input_state` from the training cell made the model see the same
        # stale input (and return the same output) on every round.
        prev_result = play_round(prev_player_move, prev_opponent_move)
        input_state = np.array([
            encode_move(prev_player_move),
            encode_move(prev_opponent_move),
            [prev_result, 0],
        ])
        action_probs = model.predict(np.expand_dims(input_state, axis=0), verbose=0)[0]
        opponent_move = premade_algorithm(prev_player_move)

        # NOTE(review): decode_move picks the argmax, while the training loop
        # exploits with argmin — confirm which selection rule is intended.
        player_move = decode_move(action_probs)
        print("TURN")
        print(player_move + ' vs ' + opponent_move)

        result = play_round(player_move, opponent_move)
        opponent_result = play_round(opponent_move, player_move)

        player_score += result
        opponent_score += opponent_result

        # This round becomes the history for the next observation
        prev_player_move, prev_opponent_move = player_move, opponent_move
    print("Player's score:", player_score)
    print("Opponent's score:", opponent_score)

# Test the trained model against the premade algorithm
play_against_premade(model)

[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accept vs accept
[0.50108814 0.49891186]
TURN
accep