In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
import random
import matplotlib.pyplot as plt
from collections import deque

#game stuff
from players_and_bots.rl_bot import RLBot  
from game.game import TexasHoldem
from players_and_bots.player import Player
import players_and_bots.bots as bots
from game.game_state import GameState

In [2]:
# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Memory growth has to be configured identically on every visible GPU, so every
# device is configured rather than only the first one.
physical_devices = tf.config.list_physical_devices('GPU')
try:
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised (e.g. this cell is re-run
    # in a live kernel); the setting can no longer change, so report and continue.
    print(f"Could not enable GPU memory growth: {err}")

In [3]:

# --- Reproducibility ---------------------------------------------------------
# Seed the RNGs of every library this notebook imports *before* the agent and
# the bots are constructed, so a Restart & Run All starts from the same state.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# --- Agent dimensions --------------------------------------------------------
# Passed to RLBot as state_size / action_size.
# NOTE(review): the meaning of 138 features / 4 actions is defined by RLBot and
# GameState, which are not visible here — confirm they still match.
STATE_SIZE = 138
ACTION_SIZE = 4

# Second positional argument handed to every player below; the training loop
# treats this value as the starting chip stack when computing rewards.
STARTING_CHIPS = 1000

rl_agent = RLBot("RLAgent", STARTING_CHIPS, state_size=STATE_SIZE, action_size=ACTION_SIZE)

# Table order matters to the game, so the agent stays in the fourth seat.
players = [
    bots.RandomBot("RandomBot1", STARTING_CHIPS),
    bots.StrategicBot("StrategicBot", STARTING_CHIPS, aggression_level=0.5, tightness_level=0.5, bluff_frequency=0.2),
    bots.StrategicBot("StrategicTight", STARTING_CHIPS, aggression_level=0.6, tightness_level=0.7, bluff_frequency=0.3),
    rl_agent,
    bots.StrategicBot("StrategicLoose", STARTING_CHIPS, aggression_level=0.6, tightness_level=0.2, bluff_frequency=0.3),
    bots.StrategicBot("StrategicAggro", STARTING_CHIPS, aggression_level=0.8, tightness_level=0.5, bluff_frequency=0.65)
]

game = TexasHoldem(small_blind=5, big_blind=10, player_list=players)


In [4]:

# Training-loop settings.
num_episodes = 1_000      # number of episodes the training loop runs
update_target_freq = 10   # episodes between rl_agent.update_target_model() calls
# NOTE(review): batch_size is not referenced by any visible cell — confirm
# whether RLBot is supposed to receive it.
batch_size = 64


In [None]:
def test_state_vector_lengths(rl_agent):
    if not rl_agent.memory:
        print("Replay memory is empty.")
        return

    state_lengths = []
    for i, sample in enumerate(rl_agent.memory):
        state = sample[0] 
        if state is None:
            print(f"Sample {i}: State is None")
        else:
            state_length = len(state)
            state_lengths.append(state_length)
            print(f"Sample {i}: State length = {state_length}")

    if not state_lengths:
        print("\nAll states are None.")
        return

    unique_lengths = set(state_lengths)
    if len(unique_lengths) == 1:
        print(f"\nAll state vectors have consistent length: {unique_lengths.pop()}")
    else:
        print("\nInconsistent state vector lengths found.")
        print(f"Unique lengths: {unique_lengths}\n")
        
        
        length_to_samples = {}
        for idx, length in enumerate(state_lengths):
            length_to_samples.setdefault(length, []).append(idx)
        for length, samples in length_to_samples.items():
            print(f"State length {length} occurs in samples: {samples}")


# Sanity-check the replay memory before training starts; in the recorded run
# below the memory was still empty at this point.
test_state_vector_lengths(rl_agent)

Replay memory is empty.


In [None]:

# Per-episode history; both lists are read by the plotting cell further down.
episode_rewards, epsilon_values = [], []

# NOTE(review): the recorded output of this cell shows epsilon staying at 1.00
# for every logged episode, and nothing here passes batch_size to the agent —
# confirm that RLBot trains and decays epsilon internally.
for episode in range(1, num_episodes + 1):
    # Reset the table, then let the game run.
    game.reset()
    game.play()

    # Snapshot the finished game and pass it to the agent's end_hand hook.
    final_game_state = GameState.from_game(game)
    rl_agent.end_hand(final_game_state)

    # Reward is the chip count relative to an assumed 1000-chip starting stack.
    total_reward = rl_agent.chips - 1000
    episode_rewards.append(total_reward)
    epsilon_values.append(rl_agent.epsilon)

    if episode % update_target_freq == 0:
        rl_agent.update_target_model()

    # Progress line every 10 episodes, averaged over the last 10 rewards.
    if episode % 10 == 0:
        recent_rewards = episode_rewards[-10:]
        avg_reward = np.mean(recent_rewards)
        print(f"Episode {episode}/{num_episodes} - Average Reward: {avg_reward:.2f}, Epsilon: {rl_agent.epsilon:.2f}")


Episode 10/1000 - Average Reward: -362.17, Epsilon: 1.00
Episode 20/1000 - Average Reward: -466.84, Epsilon: 1.00
Episode 30/1000 - Average Reward: -149.02, Epsilon: 1.00
Episode 40/1000 - Average Reward: -269.31, Epsilon: 1.00
Episode 50/1000 - Average Reward: -205.75, Epsilon: 1.00
Episode 60/1000 - Average Reward: -144.90, Epsilon: 1.00
Episode 70/1000 - Average Reward: -323.01, Epsilon: 1.00
Episode 80/1000 - Average Reward: -136.03, Epsilon: 1.00
Episode 90/1000 - Average Reward: -186.83, Epsilon: 1.00
Episode 100/1000 - Average Reward: -257.85, Epsilon: 1.00
Episode 110/1000 - Average Reward: -90.38, Epsilon: 1.00
Episode 120/1000 - Average Reward: -255.45, Epsilon: 1.00
Episode 130/1000 - Average Reward: -356.47, Epsilon: 1.00
Episode 140/1000 - Average Reward: -161.80, Epsilon: 1.00
Episode 150/1000 - Average Reward: -174.22, Epsilon: 1.00
Episode 160/1000 - Average Reward: -203.11, Epsilon: 1.00
Episode 170/1000 - Average Reward: -12.83, Epsilon: 1.00
Episode 180/1000 - Averag

In [None]:
# One figure per recorded series: (values, y-axis label, title).
plot_specs = [
    (episode_rewards, 'Total Reward', 'Total Rewards Over Episodes'),
    (epsilon_values, 'Epsilon', 'Epsilon Decay Over Episodes'),
]

for series, y_label, title in plot_specs:
    fig, ax = plt.subplots(figsize=(12, 5))
    ax.plot(series)
    ax.set_xlabel('Episode')
    ax.set_ylabel(y_label)
    ax.set_title(title)
    plt.show()


In [None]:

# Evaluate with epsilon forced to 0 (presumably disabling random exploration
# in RLBot — confirm against its action-selection code).
# NOTE(review): epsilon is left at 0.0 afterwards; restore it before training further.
rl_agent.epsilon = 0.0

evaluation_episodes = 10
for episode in range(1, evaluation_episodes + 1):
    # Use the same per-episode sequence as the training loop. The explicit
    # rl_agent.reset() and per-opponent reset() calls were dropped: they looped
    # over `other_players`, a name never defined in this notebook (NameError on
    # a fresh kernel), and the training loop runs with game.reset() alone.
    game.reset()
    game.play()

    final_game_state = GameState.from_game(game)
    rl_agent.end_hand(final_game_state)

    # Reward is the chip count relative to an assumed 1000-chip starting stack.
    total_reward = rl_agent.chips - 1000
    print(f"Evaluation Episode {episode} - Total Reward: {total_reward}")


In [None]:

# Save the agent through RLBot.save_model; the relative path is resolved
# against the kernel's current working directory.
rl_agent.save_model('rl_agent_model.h5')

# To resume from a previous run, load the saved file instead:
# rl_agent.load_model('rl_agent_model.h5')
