In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from collections import defaultdict
import random
import pickle
import os

In [None]:
class PokerAgent:
    def __init__(self, model_path=None, learning_rate=0.001, min_bankroll_threshold=200):
        self.min_bankroll_threshold = min_bankroll_threshold
        self.hand_weights = {
            'royal_flush': 1.0,
            'straight_flush': 0.95,
            'four_of_a_kind': 0.9,
            'full_house': 0.85,
            'flush': 0.8,
            'straight': 0.75,
            'three_of_a_kind': 0.5,
            'two_pair': 0.4,
            'one_pair': 0.3,
            'high_card': 0.1
        }
        
        # Bias values for different actions
        self.action_bias = {
            'raise': 0.3,
            'call': 0.1,
            'fold': -0.2
        }
        
        # Player profiles - will be updated during gameplay
        self.player_profiles = {}
        
        # Create or load the model
        if model_path and os.path.exists(model_path):
            self.model = keras.models.load_model(model_path)
        else:
            self.model = self._create_model(learning_rate)
            
        # Memory for experience replay
        self.memory = []
        self.gamma = 0.95  # discount factor
        
    def _create_model(self, learning_rate):
        """Create the neural network model"""
        model = keras.Sequential([
            keras.layers.Dense(128, activation='relu', input_shape=(17,)),  # 10 cards (suit+rank) + 7 game state features
            keras.layers.Dense(128, activation='relu'),
            keras.layers.Dropout(0.2),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(3, activation='linear')  # Output: Q-values for [fold, call, raise]
        ])
        model.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                     loss='mse')
        return model
    
    def preprocess_cards(self, cards):
        """Convert card information to numerical features"""
        # cards is a list of tuples (suit, rank)
        features = []
        
        for card in cards:
            suit, rank = card
            # Convert suit to one-hot
            suit_val = {'Hearts': 0, 'Diamonds': 1, 'Clubs': 2, 'Spades': 3}.get(suit, 0)
            
            # Convert rank to numerical value
            try:
                rank_val = {'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, 
                           '9': 9, '10': 10, 'Jack': 11, 'Queen': 12, 'King': 13, 'Ace': 14}.get(rank, 0)
            except:
                # If rank is already a number
                rank_val = int(rank) if isinstance(rank, (int, str)) and rank.isdigit() else 0
                
            features.extend([suit_val, rank_val])
            
        # Pad if fewer than 5 cards
        while len(features) < 10:
            features.extend([0, 0])
            
        return features
    
    def calculate_hand_probability(self, cards, community_cards=None):
        """Calculate the probability of making different poker hands"""
        # Combine hole cards and community cards
        all_cards = cards.copy()
        if community_cards:
            all_cards.extend(community_cards)
            
        # Count ranks and suits
        rank_count = defaultdict(int)
        suit_count = defaultdict(int)
        
        for card in all_cards:
            suit, rank = card
            rank_count[rank] += 1
            suit_count[suit] += 1
        
        # Check for pairs, three of a kind, etc.
        pairs = sum(1 for count in rank_count.values() if count == 2)
        three_of_a_kind = any(count == 3 for count in rank_count.values())
        four_of_a_kind = any(count == 4 for count in rank_count.values())
        flush_possible = any(count >= 5 for count in suit_count.values())
        
        # Calculate straight possibility
        ranks = [{'2': 2, '3': 3, '4': 4, '5': 5, '6': 6, '7': 7, '8': 8, 
                 '9': 9, '10': 10, 'Jack': 11, 'Queen': 12, 'King': 13, 'Ace': 14}.get(r, 0) for _, r in all_cards]
        ranks = sorted(set(ranks))
        straight_possible = False
        
        for i in range(len(ranks) - 4):
            if ranks[i+4] - ranks[i] == 4:
                straight_possible = True
                break
        
        # Simple probability estimates
        probabilities = {
            'royal_flush': 0.0000015 if flush_possible and straight_possible else 0,
            'straight_flush': 0.00001 if flush_possible and straight_possible else 0,
            'four_of_a_kind': 0.1 if four_of_a_kind else 0.01 if three_of_a_kind else 0.001,
            'full_house': 0.1 if three_of_a_kind and pairs else 0.05 if three_of_a_kind else 0.01,
            'flush': 0.1 if flush_possible else 0.02,
            'straight': 0.1 if straight_possible else 0.02,
            'three_of_a_kind': 0.2 if three_of_a_kind else 0.05,
            'two_pair': 0.3 if pairs >= 2 else 0.1,
            'one_pair': 0.4 if pairs >= 1 else 0.2,
            'high_card': 1.0  # Always have at least a high card
        }
        
        # Adjust based on the stage of the game
        if community_cards:
            if len(community_cards) >= 3:  # After flop
                for hand_type in probabilities:
                    probabilities[hand_type] *= 2
            if len(community_cards) >= 4:  # After turn
                for hand_type in probabilities:
                    probabilities[hand_type] *= 1.5
                    
        return probabilities
    
    def get_state_features(self, cards, community_cards, pot_size, current_bet, bankroll, round_num, player_position, num_players_active):
        """Create a feature vector representing the current game state"""
        card_features = self.preprocess_cards(cards + community_cards)
        
        # Calculate hand probabilities
        probs = self.calculate_hand_probability(cards, community_cards)
        
        # Compute a hand strength score (0-1)
        hand_strength = sum(prob * self.hand_weights[hand_type] for hand_type, prob in probs.items())
        
        # Additional game state features
        pot_odds = current_bet / (pot_size + current_bet) if pot_size + current_bet > 0 else 0
        bankroll_ratio = bankroll / self.min_bankroll_threshold
        round_feature = round_num / 4  # Normalize round (pre-flop, flop, turn, river)
        position_feature = player_position / num_players_active  # Relative position
        
        # Combine all features
        game_features = [hand_strength, pot_odds, bankroll_ratio, round_feature, position_feature, pot_size/100, current_bet/100]
        
        return np.array(card_features + game_features).reshape(1, -1)
    
    def update_player_profile(self, player_name, action, bet_amount, cards=None):
        """Update the player profile based on observed actions"""
        if player_name not in self.player_profiles:
            self.player_profiles[player_name] = {
                'actions': {'fold': 0, 'call': 0, 'raise': 0},
                'avg_bet': 0,
                'num_bets': 0,
                'bluff_probability': 0.5,  # Initial assumption
                'observed_hands': []
            }
        
        profile = self.player_profiles[player_name]
        profile['actions'][action] += 1
        
        if action in ['call', 'raise']:
            profile['avg_bet'] = (profile['avg_bet'] * profile['num_bets'] + bet_amount) / (profile['num_bets'] + 1)
            profile['num_bets'] += 1
            
        if cards:
            profile['observed_hands'].append(cards)
            
        # Calculate aggression factor
        total_actions = sum(profile['actions'].values())
        if total_actions > 0:
            profile['aggression'] = (profile['actions']['raise'] + profile['actions']['call'] * 0.5) / total_actions
        
        # Save updated profile
        self.player_profiles[player_name] = profile
    
    def predict_action(self, state_features, epsilon=0.1):
        """Predict the best action using epsilon-greedy policy"""
        if random.random() < epsilon:
            # Exploration: choose a random action
            return random.choice(['fold', 'call', 'raise'])
        else:
            # Exploitation: choose the best action based on Q-values
            q_values = self.model.predict(state_features)[0]
            action_idx = np.argmax(q_values)
            return ['fold', 'call', 'raise'][action_idx]
    
    def remember(self, state, action, reward, next_state, done):
        """Store experience in memory for later training"""
        action_idx = ['fold', 'call', 'raise'].index(action)
        self.memory.append((state, action_idx, reward, next_state, done))
    
    def replay(self, batch_size=32):
        """Train the model using experience replay"""
        if len(self.memory) < batch_size:
            return
        
        batch = random.sample(self.memory, batch_size)
        for state, action_idx, reward, next_state, done in batch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model.predict(next_state)[0])
            
            target_f = self.model.predict(state)
            target_f[0][action_idx] = target
            
            self.model.fit(state, target_f, epochs=1, verbose=0)
    
    def make_decision(self, cards, community_cards, pot_size, current_bet, bankroll, round_num, player_position, num_players_active, opponents):
        """Make a decision based on the current game state"""
        # If bankroll is low, be more conservative
        if bankroll < self.min_bankroll_threshold:
            # Only play with very strong hands
            probs = self.calculate_hand_probability(cards, community_cards)
            top_hands = ['royal_flush', 'straight_flush', 'four_of_a_kind', 'full_house', 'flush']
            good_hand = any(probs[hand] > 0.1 for hand in top_hands)
            
            if not good_hand:
                return 'fold', 0
        
        # Get state features
        state_features = self.get_state_features(cards, community_cards, pot_size, current_bet, bankroll, round_num, player_position, num_players_active)
        
        # Consider opponent tendencies
        for opponent in opponents:
            if opponent in self.player_profiles:
                profile = self.player_profiles[opponent]
                # Adjust strategy based on opponent profile
                if profile.get('aggression', 0.5) > 0.7:  # Aggressive opponent
                    self.action_bias['raise'] *= 0.8  # Be more cautious with raising
                elif profile.get('aggression', 0.5) < 0.3:  # Passive opponent
                    self.action_bias['raise'] *= 1.2  # More aggressive against passive players
        
        # After the third round with no strong hands, fold
        if round_num >= 3:
            probs = self.calculate_hand_probability(cards, community_cards)
            top_five_hands = ['royal_flush', 'straight_flush', 'four_of_a_kind', 'full_house', 'flush']
            
            # Check if we have any decent probability for top 5 hands
            good_hand = any(probs[hand] > 0.2 for hand in top_five_hands)
            if not good_hand:
                return 'fold', 0
        
        # Predict the best action
        action = self.predict_action(state_features)
        
        # Calculate bet amount if raising
        if action == 'raise':
            hand_probs = self.calculate_hand_probability(cards, community_cards)
            # Higher bet for stronger hands
            hand_strength = sum(prob * self.hand_weights[hand_type] for hand_type, prob in hand_probs.items())
            bet_amount = min(int(current_bet * (1 + hand_strength)), bankroll // 4)
            return action, bet_amount
        elif action == 'call':
            return action, current_bet
        else:
            return action, 0
    
    def train_on_dataset(self, dataset_path, epochs=10):
        """Train the initial model on historical poker data"""
        data = pd.read_csv(dataset_path)
        
        # Preprocess dataset
        X = []  # Features
        y = []  # Target actions
        
        for _, row in data.iterrows():
            # Extract card information
            cards = []
            for i in range(1, 6):
                suit = row[f'Suit of Card {i}']
                rank = row[f'Rank of Card {i}']
                cards.append((suit, rank))
            
            # Calculate hand probabilities
            hand_type = row['Poker Hand']
            probs = self.calculate_hand_probability(cards)
            
            # Create synthetic game state
            pot_size = random.randint(50, 500)
            current_bet = random.randint(10, 100)
            bankroll = random.randint(100, 1000)
            round_num = random.randint(0, 3)
            player_position = random.randint(0, 5)
            num_players_active = random.randint(2, 6)
            
            # Generate state features
            state_features = self.get_state_features(cards[:2], cards[2:], pot_size, current_bet, bankroll, round_num, player_position, num_players_active)
            
            # Determine target action based on hand type
            if hand_type <= 4:  # Top 5 hands (0-4 in the dataset)
                action_idx = 2  # Raise
            elif hand_type <= 9:  # Bottom 5 hands (5-9 in the dataset)
                action_idx = 1  # Call
            else:
                action_idx = 0  # Fold
            
            # Create one-hot encoded action
            action_target = np.zeros(3)
            action_target[action_idx] = 1
            
            X.append(state_features[0])
            y.append(action_target)
        
        X = np.array(X)
        y = np.array(y)
        
        # Train the model
        self.model.fit(X, y, epochs=epochs, batch_size=32, validation_split=0.2)
        
        print(f"Model trained on {len(X)} samples from dataset")
    
    def save_model(self, path):
        """Save the model to a file"""
        self.model.save(path)
        
        # Also save player profiles
        with open(path + "_profiles.pkl", "wb") as f:
            pickle.dump(self.player_profiles, f)
    
    def load_model(self, path):
        """Load a model from a file"""
        self.model = keras.models.load_model(path)
        
        # Load player profiles if available
        profile_path = path + "_profiles.pkl"
        if os.path.exists(profile_path):
            with open(profile_path, "rb") as f:
                self.player_profiles = pickle.load(f)

# Function to integrate with the poker_ai_2025 engine
def integrate_with_poker_engine(agent_path=None):
    # Initialize the agent
    agent = PokerAgent(model_path=agent_path)
    
    # Train on dataset if needed
    if not agent_path:
        agent.train_on_dataset("poker-hand-training.csv")
        agent.save_model("trained_poker_agent.h5")
    
    # Return the agent for use with the poker engine
    return agent

# Example usage:
if __name__ == "__main__":
    # Create and train a new agent
    agent = integrate_with_poker_engine()
    
    # Example of how to make a decision
    cards = [('Hearts', 'Ace'), ('Hearts', 'King')]
    community_cards = [('Hearts', 'Queen'), ('Hearts', '10'), ('Spades', '7')]
    pot_size = 300
    current_bet = 50
    bankroll = 1000
    round_num = 2  # Flop
    player_position = 3
    num_players_active = 4
    opponents = ["Player1", "Player2", "Player3"]
    
    action, bet = agent.make_decision(cards, community_cards, pot_size, current_bet, bankroll, round_num, player_position, num_players_active, opponents)
    print(f"Decision: {action}, Bet amount: {bet}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


FileNotFoundError: [Errno 2] No such file or directory: 'poker_game_dataset.csv'