In [18]:
!pip install treys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from collections import deque
import random
from treys import Card, Evaluator

# Set device first
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Enhanced poker hand evaluation with better equity calculation
def evaluate_hand(hole_cards, community_cards, deck=None):
    """Evaluate hand strength and estimate equity with Monte Carlo simulation."""
    evaluator = Evaluator()

    if len(community_cards) < 3:  # Preflop equity calculation
        # Simple preflop hand strength based on hole cards
        card1_str = Card.int_to_str(hole_cards[0])
        card2_str = Card.int_to_str(hole_cards[1])

        # Basic preflop hand strength scoring
        rank1 = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'].index(card1_str[0])
        rank2 = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'].index(card2_str[0])

        # Pocket pair bonus
        if rank1 == rank2:
            equity = 0.5 + (rank1 / 26)  # Higher pairs get higher equity
        # Suited bonus
        elif card1_str[1] == card2_str[1]:
            equity = 0.3 + (max(rank1, rank2) / 26) + 0.1
        # High card strength
        else:
            equity = 0.2 + (max(rank1, rank2) / 26)

        equity = min(equity, 0.85)  # Cap preflop equity
        return 0, 7462, equity

    # Post-flop evaluation
    rank = evaluator.evaluate(hole_cards, community_cards)
    rank_class = evaluator.get_rank_class(rank)

    # Better equity estimation based on hand strength
    equity_map = {
        1: 0.95,  # Straight flush
        2: 0.90,  # Four of a kind
        3: 0.85,  # Full house
        4: 0.80,  # Flush
        5: 0.75,  # Straight
        6: 0.65,  # Three of a kind
        7: 0.55,  # Two pair
        8: 0.40,  # One pair
        9: 0.25   # High card
    }

    base_equity = equity_map.get(rank_class, 0.25)
    # Adjust based on relative rank within class
    relative_strength = (7462 - rank) / 7462
    equity = base_equity + (relative_strength * 0.15)

    return rank_class, rank, min(equity, 0.98)

# Enhanced neural network architecture
class PokerQNetwork(nn.Module):
    def __init__(self):
        super(PokerQNetwork, self).__init__()
        # Improved card encoding
        self.card_embedding = nn.Embedding(53, 16)  # 52 cards + padding

        # Separate processing for hole cards vs community cards
        self.hole_conv = nn.Conv1d(16, 32, kernel_size=2)  # For 2 hole cards
        self.community_conv = nn.Conv1d(16, 32, kernel_size=3, padding=1)  # For 5 community cards

        # Attention mechanism for card importance
        self.attention = nn.MultiheadAttention(embed_dim=32, num_heads=4, batch_first=True)

        # Enhanced state processing
        self.state_fc = nn.Linear(10, 64)

        # Combined processing
        total_features = 32 + 32 + 64  # hole + community + state
        self.fc1 = nn.Linear(total_features, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 3)  # Fold, call, raise

        self.dropout = nn.Dropout(0.2)
        self.layer_norm = nn.LayerNorm(total_features)

    def forward(self, card_input, state_input):
        batch_size = card_input.size(0)

        # Convert card representation to embeddings
        # Assuming card_input is [batch, 2, 7] with suit/rank encoding
        # We'll create card IDs from this
        card_ids = torch.zeros(batch_size, 7, dtype=torch.long, device=card_input.device)

        for b in range(batch_size):
            for c in range(7):
                if card_input[b, 0, c] > 0 or card_input[b, 1, c] > 0:  # Non-padding
                    suit = int(card_input[b, 0, c] * 3)
                    rank = int(card_input[b, 1, c] * 12)
                    card_ids[b, c] = rank * 4 + suit + 1  # +1 to avoid 0 (padding)

        # Embed cards
        card_embeds = self.card_embedding(card_ids)  # [batch, 7, 16]

        # Separate hole and community cards
        hole_embeds = card_embeds[:, :2, :].transpose(1, 2)  # [batch, 16, 2]
        community_embeds = card_embeds[:, 2:, :].transpose(1, 2)  # [batch, 16, 5]

        # Process hole cards
        hole_features = self.hole_conv(hole_embeds)  # [batch, 32, 1]
        hole_features = hole_features.squeeze(-1)  # [batch, 32]

        # Process community cards
        community_features = self.community_conv(community_embeds)  # [batch, 32, 5]
        community_features = F.adaptive_avg_pool1d(community_features, 1).squeeze(-1)  # [batch, 32]

        # Apply attention to card features
        combined_cards = torch.stack([hole_features, community_features], dim=1)  # [batch, 2, 32]
        attended_cards, _ = self.attention(combined_cards, combined_cards, combined_cards)
        attended_cards = attended_cards.mean(dim=1)  # [batch, 32]

        # Process state
        state_features = F.relu(self.state_fc(state_input))

        # Combine all features
        combined = torch.cat([hole_features, attended_cards, state_features], dim=1)
        combined = self.layer_norm(combined)

        # Forward through fully connected layers
        x = F.relu(self.fc1(combined))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)

        return x

# Improved game simulation with better opponent modeling
def simulate_game(model, epsilon=0.1):
    suits = ['h', 's', 'd', 'c']
    ranks = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A']
    deck = [r + s for r in ranks for s in suits]
    random.shuffle(deck)

    # Select stage with better distribution
    stage_weights = [0.4, 0.3, 0.2, 0.1]  # More preflop scenarios
    stage_idx = np.random.choice(4, p=stage_weights)
    stages = ['preflop', 'flop', 'turn', 'river']
    stage = stages[stage_idx]

    # Deal cards
    hole_cards = [Card.new(deck.pop()), Card.new(deck.pop())]
    community_cards = []
    if stage == 'flop':
        community_cards = [Card.new(deck.pop()) for _ in range(3)]
    elif stage == 'turn':
        community_cards = [Card.new(deck.pop()) for _ in range(4)]
    elif stage == 'river':
        community_cards = [Card.new(deck.pop()) for _ in range(5)]

    # More realistic opponent modeling
    hand_rank, rank, equity = evaluate_hand(hole_cards, community_cards)

    # Opponent behavior based on hand strength
    if equity > 0.7:  # Strong hand
        opponent_folded = random.random() < 0.05  # Almost never fold
        opponent_bet = random.uniform(20, 100)
        opponent_aggression = random.uniform(0.7, 1.0)
    elif equity > 0.4:  # Medium hand
        opponent_folded = random.random() < 0.3
        opponent_bet = random.uniform(5, 40) if not opponent_folded else 0
        opponent_aggression = random.uniform(0.3, 0.7)
    else:  # Weak hand
        opponent_folded = random.random() < 0.6
        opponent_bet = random.uniform(0, 15) if not opponent_folded else 0
        opponent_aggression = random.uniform(0.0, 0.4)

    # Game state
    pot = 10 + opponent_bet
    player_stack = 1000
    opponent_stack = 1000 - opponent_bet
    player_bet = 0

    # Position factor (simplified)
    position = random.choice(['early', 'middle', 'late'])
    position_bonus = {'early': 0.0, 'middle': 0.1, 'late': 0.2}[position]

    # Encode cards properly
    all_cards = hole_cards + community_cards
    while len(all_cards) < 7:
        all_cards.append(0)

    card_input = np.zeros((1, 2, 7))

    for i, card in enumerate(all_cards[:7]):
        if card != 0:
            card_str = Card.int_to_str(card)
            card_input[0, 0, i] = suits.index(card_str[-1]) / 3.0
            card_input[0, 1, i] = ranks.index(card_str[0]) / 12.0

    card_input = torch.FloatTensor(card_input).to(device)

    # Enhanced state features
    stage_onehot = np.zeros(4)
    stage_onehot[stage_idx] = 1

    state_features = [
        pot / 1000,
        player_stack / 1000,
        opponent_stack / 1000,
        player_bet / 1000,
        opponent_bet / 1000,
        float(opponent_folded),
        opponent_aggression,
        stage_onehot[0],  # Preflop
        stage_onehot[1],  # Flop
        position_bonus    # Position advantage
    ]

    state_input = torch.FloatTensor([state_features]).to(device)

    # Choose action
    if random.random() < epsilon:
        action = random.randint(0, 2)
    else:
        with torch.no_grad():
            q_values = model(card_input, state_input)
            action = torch.argmax(q_values, dim=1).item()

    # Much improved reward calculation
    pot_odds = opponent_bet / (pot + opponent_bet) if opponent_bet > 0 else 0
    base_reward = 0

    if action == 0:  # Fold
        if equity < 0.25:  # Good fold with very weak hand
            base_reward = 0.5
        elif equity < 0.4:  # Acceptable fold with weak hand
            base_reward = 0.1
        elif equity < 0.6:  # Marginal fold
            base_reward = -0.3
        else:  # Bad fold with strong hand
            base_reward = -2.0

    elif opponent_folded:
        base_reward = pot / 1000 + 0.5  # Win the pot plus bonus

    else:
        if action == 1:  # Call
            if equity > pot_odds + 0.1:  # Profitable call with margin
                base_reward = 1.0
            elif equity > pot_odds:  # Marginal call
                base_reward = 0.3
            else:  # Unprofitable call
                base_reward = -1.0

        else:  # Raise (action == 2)
            if equity > 0.7:  # Strong hand, good aggression
                base_reward = 1.5
            elif equity > 0.5:  # Medium hand, reasonable aggression
                base_reward = 0.8
            elif equity > 0.3 and random.random() < 0.3:  # Bluff
                base_reward = 0.4
            else:  # Bad aggression
                base_reward = -1.2

    # Stage and position adjustments
    stage_multiplier = [0.7, 1.0, 1.3, 1.5][stage_idx]
    reward = base_reward * stage_multiplier + position_bonus

    return card_input, state_input, action, reward

# Enhanced DQN with prioritized experience replay
class PrioritizedReplayBuffer:
    def __init__(self, capacity, alpha=0.6):
        self.capacity = capacity
        self.alpha = alpha
        self.buffer = []
        self.pos = 0
        self.priorities = np.zeros((capacity,), dtype=np.float32)

    def push(self, experience):
        max_prio = self.priorities.max() if self.buffer else 1.0

        if len(self.buffer) < self.capacity:
            self.buffer.append(experience)
        else:
            self.buffer[self.pos] = experience

        self.priorities[self.pos] = max_prio
        self.pos = (self.pos + 1) % self.capacity

    def sample(self, batch_size, beta=0.4):
        if len(self.buffer) == self.capacity:
            prios = self.priorities
        else:
            prios = self.priorities[:self.pos]

        probs = prios ** self.alpha
        probs /= probs.sum()

        indices = np.random.choice(len(self.buffer), batch_size, p=probs)
        samples = [self.buffer[idx] for idx in indices]

        total = len(self.buffer)
        weights = (total * probs[indices]) ** (-beta)
        weights /= weights.max()

        return samples, indices, np.array(weights, dtype=np.float32)

    def update_priorities(self, indices, priorities):
        for idx, prio in zip(indices, priorities):
            self.priorities[idx] = prio

    def __len__(self):
        return len(self.buffer)

# Training with improved hyperparameters
model = PokerQNetwork().to(device)
target_model = PokerQNetwork().to(device)
target_model.load_state_dict(model.state_dict())

# Better optimizer settings
optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=500, T_mult=2)

# Use prioritized replay buffer
replay_buffer = PrioritizedReplayBuffer(20000)

# Improved hyperparameters
gamma = 0.995
batch_size = 128
target_update_freq = 50
epsilon_start = 1.0
epsilon_end = 0.05
epsilon_decay = 0.9995

epsilon = epsilon_start
beta = 0.4
beta_increment = 0.0001

print("Starting enhanced training...")
best_reward = -float('inf')
patience = 0
max_patience = 300

for episode in range(3000):
    # Decay epsilon and increase beta
    epsilon = max(epsilon_end, epsilon * epsilon_decay)
    beta = min(1.0, beta + beta_increment)

    # Generate experience
    card_input, state_input, action, reward = simulate_game(model, epsilon)

    # Store experience
    experience = (card_input.cpu(), state_input.cpu(), action, reward)
    replay_buffer.push(experience)

    # Train when we have enough samples
    if len(replay_buffer) >= batch_size:
        # Sample from prioritized buffer
        if len(replay_buffer) >= batch_size * 2:  # Use prioritized sampling
            batch, indices, weights = replay_buffer.sample(batch_size, beta)
            weights = torch.FloatTensor(weights).to(device)
        else:
            # Fall back to uniform sampling for initial training
            batch = random.sample(replay_buffer.buffer, batch_size)
            indices = None
            weights = torch.ones(batch_size).to(device)

        # Prepare batch data
        card_inputs = torch.cat([b[0] for b in batch]).to(device)
        state_inputs = torch.cat([b[1] for b in batch]).to(device)
        actions = torch.LongTensor([b[2] for b in batch]).to(device)
        rewards = torch.FloatTensor([b[3] for b in batch]).to(device)

        # Compute Q-values
        optimizer.zero_grad()
        current_q_values = model(card_inputs, state_inputs)
        current_q_values = current_q_values.gather(1, actions.unsqueeze(1)).squeeze(1)

        # Target Q-values (using target network)
        with torch.no_grad():
            target_q_values = target_model(card_inputs, state_inputs)
            max_target_q = target_q_values.max(1)[0]
            target_values = rewards + gamma * max_target_q

        # Compute weighted loss
        td_errors = target_values - current_q_values
        loss = (weights * td_errors.pow(2)).mean()

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        scheduler.step()

        # Update priorities if using prioritized replay
        if indices is not None:
            priorities = td_errors.abs().detach().cpu().numpy() + 1e-6
            replay_buffer.update_priorities(indices, priorities)

        # Update target network
        if episode % target_update_freq == 0:
            target_model.load_state_dict(model.state_dict())

        # Enhanced logging and early stopping
        if episode % 100 == 99:
            recent_rewards = [exp[3] for exp in list(replay_buffer.buffer)[-200:]]
            avg_reward = np.mean(recent_rewards)

            if avg_reward > best_reward:
                best_reward = avg_reward
                patience = 0
                # Save best model
                torch.save(model.state_dict(), 'best_poker_model.pth')
            else:
                patience += 1

            print(f"Episode {episode + 1}, Loss: {loss.item():.4f}, "
                  f"Avg Reward: {avg_reward:.3f}, Best: {best_reward:.3f}, "
                  f"Epsilon: {epsilon:.3f}, Patience: {patience}")

            if patience >= max_patience:
                print("Early stopping triggered!")
                break

print("Training completed!")

# Load best model for testing
model.load_state_dict(torch.load('best_poker_model.pth'))
model.eval()

# Comprehensive testing
print("Testing enhanced model performance...")
test_results = {'correct': 0, 'total': 0, 'rewards': [], 'actions': []}

with torch.no_grad():
    for _ in range(1000):  # More comprehensive testing
        card_input, state_input, action, reward = simulate_game(model, epsilon=0.0)
        test_results['rewards'].append(reward)
        test_results['actions'].append(action)

        # Improved accuracy calculation
        if reward > 0.3:  # Good decision
            test_results['correct'] += 1
        elif reward > -0.2:  # Neutral/acceptable decision
            test_results['correct'] += 1
        # else: bad decision, don't count as correct

        test_results['total'] += 1

accuracy = 100 * test_results['correct'] / test_results['total']
avg_reward = np.mean(test_results['rewards'])
reward_std = np.std(test_results['rewards'])

print(f"Enhanced Test Accuracy: {accuracy:.2f}%")
print(f"Average Test Reward: {avg_reward:.3f}")
print(f"Reward Std: {reward_std:.3f}")

# Detailed action analysis
action_counts = [0, 0, 0]
for action in test_results['actions']:
    action_counts[action] += 1

total_actions = sum(action_counts)
print(f"\nAction Distribution:")
print(f"Fold: {100*action_counts[0]/total_actions:.1f}%")
print(f"Call: {100*action_counts[1]/total_actions:.1f}%")
print(f"Raise: {100*action_counts[2]/total_actions:.1f}%")

# Performance by reward range
high_rewards = [r for r in test_results['rewards'] if r > 0.5]
medium_rewards = [r for r in test_results['rewards'] if 0 <= r <= 0.5]
low_rewards = [r for r in test_results['rewards'] if r < 0]

print(f"\nReward Distribution:")
print(f"High rewards (>0.5): {len(high_rewards)} ({100*len(high_rewards)/len(test_results['rewards']):.1f}%)")
print(f"Medium rewards (0-0.5): {len(medium_rewards)} ({100*len(medium_rewards)/len(test_results['rewards']):.1f}%)")
print(f"Negative rewards (<0): {len(low_rewards)} ({100*len(low_rewards)/len(test_results['rewards']):.1f}%)")

Using device: cpu
Starting enhanced training...
Episode 200, Loss: 0.1622, Avg Reward: 0.054, Best: 0.054, Epsilon: 0.905, Patience: 0
Episode 300, Loss: 0.0481, Avg Reward: 0.100, Best: 0.100, Epsilon: 0.861, Patience: 0
Episode 400, Loss: 0.1559, Avg Reward: 0.097, Best: 0.100, Epsilon: 0.819, Patience: 1
Episode 500, Loss: 0.0733, Avg Reward: 0.214, Best: 0.214, Epsilon: 0.779, Patience: 0
Episode 600, Loss: 0.4834, Avg Reward: 0.208, Best: 0.214, Epsilon: 0.741, Patience: 1
Episode 700, Loss: 0.3390, Avg Reward: 0.145, Best: 0.214, Epsilon: 0.705, Patience: 2
Episode 800, Loss: 0.2915, Avg Reward: 0.255, Best: 0.255, Epsilon: 0.670, Patience: 0
Episode 900, Loss: 0.4991, Avg Reward: 0.215, Best: 0.255, Epsilon: 0.638, Patience: 1
Episode 1000, Loss: 0.5441, Avg Reward: 0.142, Best: 0.255, Epsilon: 0.606, Patience: 2
Episode 1100, Loss: 0.6239, Avg Reward: 0.252, Best: 0.255, Epsilon: 0.577, Patience: 3
Episode 1200, Loss: 0.8116, Avg Reward: 0.353, Best: 0.353, Epsilon: 0.549, Pati

In [25]:
!mkdir -p streamlit_app

In [28]:
%%writefile streamlit_app/app.py
import streamlit as st
import torch
import torch.nn as nn
import numpy as np
from treys import Card, Evaluator
import pandas as pd
import plotly.express as px

# Define PokerQNetwork class (original with 10 state features)
class PokerQNetwork(nn.Module):
    def __init__(self):
        super(PokerQNetwork, self).__init__()
        self.card_embedding = nn.Embedding(53, 16)
        self.hole_conv = nn.Conv1d(16, 32, kernel_size=2)
        self.community_conv = nn.Conv1d(16, 32, kernel_size=3, padding=1)
        self.attention = nn.MultiheadAttention(embed_dim=32, num_heads=4, batch_first=True)
        self.state_fc = nn.Linear(10, 64)  # Original state input size
        total_features = 32 + 32 + 64
        self.fc1 = nn.Linear(total_features, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 3)
        self.dropout = nn.Dropout(0.2)
        self.layer_norm = nn.LayerNorm(total_features)

    def forward(self, card_input, state_input):
        batch_size = card_input.size(0)
        card_ids = torch.zeros(batch_size, 7, dtype=torch.long, device=card_input.device)

        suits = ['h', 's', 'd', 'c']
        ranks = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A']

        for b in range(batch_size):
            for c in range(7):
                if card_input[b, 0, c] > 0 or card_input[b, 1, c] > 0:
                    suit = int(card_input[b, 0, c] * 3)
                    rank = int(card_input[b, 1, c] * 12)
                    card_ids[b, c] = rank * 4 + suit + 1

        card_embeds = self.card_embedding(card_ids)
        hole_embeds = card_embeds[:, :2, :].transpose(1, 2)
        community_embeds = card_embeds[:, 2:, :].transpose(1, 2)

        hole_features = self.hole_conv(hole_embeds).squeeze(-1)
        community_features = self.community_conv(community_embeds)
        community_features = torch.nn.functional.adaptive_avg_pool1d(community_features, 1).squeeze(-1)

        combined_cards = torch.stack([hole_features, community_features], dim=1)
        attended_cards, _ = self.attention(combined_cards, combined_cards, combined_cards)
        attended_cards = attended_cards.mean(dim=1)

        state_features = torch.nn.functional.relu(self.state_fc(state_input))

        combined = torch.cat([hole_features, attended_cards, state_features], dim=1)
        combined = self.layer_norm(combined)

        x = torch.nn.functional.relu(self.fc1(combined))
        x = self.dropout(x)
        x = torch.nn.functional.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.nn.functional.relu(self.fc3(x))
        x = self.fc4(x)

        return x

# Enhanced evaluate_hand function with multi-opponent adjustment
def evaluate_hand(hole_cards, community_cards, num_opponents=1):
    evaluator = Evaluator()

    if len(community_cards) < 3:  # Preflop
        card1_str = Card.int_to_str(hole_cards[0])
        card2_str = Card.int_to_str(hole_cards[1])
        rank1 = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'].index(card1_str[0])
        rank2 = ['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'].index(card2_str[0])

        if rank1 == rank2:
            equity = 0.5 + (rank1 / 26)
        elif card1_str[1] == card2_str[1]:
            equity = 0.3 + (max(rank1, rank2) / 26) + 0.1
        else:
            equity = 0.2 + (max(rank1, rank2) / 26)

        # Adjust equity for multiple opponents
        equity = equity * (0.85 ** num_opponents)
        equity = min(equity, 0.85)

        # Hand strength percentile
        all_hands = 1326
        rank = min(rank1, rank2) if rank1 != rank2 else rank1
        suited = card1_str[1] == card2_str[1]
        if rank1 == rank2:
            percentile = 100 * (1 - (6 * (12 - rank) / all_hands))
        elif suited:
            percentile = 100 * (1 - (4 * (169 - (rank1 + rank2 + 10)) / all_hands))
        else:
            percentile = 100 * (1 - (12 * (169 - (rank1 + rank2)) / all_hands))

        return 0, 7462, equity, max(0, min(100, percentile))

    # Post-flop
    rank = evaluator.evaluate(hole_cards, community_cards)
    rank_class = evaluator.get_rank_class(rank)

    equity_map = {
        1: 0.95, 2: 0.90, 3: 0.85, 4: 0.80, 5: 0.75,
        6: 0.65, 7: 0.55, 8: 0.40, 9: 0.25
    }

    base_equity = equity_map.get(rank_class, 0.25)
    relative_strength = (7462 - rank) / 7462
    equity = base_equity + (relative_strength * 0.15)

    # Adjust for multiple opponents
    equity = equity * (0.9 ** num_opponents)
    equity = min(equity, 0.98)

    # Hand strength percentile
    percentile = 100 * (1 - rank / 7462)

    return rank_class, rank, equity, max(0, min(100, percentile))

# Set up the Streamlit page
st.set_page_config(page_title="Advanced Poker AI Predictor", page_icon="🃏", layout="wide")
st.title('Advanced Poker AI Predictor')
st.write('This app uses a deep Q-learning model to predict the best action in Texas Hold\'em, supporting multiple opponents, visual card selection, and detailed analysis.')

# Initialize session state for prediction history
if 'prediction_history' not in st.session_state:
    st.session_state.prediction_history = []

# Load the model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = PokerQNetwork().to(device)
try:
    model.load_state_dict(torch.load('best_poker_model.pth', map_location=device))
    model.eval()
    st.success("Model loaded successfully!")
except FileNotFoundError:
    st.error("Model file 'best_poker_model.pth' not found. Please ensure it's in the same directory.")
    st.stop()

# Sidebar for user inputs
st.sidebar.header('Game State Inputs')

# Number of opponents
num_opponents = st.sidebar.selectbox('Number of Opponents', [1, 2, 3, 4, 5], index=0)

# Visual card picker
st.sidebar.subheader('Card Selection')
suits = ['♥', '♠', '♦', '♣']
ranks = ['2', '3', '4', '5', '6', '7', '8', '9', '10', 'J', 'Q', 'K', 'A']
deck = [f"{r}{s}" for r in ranks for s in suits]

def card_image(card_str):
    if card_str == "None":
        return None
    rank = card_str[:-1].lower().replace('10', 'T').replace('j', 'J').replace('q', 'Q').replace('k', 'K').replace('a', 'A')
    suit = {'♥': 'H', '♠': 'S', '♦': 'D', '♣': 'C'}[card_str[-1]]
    image_url = f'https://deckofcardsapi.com/static/img/{rank}{suit}.png'
    return image_url

# Hole cards
st.sidebar.write('**Hole Cards**')
hole_cols = st.sidebar.columns(2)
hole_cards = []
selected_cards = []
for i in range(2):
    with hole_cols[i]:
        card_key = f"hole_card_{i}"
        selected = st.selectbox(f"Card {i+1}", ['None'] + deck, key=card_key)
        if selected != 'None':
            hole_cards.append(Card.new(selected[:-1] + {'♥': 'h', '♠': 's', '♦': 'd', '♣': 'c'}[selected[-1]]))
            selected_cards.append(selected)
            st.image(card_image(selected), width=80)

# Community cards
st.sidebar.subheader('Community Cards')
stage = st.sidebar.selectbox('Game Stage', ['Preflop', 'Flop', 'Turn', 'River'])
community_cards = []
if stage != 'Preflop':
    num_comm_cards = 3 if stage == 'Flop' else 4 if stage == 'Turn' else 5
    comm_cols = st.sidebar.columns(num_comm_cards)
    for i in range(num_comm_cards):
        with comm_cols[i]:
            card_key = f"comm_card_{i}"
            selected = st.selectbox(f"Card {i+1}", ['None'] + deck, key=card_key)
            if selected != 'None':
                community_cards.append(Card.new(selected[:-1] + {'♥': 'h', '♠': 's', '♦': 'd', '♣': 'c'}[selected[-1]]))
                selected_cards.append(selected)
                st.image(card_image(selected), width=80)

# Validate card selections
if len(selected_cards) != len(set(selected_cards)):
    st.error("Duplicate cards selected. Please choose unique cards.")
    st.stop()

if len(hole_cards) != 2:
    st.error("Please select both hole cards.")
    st.stop()

# Game state inputs for player and opponents
st.sidebar.subheader('Game State')
pot = st.sidebar.slider('Pot Size ($)', 0, 5000, 50)
player_stack = st.sidebar.slider('Your Stack ($)', 0, 10000, 1000)
player_bet = st.sidebar.slider('Your Current Bet ($)', 0, 1000, 0)
position = st.sidebar.selectbox('Position', ['Early', 'Middle', 'Late'])

opponent_data = []
for opp in range(num_opponents):
    with st.sidebar.expander(f"Opponent {opp+1}"):
        opp_stack = st.slider(f'Opponent {opp+1} Stack ($)', 0, 10000, 1000, key=f'opp_stack_{opp}')
        opp_bet = st.slider(f'Opponent {opp+1} Bet ($)', 0, 1000, 10, key=f'opp_bet_{opp}')
        opp_folded = st.checkbox(f'Opponent {opp+1} Folded', value=False, key=f'opp_folded_{opp}')
        opp_aggression = st.slider(f'Opponent {opp+1} Aggression (0-1)', 0.0, 1.0, 0.5, key=f'opp_agg_{opp}')
        opponent_data.append({
            'stack': opp_stack,
            'bet': opp_bet,
            'folded': opp_folded,
            'aggression': opp_aggression
        })

# Prepare input tensors
card_input = np.zeros((1, 2, 7))
suits_map = {'h': 0, 's': 1, 'd': 2, 'c': 3}
ranks_map = {r: i for i, r in enumerate(['2', '3', '4', '5', '6', '7', '8', '9', 'T', 'J', 'Q', 'K', 'A'])}
all_cards = hole_cards + community_cards
while len(all_cards) < 7:
    all_cards.append(0)

for i, card in enumerate(all_cards[:7]):
    if card != 0:
        card_str = Card.int_to_str(card)
        card_input[0, 0, i] = suits_map[card_str[-1]] / 3.0
        card_input[0, 1, i] = ranks_map[card_str[0]] / 12.0

card_input = torch.FloatTensor(card_input).to(device)

# Encode state (adjusted to 10 features)
stage_onehot = np.zeros(4)
stage_idx = ['Preflop', 'Flop', 'Turn', 'River'].index(stage)
stage_onehot[stage_idx] = 1

position_bonus = {'Early': 0.0, 'Middle': 0.1, 'Late': 0.2}[position]

# Aggregate opponent data
avg_opp_stack = sum(opp['stack'] for opp in opponent_data) / max(1, num_opponents)
avg_opp_bet = sum(opp['bet'] for opp in opponent_data if not opp['folded']) / max(1, sum(1 for opp in opponent_data if not opp['folded']))
avg_opp_aggression = sum(opp['aggression'] for opp in opponent_data) / max(1, num_opponents)
any_opp_folded = any(opp['folded'] for opp in opponent_data)

state_features = [
    pot / 1000,
    player_stack / 1000,
    avg_opp_stack / 1000,
    player_bet / 1000,
    avg_opp_bet / 1000,
    float(any_opp_folded),
    avg_opp_aggression,
    stage_onehot[0],
    stage_onehot[1],
    position_bonus
]

state_input = torch.FloatTensor([state_features]).to(device)

# Display user inputs
st.subheader('Your Inputs')
col1, col2 = st.columns([2, 1])
with col1:
    st.write('**Hole Cards:**', ', '.join([Card.int_to_pretty_str(c) for c in hole_cards]))
    st.write('**Community Cards:**', ', '.join([Card.int_to_pretty_str(c) for c in community_cards]) if community_cards else 'None')
    st.write('**Game State:**')
    input_data = {
        'Pot Size ($)': pot,
        'Your Stack ($)': player_stack,
        'Your Bet ($)': player_bet,
        'Avg Opponent Stack ($)': avg_opp_stack,
        'Avg Opponent Bet ($)': avg_opp_bet,
        'Any Opponent Folded': any_opp_folded,
        'Avg Opponent Aggression': avg_opp_aggression,
        'Game Stage': stage,
        'Position': position,
        'Number of Opponents': num_opponents
    }
    st.write(pd.DataFrame([input_data]))

# Display cards
with col2:
    st.write('**Your Cards**')
    cols = st.columns(2)
    for i, card in enumerate(hole_cards):
        cols[i].image(card_image(Card.int_to_pretty_str(card)), caption=Card.int_to_pretty_str(card), width=80)
    if community_cards:
        st.write('**Community Cards**')
        cols = st.columns(len(community_cards))
        for i, card in enumerate(community_cards):
            cols[i].image(card_image(Card.int_to_pretty_str(card)), caption=Card.int_to_pretty_str(card), width=80)

# Make prediction
with torch.no_grad():
    q_values = model(card_input, state_input)
    action_probs = torch.softmax(q_values, dim=1).cpu().numpy()[0]
    action = torch.argmax(q_values, dim=1).item()

# Evaluate hand strength
rank_class, rank, equity, percentile = evaluate_hand(hole_cards, community_cards, num_opponents)

# Store prediction in history
prediction_entry = {
    'Timestamp': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    'Hole Cards': ', '.join([Card.int_to_pretty_str(c) for c in hole_cards]),
    'Community Cards': ', '.join([Card.int_to_pretty_str(c) for c in community_cards]) if community_cards else 'None',
    'Stage': stage,
    'Recommended Action': ['Fold', 'Call', 'Raise'][action],
    'Equity': f'{equity:.2%}',
    'Hand Strength Percentile': f'{percentile:.1f}%'
}
st.session_state.prediction_history.append(prediction_entry)
if len(st.session_state.prediction_history) > 10:
    st.session_state.prediction_history = st.session_state.prediction_history[-10:]

# Display results
st.subheader('Prediction')
action_names = ['Fold', 'Call', 'Raise']
st.write(f'The model recommends to **{action_names[action]}**.')
st.write(f'Estimated Hand Equity: **{equity:.2%}** (against {num_opponents} opponent(s))')
st.write(f'Hand Strength Percentile: **{percentile:.1f}%** (top {100-percentile:.1f}% of possible hands)')

# Action probabilities
st.subheader('Action Probabilities')
prob_df = pd.DataFrame({
    'Action': action_names,
    'Probability': [f'{p:.2%}' for p in action_probs]
})
st.write(prob_df)

fig = px.bar(
    x=action_names,
    y=action_probs,
    title='Action Probability Distribution',
    labels={'x': 'Action', 'y': 'Probability'},
    text=[f'{p:.2%}' for p in action_probs]
)
fig.update_traces(textposition='auto')
fig.update_yaxes(range=[0, 1], tickformat='.0%')
st.plotly_chart(fig)

# In-depth analysis
st.subheader('In-Depth Analysis')
pot_odds = avg_opp_bet / (pot + avg_opp_bet) if avg_opp_bet > 0 else 0
st.write(f'**Pot Odds**: {pot_odds:.2%} (you need {pot_odds:.2%} equity to break even on a call)')

# Stage-specific insights
stage_insights = {
    'Preflop': f"In the preflop stage, your hand's equity ({equity:.2%}) is based on starting hand strength against {num_opponents} opponent(s). With a percentile of {percentile:.1f}%, your hand is {'strong' if percentile > 80 else 'medium' if percentile > 50 else 'weak'}. {'Consider raising with strong hands to build the pot.' if percentile > 80 else 'Play cautiously unless position or odds favor you.'}",
    'Flop': f"On the flop, your equity ({equity:.2%}) reflects your hand's strength with three community cards. A percentile of {percentile:.1f}% indicates {'a strong made hand or draw' if percentile > 70 else 'a moderate hand' if percentile > 40 else 'a weak hand or weak draw'}. {'Aggression may be warranted with strong hands or draws.' if percentile > 70 else 'Evaluate draws carefully against pot odds.'}",
    'Turn': f"On the turn, with four community cards, your equity ({equity:.2%}) is more defined. Your hand's percentile ({percentile:.1f}%) suggests {'a strong hand or draw' if percentile > 65 else 'a marginal hand' if percentile > 35 else 'a weak hand'}. {'Protect strong hands with bets; consider folding weak hands unless odds are favorable.' if percentile > 65 else 'Be cautious with marginal hands.'}",
    'River': f"On the river, your hand is fully defined with equity ({equity:.2%}) and percentile ({percentile:.1f}%). This indicates {'a strong hand' if percentile > 60 else 'a medium hand' if percentile > 30 else 'a weak hand'}. {'Value bet strong hands; bluff selectively with weak hands.' if percentile > 60 else 'Check or fold unless pot odds justify a call.'}"
}
st.write(f'**Stage Insight ({stage})**: {stage_insights[stage]}')

# Action analysis
st.write('**Action Analysis**:')
for i, action_name in enumerate(action_names):
    expected_value = 0
    if action_name == 'Fold':
        expected_value = 0
        analysis = "Folding avoids further risk but forfeits the current pot. Recommended with weak hands or poor pot odds."
    elif action_name == 'Call':
        if equity > pot_odds + 0.1:
            expected_value = (equity * pot) - ((1 - equity) * avg_opp_bet)
            analysis = f"Calling is profitable with {equity:.2%} equity against {pot_odds:.2%} pot odds. Expected value: ${expected_value:.2f}."
        elif equity > pot_odds:
            expected_value = (equity * pot) - ((1 - equity) * avg_opp_bet)
            analysis = f"Calling is marginal with {equity:.2%} equity close to {pot_odds:.2%} pot odds. Expected value: ${expected_value:.2f}."
        else:
            expected_value = (equity * pot) - ((1 - equity) * avg_opp_bet)
            analysis = f"Calling may be unprofitable with {equity:.2%} equity below {pot_odds:.2%} pot odds. Expected value: ${expected_value:.2f}."
    else:  # Raise
        raise_amount = avg_opp_bet * 2 if avg_opp_bet > 0 else 20
        if equity > 0.6:
            expected_value = (equity * (pot + raise_amount)) - ((1 - equity) * (avg_opp_bet + raise_amount))
            analysis = f"Raising with a strong hand ({equity:.2%} equity) can build the pot or force folds. Expected value: ${expected_value:.2f}."
        elif equity > 0.4 and avg_opp_aggression < 0.5:
            expected_value = (equity * (pot + raise_amount)) - ((1 - equity) * (avg_opp_bet + raise_amount))
            analysis = f"Raising as a semi-bluff with {equity:.2%} equity may induce folds from less aggressive opponents. Expected value: ${expected_value:.2f}."
        else:
            expected_value = (equity * (pot + raise_amount)) - ((1 - equity) * (avg_opp_bet + raise_amount))
            analysis = f"Raising with {equity:.2%} equity is risky against aggressive opponents. Expected value: ${expected_value:.2f}."

    st.write(f"- **{action_name}**: {analysis}")

# Prediction history
st.subheader('Prediction History (Last 10)')
if st.session_state.prediction_history:
    history_df = pd.DataFrame(st.session_state.prediction_history)
    st.write(history_df)
else:
    st.write("No predictions yet.")



Overwriting streamlit_app/app.py


In [29]:
!pip install -q streamlit treys
!npm install localtunnel
!wget -q -O - ipv4.icanhazip.com
!streamlit run streamlit_app/app.py & npx localtunnel --port 8501

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K
up to date, audited 23 packages in 2s
[1G[0K⠴[1G[0K
[1G[0K⠴[1G[0K3 packages are looking for funding
[1G[0K⠴[1G[0K  run `npm fund` for details
[1G[0K⠴[1G[0K
2 [31m[1mhigh[22m[39m severity vulnerabilities

To address all issues (including breaking changes), run:
  npm audit fix --force

Run `npm audit` for details.
[1G[0K⠴[1G[0K34.168.235.31
[1G[0K⠙[1G[0K⠹[1G[0K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.168.235.31:8501[0m
[0m
your url is: https://four-clubs-attack.loca.lt
2025-06-13 22:41:07.798 Examining the path of torch.classes raised:
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/streamlit/web/boo