In [17]:
import torch
from torch import nn
from torch.nn import functional as F
import random
import math
import re
import time 

# Global training hyperparameters.
# NOTE(review): in the cells visible below, batch_size is reassigned to 8 before
# the DataLoader is built, the optimizer is created with a literal lr=1e-4 and
# train() is called with a literal num_epochs=10, so `epochs` and `learning_rate`
# are never read — confirm which values are intended.
batch_size = 64
epochs = 4
learning_rate = 1e-3

#{"context": " \n[TABLE_CONFIGURATION]\nBTN=P1\nSB=P1 0.5BB\nBB=P2 1BB\n\n[STACKS]\nP1: 50.9BB\nP2: 19.0BB [As 6d]\nPOT=1.5BB\n\n[PREFLOP]\nP1: RAISE 1BB\nP2: ", "truth": "ALLIN 19BB\n"}

In [2]:
# NOTE(review): these three assignments repeat the hyperparameters already set in
# the first cell with identical values; the execution counter of this cell is also
# older than the first cell's, so it looks like a leftover — confirm and remove.
batch_size = 64
epochs = 4
learning_rate = 1e-3

## Tokenizer

In [None]:
# Vocabulary definition.
# Entries are ordered from longest to shortest token: structural markers and
# action keywords first, then the 52 two-character cards (grouped by suit
# s, h, d, c, ranks 2..A inside each suit), then single characters.
# The position of a token in this list is its integer ID.
sorted_vocab = (
    [
        '[TABLE_CONFIGURATION]', '[PREFLOP]', '[STACKS]', '[RIVER]', '[FLOP]', '[TURN]',
        'CHECK', 'RAISE', 'ALLIN', '[PAD]', '[EOS]',
        'FOLD', 'CALL',
        'POT', 'BTN', 'BET',
        'BB', 'SB',
    ]
    + [rank + suit for suit in 'shdc' for rank in '23456789TJQKA']
    + list('0123456789')
    + ['.', ':', '=', '\n', ' ', '[', ']']
)

class PokerTokenizer:
    """Regex-based tokenizer over a fixed vocabulary of literal tokens.

    The ID of a token is its index in ``vocab``. Text is split by scanning it
    with an alternation of all (escaped) vocabulary entries; characters that do
    not start any vocabulary token are skipped silently, so ``decode(encode(s))``
    only reproduces ``s`` when every character of ``s`` is covered by the vocab.

    Args:
        vocab: sequence of token strings. Any order is accepted: the regex is
            built longest-token-first so that e.g. ``'BB'`` is never split into
            two shorter tokens.
    """

    def __init__(self, vocab):
        self.vocab = vocab
        self.token_to_id = {token: idx for idx, token in enumerate(self.vocab)}
        self.id_to_token = {idx: token for idx, token in enumerate(self.vocab)}
        self.ntokens = len(self.vocab)
        # Regex alternation picks the first alternative that matches, so longer
        # tokens must come first. The sort is stable, hence a vocab that is
        # already ordered longest-first yields exactly the same pattern as before.
        longest_first = sorted(self.vocab, key=len, reverse=True)
        self.pattern = re.compile(r"(" + "|".join(re.escape(token) for token in longest_first) + r")")

    def pre_tokenization(self, text):
        """Split ``text`` into the list of vocabulary tokens found in it, in order."""
        return self.pattern.findall(text)

    def encode(self, text):
        """Convert a text sequence into a list of token IDs.

        Characters not covered by the vocabulary are dropped.
        """
        tokens = self.pre_tokenization(text)
        # The membership filter only matters for degenerate patterns (e.g. an
        # empty vocab, whose pattern matches empty strings).
        return [self.token_to_id[token] for token in tokens if token in self.token_to_id]

    def decode(self, token_list):
        """Convert an iterable of token IDs back into text.

        IDs are passed through ``int()`` so that a 1-D tensor of IDs is accepted
        as well as a plain list of ints.

        Raises:
            KeyError: if an ID is not in the vocabulary.
        """
        return "".join([self.id_to_token[int(x)] for x in token_list])


# Tokenizer smoke test: encode a sample prompt and decode it back.
# `tokenizer` is a notebook-level name that later cells (dataset, collate
# function, model construction) read.
# NOTE(review): the recorded output shows "BTN=1" where the prompt has "BTN=P1":
# 'P' is not in the vocabulary, and characters the tokenizer regex cannot match
# are skipped, so the round trip is lossy for player labels.
tokenizer = PokerTokenizer(sorted_vocab)
prompt = "\n[TABLE_CONFIGURATION]\nBTN=P1\nSB=P1 0.5BB\nBB=P2 1BB\n\n[STACKS]\nP1: 50.9BB\nP2: 19.0BB [As 6d]\nPOT=1.5BB\n\n[PREFLOP]\nP1: RAISE 1BB\nP2: "
encoded = tokenizer.encode(prompt)
decoded = tokenizer.decode(encoded)

print("Encoded:", encoded)
print("Decoded:", decoded)

Encoded: [83, 0, 83, 14, 82, 71, 83, 17, 82, 71, 84, 70, 80, 75, 16, 83, 16, 82, 72, 84, 71, 16, 83, 83, 2, 83, 71, 81, 84, 75, 70, 80, 79, 16, 83, 72, 81, 84, 71, 79, 80, 70, 16, 84, 85, 30, 84, 48, 86, 83, 13, 82, 71, 80, 75, 16, 83, 83, 1, 83, 71, 81, 84, 7, 84, 71, 16, 83, 72, 81, 84]
Decoded: 
[TABLE_CONFIGURATION]
BTN=1
SB=1 0.5BB
BB=2 1BB

[STACKS]
1: 50.9BB
2: 19.0BB [As 6d]
POT=1.5BB

[PREFLOP]
1: RAISE 1BB
2: 


## Dataset and dataloader

In [20]:
import torch
from torch.utils.data import Dataset, DataLoader
import json
import os
import random

class PokerDataset(Dataset):
    """Dataset of poker hands stored as one JSON file per example.

    Each file is read as a JSON object with a ``"context"`` string (the prompt)
    and a ``"truth"`` string (the expected continuation).
    """

    def __init__(self, data_dir, tokenizer, max_files=None):
        """
        Args:
            data_dir (str): Path to the directory containing the JSON files.
            tokenizer (PokerTokenizer): Tokenizer used to turn text into token IDs.
                Must expose ``encode(text)`` and a ``token_to_id`` mapping that
                contains ``"[EOS]"``.
            max_files (int, optional): Maximum number of files to keep. ``None``
                keeps every file; ``0`` yields an empty dataset.
        """
        self.tokenizer = tokenizer
        # os.listdir returns entries in arbitrary order; sorting makes the index ->
        # file mapping (and the subset kept by max_files) reproducible across runs.
        self.data_files = sorted(
            os.path.join(data_dir, name)
            for name in os.listdir(data_dir)
            if name.endswith(".json")
        )

        # Explicit None check so that max_files=0 is honoured instead of being
        # treated as "no limit".
        if max_files is not None:
            self.data_files = self.data_files[:max_files]

    def __len__(self):
        """Number of JSON files (one example per file)."""
        return len(self.data_files)

    def __getitem__(self, idx):
        """Load one JSON file and return its tokenized sequences.

        Returns:
            tuple: ``(context_ids, truth_ids)`` as 1-D ``torch.long`` tensors;
            ``truth_ids`` ends with the ``[EOS]`` token ID.
        """
        with open(self.data_files[idx], "r", encoding="utf-8") as f:
            data = json.load(f)

        context = data["context"]
        truth = data["truth"]

        context_tokens = self.tokenizer.encode(context)
        # Append [EOS] so the model learns where the answer stops.
        truth_tokens = self.tokenizer.encode(truth) + [self.tokenizer.token_to_id["[EOS]"]]

        return torch.tensor(context_tokens, dtype=torch.long), torch.tensor(truth_tokens, dtype=torch.long)

def collate_fn(batch, pad_token_id=None):
    """Pad a list of (context, truth) pairs into two rectangular tensors.

    Contexts are padded on the LEFT (so every context ends at the last column)
    and truths are padded on the RIGHT.

    Args:
        batch: non-empty list of ``(context_ids, truth_ids)`` pairs of 1-D
            ``torch.long`` tensors.
        pad_token_id (int, optional): ID used for padding. When omitted, the
            ``[PAD]`` ID of the notebook-level ``tokenizer`` is used, which keeps
            ``DataLoader(..., collate_fn=collate_fn)`` working unchanged.

    Returns:
        tuple: ``(contexts, truths)`` of shapes ``(batch, max_context_len)`` and
        ``(batch, max_truth_len)``.

    Raises:
        ValueError: if ``batch`` is empty.
    """
    if not batch:
        raise ValueError("collate_fn received an empty batch")

    if pad_token_id is None:
        # Fallback to the global tokenizer defined earlier in the notebook.
        pad_token_id = tokenizer.token_to_id["[PAD]"]

    context_batch, truth_batch = zip(*batch)

    # Longest sequence of each kind in this batch
    max_context_len = max(len(seq) for seq in context_batch)
    max_truth_len = max(len(seq) for seq in truth_batch)

    def _padding(length):
        """1-D tensor of `length` pad IDs."""
        return torch.full((length,), pad_token_id, dtype=torch.long)

    # Left padding for contexts
    padded_contexts = [torch.cat([_padding(max_context_len - len(seq)), seq]) for seq in context_batch]

    # Right padding for truths
    padded_truths = [torch.cat([seq, _padding(max_truth_len - len(seq))]) for seq in truth_batch]

    return torch.stack(padded_contexts), torch.stack(padded_truths)

# Example usage: build the dataset and a shuffled DataLoader, then inspect a batch.
# NOTE(review): batch_size is reassigned here (8), overriding the value set in the
# hyperparameter cell; `dataset` and `dataloader` are notebook-level names, and
# `dataloader` is the one later passed to train().
data_dir = "data/train/poker_dataset"  # Replace with the correct path
batch_size = 8

dataset = PokerDataset(data_dir, tokenizer)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

# Test with one batch
for context_batch, truth_batch in dataloader:
    print("Context batch shape:", context_batch.shape)  # (batch_size, max_context_len)
    print("Truth batch shape:", truth_batch.shape)  # (batch_size, max_truth_len)
    break
# Draws a fresh batch from a new iterator (shuffle=True, so it is generally not the
# batch printed above) and displays it as the cell output.
next(iter(dataloader))

Context batch shape: torch.Size([8, 143])
Truth batch shape: torch.Size([8, 6])


(tensor([[ 9,  9,  9,  ..., 72, 81, 84],
         [ 9,  9,  9,  ..., 75, 81, 84],
         [ 9,  9,  9,  ..., 72, 81, 84],
         ...,
         [ 9,  9,  9,  ..., 74, 81, 84],
         [ 9,  9,  9,  ..., 75, 81, 84],
         [ 9,  9,  9,  ..., 75, 81, 84]]),
 tensor([[11, 83, 10,  9,  9,  9,  9],
         [ 7, 84, 72, 16, 83, 10,  9],
         [11, 83, 10,  9,  9,  9,  9],
         [11, 83, 10,  9,  9,  9,  9],
         [15, 84, 71, 71, 16, 83, 10],
         [11, 83, 10,  9,  9,  9,  9],
         [ 8, 84, 72, 78, 16, 83, 10],
         [11, 83, 10,  9,  9,  9,  9]]))

## Model

In [26]:
class PositionalEmbedding(nn.Module):
    r"""Inject information about the absolute position of the tokens in the sequence.

    The positional encodings have the same dimension as the embeddings, so that
    the two can be summed. Sine and cosine functions of different frequencies
    are used:

    .. math::
        \text{PosEmbedder}(pos, 2i) = \sin(pos / 10000^{2i / d_{model}})

        \text{PosEmbedder}(pos, 2i+1) = \cos(pos / 10000^{2i / d_{model}})

    where :math:`pos` is the token position and :math:`i` is the embedding index.

    Args:
        d_model: the embed dim (required). Both even and odd values are supported.
        dropout: the dropout value (default=0.1).
        max_len: the max. length of the incoming sequence (default=5000).
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]
        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns, so the
        # cosine part must be truncated to d_model // 2 frequencies.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        # Stored as [max_len, 1, d_model] so it broadcasts over the batch axis of a
        # sequence-first input. Registered as a buffer: it moves with the module
        # across devices but is not a trainable parameter.
        self.register_buffer('pe', pe.unsqueeze(1))

    def forward(self, x):
        """Add the positional encoding to ``x`` and apply dropout.

        Args:
            x: the sequence fed to the positional encoder model (required).
        Shape:
            x: [sequence length, batch size, embed dim]
            output: [sequence length, batch size, embed dim]
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

In [30]:
class TransformerModel(nn.Transformer):
    """Causal (decoder-only style) language model built on ``nn.Transformer``'s encoder.

    Only the encoder stack of ``nn.Transformer`` is used; the ``decoder``
    attribute is replaced by a linear projection onto the vocabulary.

    Inputs are BATCH-FIRST token IDs of shape ``(batch, seq_len)`` and the output
    is ``(batch, seq_len, ntoken)`` logits, where the logits at position ``i``
    only depend on tokens at positions ``<= i`` of the same sample.

    Args:
        ntoken: vocabulary size.
        ninp: embedding / model dimension.
        nhead: number of attention heads.
        nhid: hidden size of the feed-forward sub-layers.
        nlayers: number of encoder layers.
        dropout: dropout applied after the positional encoding. It is not
            forwarded to the encoder layers, which keep ``nn.Transformer``'s default.
    """

    def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5):
        # batch_first=True: callers feed (batch, seq) tensors. With the default
        # sequence-first layout the encoder would treat the batch axis as the
        # time axis and attend across different samples.
        super(TransformerModel, self).__init__(d_model=ninp,
                                               nhead=nhead,
                                               dim_feedforward=nhid,
                                               num_encoder_layers=nlayers,
                                               batch_first=True)
        self.input_emb = nn.Embedding(ntoken, ninp)
        self.pos_encoder = PositionalEmbedding(ninp, dropout)
        # Overrides the TransformerDecoder created by nn.Transformer.
        self.decoder = nn.Linear(ninp, ntoken)

        self.ninp = ninp
        self.init_weights()

    def init_weights(self):
        """Uniform init for the embedding and output projection, zero output bias."""
        initrange = 0.1
        nn.init.uniform_(self.input_emb.weight, -initrange, initrange)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -initrange, initrange)

    def _generate_square_subsequent_mask(self, sz, device=None):
        """Additive causal mask of shape (sz, sz): 0 on/below the diagonal, -inf above.

        Args:
            sz: sequence length.
            device: device on which to create the mask (default: torch default device).
        """
        return torch.triu(torch.full((sz, sz), float("-inf"), device=device), diagonal=1)

    def forward(self, src):
        """Compute next-token logits for every position.

        Args:
            src: ``(batch, seq_len)`` tensor of token IDs.
        Returns:
            ``(batch, seq_len, ntoken)`` tensor of logits.

        NOTE(review): no key-padding mask is applied, so [PAD] positions of
        left-padded contexts are attended to like regular tokens.
        """
        # The mask must be sized by the sequence axis (dim 1), and is created on
        # the input's device rather than on a notebook-level global.
        seq_len = src.size(1)
        self.src_mask = self._generate_square_subsequent_mask(seq_len, device=src.device)

        embedded = self.input_emb(src) * math.sqrt(self.ninp)
        # The positional encoder indexes positions along dim 0 (sequence-first),
        # so transpose to (seq, batch, dim) around it and back.
        embedded = self.pos_encoder(embedded.transpose(0, 1)).transpose(0, 1)
        output_enc = self.encoder(embedded, mask=self.src_mask)
        output_dec = self.decoder(output_enc)
        return output_dec
    

# Pick the GPU when available and build the model on it.
# `device` is a notebook-level name that TransformerModel.forward and train() read.
# NOTE(review): the training cell below rebuilds `device` and `model` with the same
# arguments, so this instance is replaced before training.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = TransformerModel(ntoken = tokenizer.ntokens,
                         ninp = 64,
                         nhead = 8,
                         nhid = 64,
                         nlayers = 6)
# Last expression of the cell: moves the model and displays its module tree.
model.to(device)



TransformerModel(
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
        )
        (linear1): Linear(in_features=64, out_features=64, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=64, out_features=64, bias=True)
        (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
    (norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
  )
  (decoder): Linear(in_features=64, out_features=87, bias=True)
  (input_emb): Embedding(87, 64)
  (pos_encoder): PositionalEmbedding(
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

## Training

In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Definitions
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pad_token_id = tokenizer.token_to_id["[PAD]"]

# Model initialisation (fresh weights; replaces the model built in the previous cell)
model = TransformerModel(
    ntoken=tokenizer.ntokens,
    ninp=64,
    nhead=8,
    nhid=64,
    nlayers=6
).to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.token_to_id["[PAD]"])  # Padding targets are ignored
# NOTE(review): lr is a literal 1e-4 here; the `learning_rate` hyperparameter (1e-3)
# defined at the top of the notebook is not used — confirm which one is intended.
optimizer = optim.Adam(model.parameters(), lr=1e-4)
# NOTE(review): train() steps the scheduler once per epoch, so with step_size=10 and
# the 10-epoch run launched below the LR is only scaled after the last epoch.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.95)  # LR adjustment

# Fonction d'entraînement
def train(model, dataloader, criterion, optimizer, scheduler, num_epochs=10):
    """Train ``model`` to continue each context with its truth sequence (teacher forcing).

    For every batch the true continuation is appended to the context and the
    model is run ONCE over the whole sequence; the logits at the position just
    before each truth token are compared with that token. This relies on the
    model being causal (logits at position i depend only on tokens <= i), in
    which case it gives the same predictions as re-running the model after each
    appended token, at a fraction of the cost.

    Args:
        model: maps ``(batch, seq_len)`` token IDs to ``(batch, seq_len, vocab)`` logits.
        dataloader: iterable of ``(context_batch, truth_batch)`` pairs with shapes
            ``(batch, context_len)`` and ``(batch, truth_len)``.
        criterion: loss taking ``(batch, vocab)`` logits and ``(batch,)`` targets.
        optimizer: optimizer over the model parameters.
        scheduler: LR scheduler, stepped once per epoch.
        num_epochs: number of passes over ``dataloader``.

    Returns:
        list[float]: mean batch loss of each epoch (``nan`` for an epoch without batches).

    Raises:
        ValueError: if a batch has zero-length contexts (nothing to predict from).
    """
    model.train()

    # Run on the device the model lives on instead of a notebook-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    epoch_losses = []
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for context_batch, truth_batch in dataloader:
            context_batch = context_batch.to(run_device)
            truth_batch = truth_batch.to(run_device)

            context_len = context_batch.shape[1]
            truth_len = truth_batch.shape[1]
            if truth_len == 0:
                continue  # nothing to predict in this batch
            if context_len == 0:
                raise ValueError("train() needs at least one context token per batch")

            optimizer.zero_grad()

            # Teacher forcing: feed context + all truth tokens but the last one.
            input_seq = torch.cat([context_batch, truth_batch[:, :-1]], dim=1)
            logits = model(input_seq)  # (batch, context_len + truth_len - 1, vocab)
            # Position context_len - 1 + t predicts truth token t.
            step_logits = logits[:, context_len - 1:, :]

            # Same reduction as a token-by-token loop: per-step loss, averaged over steps.
            loss = 0
            for t in range(truth_len):
                loss = loss + criterion(step_logits[:, t, :], truth_batch[:, t])
            loss = loss / truth_len

            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num_batches += 1

        scheduler.step()  # LR adjustment, once per epoch

        # Guard against an empty dataloader instead of dividing by zero.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")

    return epoch_losses

# Launch training.
# NOTE(review): num_epochs is a literal 10; the `epochs` hyperparameter (4) defined at
# the top of the notebook is not used. The recorded output of this cell is a
# KeyboardInterrupt, i.e. the saved run was stopped manually.
train(model, dataloader, criterion, optimizer, scheduler, num_epochs=10)

KeyboardInterrupt: 