In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import pickle

# 1. Prepare the data
class AccountSequenceDataset(Dataset):
    def __init__(self, sequences, max_len=16):
        self.sequences = sequences
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        input_sequence = sequence[:-1]
        target_sequence = sequence[1:]

        # Pad input and target sequences to max_len
        input_sequence = input_sequence + [0] * (self.max_len - len(input_sequence))
        target_sequence = target_sequence + [0] * (self.max_len - len(target_sequence))

        return torch.tensor(input_sequence, dtype=torch.long), torch.tensor(target_sequence, dtype=torch.long)

# Replace with your 6000 sequences

objects = []
# Encoded sequences:  0 -> 101
with (open("encoded_sequences.pkl", "rb")) as openfile:
    while True:
        try:
            objects.append(pickle.load(openfile))
        except EOFError:
            break

encoded_sequences = objects[0]
print(f"Length of data: {len(encoded_sequences)}")
print(f"Ex: {encoded_sequences[0]}")

#sequences = [[1, 29, 34, 12, 45], ...]
dataset = AccountSequenceDataset(encoded_sequences)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# 2. Define the transformer model
class TransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model, nhead, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.transformer = nn.Transformer(d_model, nhead, num_layers)
        self.fc = nn.Linear(d_model, vocab_size)

    def forward(self, src, tgt):
        src = self.embedding(src).transpose(0, 1)  # Transpose to (sequence_length, batch_size, d_model)
        tgt = self.embedding(tgt).transpose(0, 1)  # Transpose to (sequence_length, batch_size, d_model)
        x = self.transformer(src, tgt)
        x = self.fc(x)
        return x.transpose(0, 1)  # Transpose back to (batch_size, sequence_length, vocab_size)

# Hyperparameters
vocab_size = max([max(seq) for seq in encoded_sequences]) + 1  # Assuming account numbers start from 0
d_model = 512
nhead = 8
num_layers = 6
model = TransformerModel(vocab_size, d_model, nhead, num_layers)

# 3. Train the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

num_epochs = 100
for epoch in range(num_epochs):
    for inputs, targets in dataloader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        # Remove the last element from the target sequence before passing it to the model
        outputs = model(inputs, targets[:, :-1])
        loss = criterion(outputs.reshape(-1, vocab_size), targets[:, 1:].reshape(-1))
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}')


Length of data: 6873
Ex: [18, 18, 18, 18, 1, 6]
Epoch 1/100, Loss: 0.01813587360084057


KeyboardInterrupt: 