In [1]:
# create dataset

# Read the whole PGN file. The context manager closes the handle as soon as
# the read finishes instead of leaving that to the garbage collector.
with open("Rapport.pgn", "r") as pgn_file:
    data = pgn_file.read()

# Marker placed on its own line before and after every game.
DELIM = '|'

# Vocabulary: every distinct character in the file, plus the delimiter.
chars = set(data)
chars.add(DELIM)

# Splitting on blank lines yields alternating chunks: even-indexed chunks are
# treated as a game's header, odd-indexed chunks as its moves. zip() silently
# drops a trailing chunk that has no partner.
parts = data.split("\n\n")
games = [
    "\n".join([DELIM, header, "", moves, DELIM])
    for header, moves in zip(parts[::2], parts[1::2])
]

print(games[10])

|
[Event "16th Donau Open"]
[Site "Aschach Donau AUT"]
[Date "2007.12.27"]
[Round "3"]
[White "Rapport,R"]
[Black "Danilov,I"]
[Result "1/2-1/2"]
[WhiteElo "2197"]
[BlackElo "2414"]
[ECO "C66"]

1.e4 e5 2.Nf3 Nc6 3.Bb5 a6 4.Ba4 d6 5.c3 Nf6 6.O-O Bd7 7.Re1 h6 8.d4 Qe7
9.d5 Nd8 10.Nbd2 g6 11.Nf1 Bg7 12.Ng3 O-O 13.Bxd7 Nxd7 14.Nd2 Kh7 15.c4 Bf6
16.Nf3 Bg5 17.Nxg5+ hxg5 18.Qg4 f6 19.h4 Nf7 20.h5 Nh6 21.hxg6+ Kg7 22.Nf5+ Nxf5
23.exf5 Rh8 24.g3 Rh6 25.Be3 Rah8 26.Qe4 Rh2 27.Kf1 R8h3 28.g4 Qe8 29.Ke2 Qh8
30.Rg1 Qh4 31.b4 b6 32.a4 a5 33.bxa5 Nc5 34.Bxc5 Rb3 35.Ra2 bxc5 36.Ke1 Qh3
37.Ke2 Qc3 38.Rc2 Qxa5 39.Rh1 Rbh3 40.Rxh2 Rxh2 41.Kf3 Qxa4 42.Kg3 Rh8 43.Rc1 Qa3+
44.Qe3 Qa2 45.Qc3 Ra8 46.Kg2 Qe2 47.Qf3 Qb2 48.Rh1 Ra3 49.Qd1 Qc3 50.Rh7+ Kg8
51.Rh8+ Kg7 52.Rh7+ Kg8 53.Rh8+  1/2-1/2
|


In [2]:
# Character <-> integer id lookup tables. Ids follow the sorted order of the
# vocabulary, so the same set of characters always gets the same ids.
stoi = {c: i for i, c in enumerate(sorted(chars))}
itos = {i: c for c, i in stoi.items()}

# Look the delimiter up through the DELIM constant rather than repeating the
# literal, so the id stays correct if the delimiter character is changed.
DELIM_INT = stoi[DELIM]

In [None]:
# one-hot encode a single training item (the first game)
import torch

# Width of every one-hot row: one slot per character in the vocabulary.
CHARSET_SIZE = len(chars)

game = games[0]

# Bigram view of the game: each input character is paired with the character
# that follows it, so inputs drop the last char and targets drop the first.
xs_char, ys_char = game[:-1], game[1:]

xs = torch.tensor([stoi[ch] for ch in xs_char], dtype=torch.long)
ys = torch.tensor([stoi[ch] for ch in ys_char], dtype=torch.long)

# one_hot returns an integer tensor; convert to float32 so it can be
# multiplied with float weights.
xenc = torch.nn.functional.one_hot(xs, num_classes=CHARSET_SIZE).to(torch.float32)
xenc

tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]])

In [77]:
# create input data

# Collect the bigrams of every game: xs_char holds the first character of each
# bigram and ys_char the second. The first characters are one-hot encoded as
# the network input; the second characters stay as integer ids and are used
# later to calculate the loss.

xs_char = []
ys_char = []

for game in games:
    # Extending a list with a string appends its characters one by one.
    xs_char.extend(game[:-1])
    ys_char.extend(game[1:])

xs = torch.tensor([stoi[ch] for ch in xs_char], dtype=torch.long)
ys = torch.tensor([stoi[ch] for ch in ys_char], dtype=torch.long)

xenc = torch.nn.functional.one_hot(xs, num_classes=CHARSET_SIZE).to(torch.float32)

In [55]:
# create linear neural network layer

# A dedicated, explicitly seeded generator makes the initial weights
# reproducible without touching torch's global RNG state.
SEED = 0
generator = torch.Generator()
generator.manual_seed(SEED)

# Square weight matrix (CHARSET_SIZE x CHARSET_SIZE) drawn from a standard
# normal distribution; requires_grad lets autograd populate W.grad.
W = torch.randn((CHARSET_SIZE, CHARSET_SIZE), generator=generator, requires_grad=True)

In [49]:
# forward pass
logits = xenc @ W

# Softmax in log space. Exponentiating the raw logits overflows float32 to
# inf once a logit passes ~88, which turns the loss into NaN; log_softmax
# avoids that overflow.
log_probs = torch.log_softmax(logits, dim=1)
probs = log_probs.exp()  # each row sums to 1

# Mean negative log-likelihood of the actual next character in each bigram.
loss = -log_probs[torch.arange(len(ys)), ys].mean()
loss.item()

4.895829200744629

In [47]:
# backward pass

# Drop any gradient already stored on W; backward() adds to an existing
# .grad rather than replacing it.
W.grad = None
# Backpropagate from the scalar loss; this populates W.grad.
loss.backward()

In [48]:
# update
# One gradient-descent step with a step size of 1. The in-place update runs
# under torch.no_grad() so autograd does not record it; this is the
# recommended replacement for mutating `W.data`, which bypasses autograd's
# safety checks.
with torch.no_grad():
    W -= W.grad

In [86]:
# training loop

NUM_ITERATIONS = 1000
ADJUSTMENT = 100  # gradient-descent step size (learning rate)

# Row indices used to pick each example's target log-probability. They never
# change, so build them once instead of on every iteration.
target_rows = torch.arange(len(ys))

for _ in range(NUM_ITERATIONS):

    # Forward pass. Softmax is taken in log space: exponentiating raw logits
    # overflows float32 once a logit passes ~88, which is easy to hit with a
    # step size this large and would turn the loss into NaN.
    logits = xenc @ W
    log_probs = torch.log_softmax(logits, dim=1)
    probs = log_probs.exp()  # kept so `probs` reflects the last forward pass
    loss = -log_probs[target_rows, ys].mean()

    print(loss.item())

    # Backward pass: clear the previous gradient, then backpropagate.
    W.grad = None
    loss.backward()

    # Gradient-descent step, done under no_grad() so autograd does not record
    # it (replaces the discouraged in-place mutation of `W.data`).
    with torch.no_grad():
        W -= ADJUSTMENT * W.grad

2.2027552127838135
2.2026429176330566
2.2025318145751953
2.202420949935913
2.2023112773895264
2.202202320098877
2.2020938396453857
2.201986312866211
2.2018792629241943
2.201772928237915
2.2016677856445312
2.2015631198883057
2.2014589309692383
2.2013556957244873
2.2012531757354736
2.201150894165039
2.2010498046875
2.2009494304656982
2.2008490562438965
2.2007498741149902
2.2006514072418213
2.2005534172058105
2.200456142425537
2.200359344482422
2.200263023376465
2.200167417526245
2.2000725269317627
2.1999781131744385
2.1998844146728516
2.199791431427002
2.1996986865997314
2.1996066570281982
2.199514865875244
2.1994242668151855
2.199333906173706
2.1992440223693848
2.199154853820801
2.199066400527954
2.1989781856536865
2.198890447616577
2.198803424835205
2.198716402053833
2.1986305713653564
2.198544979095459
2.1984598636627197
2.1983752250671387
2.198291063308716
2.1982076168060303
2.198124647140503
2.1980419158935547
2.1979596614837646
2.197877883911133
2.1977968215942383
2.197715997695923