In [None]:
#!gzip -d "2021-07-31-lichess-evaluations-37MM.db.gz

In [1]:
DB_PATH='test.db'

In [2]:
!pip install peewee pytorch-lightning
!pip install tensorflow
!pip install tensorboard



In [3]:
from peewee import *
import base64

db = SqliteDatabase(DB_PATH)

class Evaluations(Model):
  id = IntegerField()
  fen = TextField()
  binary = BlobField()
  eval = FloatField()

  class Meta:
    database = db

  def binary_base64(self):
    return base64.b64encode(self.binary)
db.connect()
LABEL_COUNT = 37164639
print(LABEL_COUNT)
eval = Evaluations.get(Evaluations.id == 1)
print(eval.binary_base64())

37164639
b'CAAAAAAAAAAQAAAAAAAAAIEAAAAAAAAAJAAAAAAAAABCAAAAAAAAAADvABAAAAAAAAAAAAAAAAgAAAAAAAAAEAAAAAAAAACBAAAAAAAAACQAAAAAAAAAQgAAAAAAAP8AAAABEz8='


In [18]:
import os
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, IterableDataset, random_split
import pytorch_lightning as pl
from random import randrange

class EvaluationDataset(IterableDataset):
    def __init__(self, count):
        self.count = count

    def __iter__(self):
        return self

    def __next__(self):
        if self.count <= 0:
            raise StopIteration

        idx = randrange(self.count)
        eval = Evaluations.get(Evaluations.id == idx + 1)
        bin = np.frombuffer(eval.binary, dtype=np.uint8)
        bin = np.unpackbits(bin, axis=0).astype(np.single)
        eval.eval = max(eval.eval, -15)
        eval.eval = min(eval.eval, 15)
        ev = np.array([eval.eval]).astype(np.single)

        # Decrement count as we are consuming one item
        self.count -= 1

        return {'binary': bin, 'eval': ev}

    def __len__(self):
        return self.count

    def __getitem__(self, idx):
        eval = Evaluations.get(Evaluations.id == idx + 1)
        bin = np.frombuffer(eval.binary, dtype=np.uint8)
        bin = np.unpackbits(bin, axis=0).astype(np.single)
        eval.eval = max(eval.eval, -15)
        eval.eval = min(eval.eval, 15)
        ev = np.array([eval.eval]).astype(np.single)
        return {'binary': bin, 'eval': ev}

# class EvaluationDataset(IterableDataset):
#   def __init__(self, count):
#     self.count = count
#   def __iter__(self):
#     return self
#   def __next__(self):
#     idx = randrange(self.count)
#     return self[idx]
#   def __len__(self):
#     return self.count
#   def __getitem__(self, idx):
#     eval = Evaluations.get(Evaluations.id == idx+1)
#     bin = np.frombuffer(eval.binary, dtype=np.uint8)
#     bin = np.unpackbits(bin, axis=0).astype(np.single) 
#     eval.eval = max(eval.eval, -15)
#     eval.eval = min(eval.eval, 15)
#     ev = np.array([eval.eval]).astype(np.single) 
#     return {'binary':bin, 'eval':ev}    

dataset = EvaluationDataset(count=LABEL_COUNT)

In [5]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs/ --host localhost

Reusing TensorBoard on port 6006 (pid 11208), started 0:04:32 ago. (Use '!kill 11208' to kill it.)

In [9]:
#!kill 1428
#%reload_ext tensorboard
%tensorboard --logdir lightning_logs/ --host localhost

Reusing TensorBoard on port 6006 (pid 11208), started 0:08:20 ago. (Use '!kill 11208' to kill it.)

In [19]:
import time
from collections import OrderedDict

class EvaluationModel(pl.LightningModule):
  def __init__(self,learning_rate=1e-3,batch_size=1024,layer_count=10):
    super().__init__()
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    layers = []
    for i in range(layer_count-1):
      layers.append((f"linear-{i}", nn.Linear(808, 808)))
      layers.append((f"relu-{i}", nn.ReLU()))
    layers.append((f"linear-{layer_count-1}", nn.Linear(808, 1)))
    self.seq = nn.Sequential(OrderedDict(layers))

  def forward(self, x):
    return self.seq(x)

  def training_step(self, batch, batch_idx):
    x, y = batch['binary'], batch['eval']
    y_hat = self(x)
    loss = F.l1_loss(y_hat, y)
    self.log("train_loss", loss)
    return loss

  def configure_optimizers(self):
    return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

  def train_dataloader(self):
    dataset = EvaluationDataset(count=LABEL_COUNT)
    return DataLoader(dataset, batch_size=self.batch_size, num_workers=1, pin_memory=True)

configs = [
           {"layer_count": 4, "batch_size": 512},
          #  {"layer_count": 6, "batch_size": 1024},
           ]
for config in configs:
  torch.set_float32_matmul_precision('medium')
  version_name = f'{int(time.time())}-batch_size-{config["batch_size"]}-layer_count-{config["layer_count"]}'
  logger = pl.loggers.TensorBoardLogger("lightning_logs", name="chessml", version=version_name)
  trainer = pl.Trainer(precision="16-mixed",max_epochs=1,logger=logger)
  model = EvaluationModel(layer_count=config["layer_count"],batch_size=config["batch_size"],learning_rate=1e-3)
  # trainer.tune(model)
  # lr_finder = trainer.tuner.lr_find(model, min_lr=1e-6, max_lr=1e-3, num_training=25)
  # fig = lr_finder.plot(suggest=True)
  # fig.show()
  trainer.fit(model)
  break

Using 16bit Automatic Mixed Precision (AMP)
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name | Type       | Params
------------------------------------
0 | seq  | Sequential | 2.0 M 
------------------------------------
2.0 M     Trainable params
0         Non-trainable params
2.0 M     Total params
7.847     Total estimated model params size (MB)


Training: |                                                                                                   …

RuntimeError: DataLoader worker (pid(s) 17272) exited unexpectedly

In [10]:
print(db)
print("Database Name:", db.database)
print("Is Closed:", db.is_closed())
print("Connection:", db.connection())
print("Thread-safe:", db.thread_safe)
tables = db.get_tables()

# Print the list of tables
print("Tables in the database:")
for table in tables:
    print(table)

# print(db.execute_sql("show tables"))

<peewee.SqliteDatabase object at 0x0000020B03477B50>
Database Name: test.db
Is Closed: False
Connection: <sqlite3.Connection object at 0x0000020B034C1990>
Thread-safe: True
Tables in the database:
evaluations
