<a href="https://colab.research.google.com/github/AlexisOlson/Colab/blob/master/Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Terminal commands to set up kernel
####################################
# NOTE: these lines are plain shell commands (no `!` prefix) and include a
# terminal restart part-way through, so they are meant to be typed into a
# terminal rather than executed as a notebook cell.

# Update installers
pip install --upgrade pip
apt-get update

# Install Conda: https://medium.com/@error.replicator/setting-up-cloud-environment-for-deep-learning-febb5c408e78
curl -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh
bash Miniconda3-latest-Linux-x86_64.sh

########################
### Restart terminal ###
########################

# Create new Kernel
# (conda environment "bask" with Python 3.7 and ipykernel)
conda create -n bask python=3.7 ipykernel
conda activate bask

# For issues with psycopg2: stackoverflow.com/questions/35104097
apt install -y libpq-dev postgresql-server-dev-all

# Install chess-tuning-tools
pip install chess-tuning-tools

#Install compiler for lc0
apt-get install ninja-build libprotobuf-dev protobuf-compiler
pip install meson

# Install "bask" kernel
# (registers the active environment as a Jupyter kernel named "bask")
python -m ipykernel install --user --name=bask

# Now create/upload a notebook

In [0]:
# Set parameters
# Opening book (PGN); the download cell saves it as book.pgn.
book = "https://github.com/killerducky/OpenBench/raw/lc0/Books/8moves_v3.pgn"

# Network weights URL and the local filename the download cell stores it under.
network = "https://training.lczero.org/get_network?sha=96bbc25ba03c6fbf00f10084fb52611ffa77f85dff93009703d6089ef03c62da"
network_name = "702639"

# Pull-request number of the lc0 repository that is fetched and built.
PR = "1288"

# Intended number of rounds per experiment.
# NOTE(review): confirm the optimization loop actually forwards this value.
rounds = 8

In [0]:
# Define engines.json
# Engine definitions looked up by name ("lc0", "sf") when cutechess-cli is
# started with "conf=lc0" / "conf=sf". The first entry (index 0) is the engine
# whose UCI options are rewritten before every experiment.
# A context manager guarantees the file is flushed and closed even if the
# write fails.
with open("engines.json", "w") as engines_file:
    engines_file.write("""[
    {
        "command": "./lc0_PR",
        "initStrings": [
            "setoption name Threads value 2",
            "setoption name CPuctFactor value 0.0",
            "setoption name CPuctFactorAtRoot value 0.0",
            "setoption name PolicyFactorParent value 0.0",
            "setoption name SyzygyPath value /root/tablebase.sesse.net/syzygy/3-4-5"
        ],
        "name": "lc0",
        "protocol": "uci",
        "workingDirectory": "/root"
    },
    {
        "command": "./SF_11",
        "initStrings": [
            "setoption name Threads value 4",
            "setoption name SyzygyPath value /root/tablebase.sesse.net/syzygy/3-4-5"
        ],
        "name": "sf",
        "protocol": "uci",
        "workingDirectory": "/root"
    }
]""")

In [0]:
# Download Files
################

# Syzygy Endgame TB
!wget -c --recursive --level=1 --no-parent --no-check-certificate -e robots=off -ignore robots.txt wait 1 http://tablebase.sesse.net/syzygy/3-4-5/

# Network
!wget {network} -O {network_name}

# Opening Suite
!wget {book} -O book.pgn

In [0]:
# Install Stockfish 11
# Fetch the sf_11 source archive, unpack it, build it, and copy the binary to
# ./SF_11, which is the command configured for the "sf" engine in engines.json.
!wget https://github.com/official-stockfish/Stockfish/archive/sf_11.tar.gz
!tar -xvf sf_11.tar.gz
# NOTE(review): ARCH=x86-64-bmi2 presumably needs a CPU with BMI2 support — confirm for the target machine.
!cd Stockfish-sf_11/src && make build ARCH=x86-64-bmi2
!cp Stockfish-sf_11/src/stockfish SF_11

In [0]:
# Use gcc 8: http://www.riccardomariabianchi.com/updating-gcc-on-ubuntu.html


# Install Lc0
!rm -rf lc0
!git clone --recurse-submodules https://github.com/LeelaChessZero/lc0.git
!cd lc0 && git fetch origin pull/{PR}/head:PR{PR}
!cd lc0 && git checkout {PR}
!cd lc0 && git submodule update --init --remote
!cd lc0 && rm -rf build
!cd lc0 && mkdir -p build
!cd lc0 && meson build --buildtype release -Dblas=false -Dopencl=false -Dcudnn=true -Dgtest=false
!cd lc0/build && ninja
!cp lc0/build/lc0 lc0_PR

## Useful I/O and plotting stuff

In [0]:
# Show which Python interpreter version the running kernel uses.
import sys
print(sys.version)

In [0]:
pip install git+https://github.com/kiudee/bayes-skopt

In [0]:
# NOTE(review): the optimization loop only writes figures to disk; confirm the
# interactive "notebook" backend is needed and supported by the front end used.
%matplotlib notebook
import numpy as np
import bask

from bask import geometric_median

Utility functions to set UCI parameters

In [0]:
import json
from tune.io import InitStrings
def set_params_dict(filepath, engine_index, uci_params):
    """Rewrite UCI option values of one engine in an engines JSON file.

    Parameters
    ----------
    filepath : str
        Path of the JSON file holding the list of engine definitions. The
        file is read, modified and written back to this same path.
    engine_index : int
        Index of the engine entry (within the top-level list) to modify.
    uci_params : dict
        Mapping of UCI option name to the value to set.

    Notes
    -----
    The file is overwritten by opening it in "w" mode, which truncates it.
    No separate delete step is needed, and no file other than `filepath`
    is touched.
    """
    with open(filepath, "r") as file:
        engines = json.load(file)

    # InitStrings wraps the engine's list of "setoption ..." strings.
    # NOTE(review): this relies on InitStrings updating the wrapped list in
    # place, so the assignments below are reflected in `engines` when it is
    # dumped — verify against tune.io.
    init_strings = InitStrings(engines[engine_index]['initStrings'])
    for option_name, option_value in uci_params.items():
        init_strings[option_name] = option_value

    with open(filepath, 'w') as file:
        json.dump(engines, file, sort_keys=True, indent=4)

def parse_results(ordo_str):
    """Extract a name -> rating mapping from a ratings table given as text.

    Only the third and fourth lines of `ordo_str` are read. Each of them is
    split on whitespace; the second field is used as the name and the fourth
    field is converted to float and used as the rating.
    (Presumably the input is the output of the Ordo rating tool for a
    two-player match — confirm against the caller.)

    Raises IndexError if one of those lines has fewer than four fields and
    ValueError if the rating field is not a number.
    """
    rows = (row.split() for row in ordo_str.split("\n")[2:4])
    return {fields[1]: float(fields[3]) for fields in rows}

This is the code to run and parse an experiment

In [0]:
import subprocess
import itertools
from scipy.stats import norm
from scipy.stats import dirichlet
import scipy.stats as st
import re

def run_experiment(rounds=2):
    """Play one lc0-vs-sf match with cutechess-cli and return the finished process.

    Any previous out.pgn is removed first. cutechess-cli is then started with
    the engines named "lc0" and "sf", openings from book.pgn in random order,
    two games per round with repeated openings, and draw/resign adjudication.

    Parameters
    ----------
    rounds : int
        Value passed to cutechess-cli's ``-rounds`` option.

    Returns
    -------
    subprocess.CompletedProcess
        Result of the cutechess-cli run with stdout/stderr captured as bytes.
    """
    # Remove the PGN left over from the previous experiment.
    subprocess.run(["rm", "out.pgn"])

    engine_args = [
        "-engine", "conf=lc0", "tc=12+0.2",
        "-engine", "conf=sf", "tc=6+0.1",
    ]
    opening_args = ["-openings", "file=book.pgn", "format=pgn", "order=random"]
    adjudication_args = [
        "-draw", "movenumber=50", "movecount=8", "score=8",
        "-resign", "movecount=8", "score=800",
    ]
    match_args = ["-rounds", f"{rounds}", "-repeat", "-games", "2"]
    output_args = [
        "-tb", "/root/tablebase.sesse.net/syzygy/3-4-5/",
        "-pgnout", "out.pgn",
    ]

    command = (
        ["cutechess-cli/cutechess-cli", "-concurrency", "1"]
        + engine_args
        + opening_args
        + adjudication_args
        + match_args
        + output_args
    )
    return subprocess.run(command, capture_output=True)

def parse_experiment(out):
    """Turn the captured cutechess-cli output into a score to minimize.

    The output is scanned from the end for the last line containing a
    ``"<n> - <n> - <n>"`` triple, read as wins, losses and draws in that order
    (presumably from the point of view of the first engine — confirm against
    the cutechess-cli output format).

    Parameters
    ----------
    out : object with a ``stdout`` attribute holding UTF-8 encoded bytes
        Typically the value returned by ``run_experiment``.

    Returns
    -------
    float
        Loss rate minus win rate, i.e. a value in [-1, 1] that is negative
        when there are more wins than losses.

    Raises
    ------
    ValueError
        If no score line is present or the score line reports zero games.
    """
    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    score_pattern = re.compile(r"([0-9]+)\s-\s([0-9]+)\s-\s([0-9]+)")

    # Search backwards instead of relying on a fixed line offset, so extra
    # trailing lines in the output do not break the parsing.
    match = None
    for line in reversed(out.stdout.decode("utf-8").split("\n")):
        match = score_pattern.search(line)
        if match is not None:
            break
    if match is None:
        raise ValueError("no 'W - L - D' score line found in the experiment output")

    w, l, d = (float(count) for count in match.groups())
    total = w + d + l
    if total == 0:
        raise ValueError("the experiment output reports zero finished games")

    # Dirichlet posterior over (win, loss, draw) with a small symmetric prior
    # on wins/losses. The prior pseudo-counts cancel in the loss-minus-win
    # difference and the final rescaling removes the prior mass from the
    # denominator, so the result equals (l - w) / total.
    draw_rate = 0.2717
    prior_obs = 1.5
    wl_prior = (1 - draw_rate) * prior_obs / 2
    dist = dirichlet(alpha=[wl_prior + w,
                            wl_prior + l,
                            draw_rate * prior_obs + d])
    raw = dist.mean().dot([-1.0, 1.0, 0.0])
    return raw * (prior_obs + total) / total

## Specify the optimization

Now we specify the optimization parameters and their ranges:

In [0]:
from skopt.learning.gaussian_process.kernels import Matern, ConstantKernel
from scipy.stats import beta, gamma, invgamma, uniform, halfnorm
from skopt.space import Real, Integer
def roundflat(x, a_low=2.0, a_high=8.0, d_low=0.005, d_high=1.2):
    """Unnormalized log-density that is near zero between two soft bounds.

    The value is ``-2 * ((x / d_low) ** (-2 * a_low) + (x / d_high) ** (2 * a_high))``:
    the first term grows as `x` drops below `d_low`, the second as `x` rises
    above `d_high`; `a_low` and `a_high` set how steep each side is.
    """
    lower_penalty = (x / d_low) ** (-2 * a_low)
    upper_penalty = (x / d_high) ** (2 * a_high)
    return -2 * (lower_penalty + upper_penalty)
# Names of the UCI options being tuned, in the same order as `dimensions`.
parameters = ["PolicyFactor", "PolicyExponent", "Cpuct", "FpuValue", "PolicyTemperature"]
dimensions = (
    Real(low=0, high=1, transform='normalize'), # PolicyFactor
#   Real(low=0, high=1, transform='normalize'), # PolicyFactorParent (not tuned; fixed at 0.0 in engines.json)
    Real(low=0, high=8, transform='normalize'), # PolicyExponent
    Real(low=0, high=4, transform='normalize'), # Cpuct
    Real(low=0, high=2, transform='normalize'), # FpuValue
    Real(low=0, high=2, transform='normalize')  # PolicyTemperature
)
# Points returned by the optimizer are zipped with `parameters`, so a length
# mismatch would silently drop or mislabel a dimension.
if len(parameters) != len(dimensions):
    raise ValueError(
        f"{len(parameters)} parameter names given for {len(dimensions)} dimensions"
    )


def _signal_prior(x):
    """Half-normal (scale 1.0) on sqrt(exp(x)), plus the log-Jacobian x/2 - log 2."""
    return halfnorm(scale=1.).logpdf(np.sqrt(np.exp(x))) + x / 2.0 - np.log(2.0)


def _length_scale_prior(x):
    """Round-flat prior on exp(x) between ~0.005 and ~0.8, plus the log-Jacobian x."""
    return roundflat(np.exp(x), d_low=0.005, d_high=0.8) + x


def _last_prior(x):
    """Half-normal (scale 0.4) on sqrt(exp(x)), plus the log-Jacobian x/2 - log 2."""
    return halfnorm(scale=0.4).logpdf(np.sqrt(np.exp(x))) + x / 2.0 - np.log(2.0)


# One prior per log-hyperparameter: the first, then one per dimension, then a
# final one. Deriving the middle part from len(dimensions) keeps the list in
# sync when a dimension is added or commented out.
# NOTE(review): presumably the order is kernel variance, Matern length scales,
# noise term added by bask — verify against the bask documentation.
priors = [_signal_prior] + [_length_scale_prior] * len(dimensions) + [_last_prior]

kernel = (
    ConstantKernel(constant_value=0.0157**2, constant_value_bounds=(0.01**2, 0.2**2))
    * Matern(length_scale=np.ones(len(dimensions))*0.3, length_scale_bounds=(0.1,0.8),nu=1.5)
)

Now we create an optimizer, which will utilize the kernel and the priors to efficiently explore the parameter space:

In [0]:
from bask import Optimizer
# Fixed seed so the optimizer's random choices are repeatable between runs.
random_state = np.random.RandomState(123)

# Optimizer over `dimensions`, using the kernel and hyperparameter priors
# defined above. n_initial_points=30 matches the iteration count after which
# the main loop starts plotting the model.
# NOTE(review): the exact meaning of n_points, acq_func="pvrs" and n_thompson
# is defined by bask — confirm against its documentation before changing them.
opt = Optimizer(
    dimensions=dimensions,
    n_points=750,
    n_initial_points=30,
    gp_kernel=kernel,
    gp_kwargs=dict(normalize_y=True),
    gp_priors=priors,
    acq_func="pvrs",
    acq_func_kwargs=dict(n_thompson=20),
    random_state=random_state
)

Initialize the lists containing the evaluated points and their scores. We also define a filename in which to save X and y (to resume if necessary).

In [0]:
# File used to checkpoint the evaluated points and scores after every iteration.
filename = 'filename.npz'

# Evaluated points (X) and their scores (y); both start out empty.
X, y = [], []
iteration = len(X)

Should we need to resume, we can do so here:

In [0]:
import os

# Resume from the checkpoint written by the optimization loop, if there is one.
# Guarding on the file's existence lets a fresh "Run All" pass through this
# cell instead of failing with FileNotFoundError.
if os.path.exists(filename):
    # The context manager closes the underlying .npz archive after reading.
    with np.load(filename) as checkpoint:
        X = checkpoint['arr_0'].tolist()
        y = checkpoint['arr_1'].tolist()
    iteration = len(X)
    print(iteration)

    if X:
        # Initialize the optimizer again
        opt.tell(X, y, gp_burnin=200, gp_samples=300, n_samples=0, progress=True)
        print(opt.gp.kernel_)
else:
    print(f"No checkpoint found at {filename!r}; starting from scratch")

## Optimization loop
Main optimization loop. It runs until it is interrupted; after stopping the cell it can be re-run to continue from the current state.

In [0]:
# Iteration at which the model was last plotted; the main loop plots again
# once at least 5 further iterations have been completed.
last_iteration = 0

In [0]:
mkdir plots

In [0]:
from skopt import Space
from skopt.space import Real
from skopt.learning.gaussian_process.kernels import ConstantKernel
from skopt.utils import create_result, normalize_dimensions
from tune.plots import plot_objective
import matplotlib.pyplot as plt
import time
from datetime import datetime
import warnings
warnings.filterwarnings(action='ignore')

# Runs until the cell is interrupted. Every pass asks the optimizer for a
# point, plays one match with those settings, feeds the score back and
# checkpoints X and y to `filename`.
while True:
    # Plot the current model:
    print(f"\nStart iteration {iteration}")
    # Plot only after 30 iterations and at most once every 5 iterations.
    if iteration >= 30 and iteration - last_iteration >= 5:
        last_iteration = iteration
        plt.style.use('dark_background')
        space = normalize_dimensions(dimensions)
        fig, ax = plt.subplots(nrows=space.n_dims, ncols=space.n_dims,
                               figsize=(3 * space.n_dims, 3 * space.n_dims))
        fig.patch.set_facecolor('#36393f')
        for i in range(space.n_dims):
            for j in range(space.n_dims):
                ax[i, j].set_facecolor('#36393f')
        warnings.filterwarnings(action='ignore')
        timestr = time.strftime("%Y%m%d-%H%M%S")
        result_object = create_result(Xi=X, yi=y, space=space, models=[opt.gp])
        plot_objective(result_object, levels=20, size=3, n_points=200, n_samples=30,
                       dimensions=parameters, alpha=0.25,
                       fig=fig, ax=ax)
        try:
            fig.savefig(f"plots/{timestr}-{iteration}.png", pad_inches=0.1, dpi=300, bbox_inches="tight")
        except Exception as exc:
            # Saving the plot is best effort: report the problem and keep
            # tuning. KeyboardInterrupt is not swallowed, so the loop can
            # still be stopped while a figure is being written.
            print(f"Could not save plot: {exc}")
        finally:
            # Close this specific figure so figures do not pile up in memory.
            plt.close(fig)

    # Actual optimization loop starts here:
    point = opt.ask()
    print("Testing {}".format(point))

    # Here we need to pass the parameters to the experiment:
    set_params_dict('engines.json', engine_index=0, uci_params=dict(zip(parameters, point)))

    # The actual experiment
    print("Start experiment")
    now = datetime.now()
    # Use the round count from the parameter cell instead of a literal.
    out_exp = run_experiment(rounds=rounds)
    later = datetime.now()
    difference = (later - now).total_seconds()
    print(f"Experiment finished ({difference}s)")
    score = parse_experiment(out_exp)

    # We update the model here:
    # If the update raises ValueError, the GP hyperparameters are re-sampled
    # and the update is retried until it succeeds.
    while True:
        try:
            now = datetime.now()
            # A longer burn-in is used as long as no sampling chain exists yet.
            if opt.gp.chain_ is None:
                opt.tell(point, score, n_samples=0, gp_samples=300, gp_burnin=200)
            else:
                opt.tell(point, score, n_samples=0, gp_samples=300, gp_burnin=10)
            later = datetime.now()
            difference = (later - now).total_seconds()
            print(f"GP sampling finished ({difference}s)")
        except ValueError:
            opt.gp.sample(n_burnin=100, priors=priors)
        else:
            break
    print(opt.gp.kernel_)
    print("Got score: {}".format(score))
    X.append(point)
    y.append(score)
    iteration = len(X)
    # Checkpoint after every iteration so the run can be resumed later.
    np.savez_compressed(filename, np.array(X), np.array(y))