In [None]:
# Dependencies

import subprocess
import sys
def install(package):
    """Install ``package`` with pip under the interpreter running this notebook.

    Executes ``<sys.executable> -m pip install --user <package>`` and blocks until
    pip finishes.

    Args:
        package: Requirement specifier handed to pip unchanged (e.g. ``"ray==1.7"``).

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    pip_install = [sys.executable, "-m", "pip", "install", "--user"]
    subprocess.check_call(pip_install + [package])

# Install the third-party packages this notebook relies on into the kernel's interpreter.
install("tensorflow")
# Remove the PyPI `typing` distribution (presumably it conflicts with the standard
# library module of the same name -- confirm). `-y` pre-answers pip's
# "Proceed (Y/n)?" prompt: a notebook subprocess has no interactive stdin, so without
# it the uninstall hangs or aborts whenever the package is actually present.
subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "typing"])
install("setproctitle")
install("ray==1.7")
install("aioredis==1.3.1")
install("modin")
import ray
import numpy as np
from numpy.random import SeedSequence, default_rng

from ConnectFour import ConnectFour, GAMENOTOVER, PLAYERNONE
from MCTS import get_move
from NeuralNetwork import initialize_network, get_network, train_network
from DataManager import DataManager

In [None]:
# Total number of self-play + retraining cycles to run
ITERATIONS = 2_000

# Self-play games generated in every cycle
EPISODES = 100

# CPUs handed to Ray (the author's Mac has 8)
CPUS = 8
# ignore_reinit_error=True is passed so that re-running this cell calls ray.init()
# again without treating the repeated initialisation as an error.
ray.init(ignore_reinit_error=True, num_cpus=CPUS)

In [None]:
# Records a game state into the data
def record(new_data, g, pi, flip_pi, result, q):
  """Append two flattened training samples for the current position to ``new_data``.

  One sample is built from ``g.get_board().get_arr()`` paired with ``pi``, the other
  from ``g.get_flip().get_arr()`` paired with ``flip_pi`` (presumably the mirrored
  board and policy -- confirm against ConnectFour). Each sample is laid out as
  ``[position cells..., value label, policy...]`` where the value label is the mean
  of ``result`` and ``q``.

  Args:
    new_data: Array of samples collected so far (may be empty).
    g: Game object exposing ``get_board()`` and ``get_flip()``.
    pi: Policy values stored with the board sample.
    flip_pi: Policy values stored with the flipped-board sample.
    result: Game result to blend into the value label.
    q: Search value estimate to blend into the value label.

  Returns:
    A new 1-D array: ``new_data`` followed by the board sample and then the flip
    sample. ``np.append`` without an axis flattens every input.
  """
  position = g.get_board().get_arr()
  mirrored = g.get_flip().get_arr()
  value_label = np.array([(result + q) / 2])
  for cells, policy in ((position, pi), (mirrored, flip_pi)):
    sample = np.append(cells, np.append(value_label, policy))
    new_data = np.append(new_data, sample)
  return new_data

# Plays a game
def play_game(g, network, rng):
  """Play ``g`` to completion from its current position, recording every position.

  Recurses one move at a time: a move is chosen with ``get_move``, applied, the rest
  of the game is played, and the move is undone again so ``g`` is left in the state
  it was passed in.

  Args:
    g: Game object exposing ``get_winner()``, ``move()`` and ``undo()``.
    network: Network forwarded to ``get_move``.
    rng: Random generator forwarded to ``get_move``.

  Returns:
    ``(result, data)``. For a finished game ``result`` is 0 when the winner is
    ``PLAYERNONE`` and -1 otherwise, and ``data`` is an empty array. For an
    unfinished game ``result`` is the negation of the result obtained for the
    position after the chosen move, and ``data`` holds the samples recorded from
    that position onwards plus the samples for this position.
  """
  winner = g.get_winner()
  if winner != GAMENOTOVER:
    # Terminal position: there is nothing to record, only an outcome to report.
    outcome = 0 if winner == PLAYERNONE else -1
    return outcome, np.array([])

  move, pi, q = get_move(g, network, rng, training=True, simulations=800)
  mirrored_pi = np.flip(pi)

  g.move(move)
  child_result, samples = play_game(g, network, rng)
  # Step back so the position is recorded as it was before the move was made.
  g.undo()

  samples = record(samples, g, pi, mirrored_pi, child_result, q)
  # The sign flips at every ply on the way back up the recursion.
  return -child_result, samples

# Executes an episode of self play
@ray.remote
def self_play(seed):
  """Ray task: play one full self-play game and return its recorded samples.

  Args:
    seed: Seed material for ``numpy.random.default_rng``; the resulting generator
      is the only source of randomness passed to the game.

  Returns:
    The sample array produced by ``play_game`` for a fresh ``ConnectFour`` game
    played with the network returned by ``get_network()``.
  """
  rng = default_rng(seed)
  print('playing')
  # Only the recorded samples are returned; the game's final result is discarded.
  _, samples = play_game(ConnectFour(), get_network(), rng)
  return samples

In [None]:
initialize_network()
data_manager = DataManager()
# Optional warm start (disabled): load previously saved data and train on it first.
# data_manager.add_data_from_file('')
# X, y1, y2, validation = data_manager.get_data()
# train_network(X, y1, y2, validation)

# Generate sources of randomness so parallel processes are not correlated.
# Every self-play episode consumes its own seed, so the pool has to be sized by the
# number of episodes, not only by the number of CPUs: ITERATIONS * CPUS seeds are too
# few when EPISODES > CPUS, and indexing the pool then runs off the end during the last
# iterations. max() keeps the pool large enough whichever of the two is bigger.
# Children are generated in order, so the leading seeds are the same as before.
ss = SeedSequence(12345)
seeds = ss.spawn(ITERATIONS * max(EPISODES, CPUS))

In [None]:
# Runs the training process: each iteration generates EPISODES self-play games in
# parallel, stores the resulting samples, and retrains the network on the data set.
for iteration in range(ITERATIONS):
  # Draw a fresh, independent seed for every episode of this iteration. Repeated
  # spawn() calls on one SeedSequence never hand out the same child twice, so no two
  # episodes -- within or across iterations -- share a random stream. (Indexing a
  # shared list with a stride of CPUS reused most seeds from one iteration to the
  # next, because EPISODES > CPUS, and eventually ran past the end of the list.)
  episode_seeds = ss.spawn(EPISODES)
  futures = []
  for i in range(EPISODES):
    # Limit the number of concurrent processes to the number of CPUS: before
    # submitting task i, wait until at most CPUS - 1 of the i tasks submitted so far
    # are still unfinished, so that this submission brings the total to CPUS.
    if i >= CPUS:
      ray.wait(futures, num_returns=i - CPUS + 1)
    futures.append(self_play.remote(episode_seeds[i]))
  # Block until every episode has finished; one sample array per episode.
  new_data = ray.get(futures)
  data_manager.add_data(new_data)
  # NOTE(review): assumes the Data/ directory already exists -- confirm that
  # DataManager.save creates it if not.
  data_manager.save(f'Data/data{iteration}.txt')
  X, y1, y2, validation = data_manager.get_data()
  train_network(X, y1, y2, validation)

# Note on output: Boards are shown with 1 representing the player who is playing next,
# 2 representing the player who just played. Q is the quality from the point of view of
# the player who just played.