## Plan for continuous training

Here's the plan. The script boots up and checks a continuous play directory which is populated with a number of training round sub-directories. Each sub-directory is labelled in sequence and contains a self_play sub-sub-directory. The script takes stock of the contents of the latest training round's self_play directory.

If the number of files (tournaments) saved in the self_play directory of the latest training round, m, is less than some minimum M (i.e. m < M), then the self-play script is kicked off to play M - m more tournaments, completing the M required self-play tournaments and saving them to that self_play sub-directory.

If the number of files saved in the self_play directory of the latest training round satisfies m >= M, then the script checks whether a model is already saved in the training round directory.

If there is no model saved in the training round directory, the script will kick off a training routine to create a new model (or load a saved model from a previous round, if one exists), compile a training dataset from the most recent k training rounds (augmented with the catalogue of known checkmate positions) and train the model until the stopping criterion is met. The script will then save the model in the training round sub-directory.

Then the script will create a new training round directory, labelled with the next integer in the training round sequence, containing a self_play directory. At this point we can essentially continue the loop from the top.

In [None]:
import os, time, torch
# Pick the compute device: the GPU when torch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

# Filesystem layout: the training round directories live under `root`,
# alongside the shared `checkmates` directory.
root = os.path.join('data', 'output')
checkmates = os.path.join(root, 'checkmates')

# Minimum number of self-play tournaments a round must hold
# (author's note: this gives a new model roughly once a day).
min_tournaments_each_round = 50

## LOOP STARTS - just kill the machine any old time when you have stuff to do and you can fire it up again whenever you're ready.
while True:

    training_round_dirs = sorted([d for d in os.listdir(root) if os.path.isdir(os.path.join(root,d)) and d != 'checkmates'])
    latest_training_round_dir = training_round_dirs[-1]
    latest_self_play_dir = os.path.join(root, latest_training_round_dir, 'self_play')

    if os.path.exists(latest_self_play_dir) and os.path.isdir(latest_self_play_dir):
        number_games_played_already = len(os.listdir(latest_self_play_dir))
    else:
        os.mkdir(latest_self_play_dir)
        number_games_played_already = 0

    print(f'Current: {latest_training_round_dir}, already played: {number_games_played_already}')

    tournaments_left_to_play = min_tournaments_each_round - number_games_played_already

    if tournaments_left_to_play > 0:
        
        # self-play script to play tournaments_left_to_play more tournaments, saving to latest_self_play_dir.

        # Same base model with different look-ahead strength configuration 
        model_kwargs = {'nlayers': 6, 'nheads': 3, 'embed_dim': 18, 'dk': 5, 'device': device,'load_path': os.path.join('data','output','WHICH_ROUND??', 'model.pt')}
        agent0_spec = {'type': 'transformer', 'kwargs': model_kwargs, 'num_simgames': 150, 'max_simmoves': 4, 'C': 1, 'p': 0.4, 'k': float('inf')}
        agent1_spec = {'type': 'transformer', 'kwargs': model_kwargs, 'num_simgames':  1, 'max_simmoves': 1, 'C': 1, 'p': 0.4, 'k': float('inf')}

        self_play_args = {
            'num_workers':2, 'num_tournaments': tournaments_left_to_play, 'agents_spec': [agent0_spec, agent1_spec], 'num_games':1, 'starting_state':None, 'max_moves':200,
            'save':True, 'result_dest':os.path.join('data','output','round_1','self_play')
        }

        # Let's play
        print(f'Playing {tournaments_left_to_play} tournaments...')
        %run -i "chess_selfplay.py"
        print(f'Self-play complete.')

        # Also include an instruction here to extract the checkmates from the latest_self_play_dir tournament games and save them in the checkmates directory.
        ...

    latest_model_path = os.path.join(root, latest_training_round_dir, 'model.pt')
    if not (os.path.exists(latest_model_path) and os.path.isfile(latest_model_path)):

        # No model saved here yet. Create and train a new model based on the previous k rounds of self-play data.
        # We currently have 591 tournaments saved in baseline and 191 in round1. We could use k = 10 and go from there?
        ...

    # Create next training round directory containing self_play sub-directory, and start loop from the top.
    next_index = int(os.path.split(latest_training_round_dir)[-1].split('_')[-1]) + 1
    next_training_round_dir = os.path.join(root, f'round_{next_index}')
    os.mkdir(next_training_round_dir)
    next_training_round_self_play_dir = os.path.join(next_training_round_dir, 'self_play')
    os.mkdir(next_training_round_self_play_dir)