# Multiprocessing Tournament Trainer

In [2]:
import multiprocessing
import time
import os
from kaggle_environments import make
from kaggle_environments.envs.halite.helpers import *
import random
from functools import partial
import multiprocessing as mp
import math
import numpy as np
import os, glob
import sys, getopt

In [3]:
# Directory of the bot being trained; its sources are read from here.
TRAIN_TARGET = 'geneticBot'
# Length of each consecutive group of genes inside a flat chromosome.
WEIGHT_SIZE = [6, 6, 6, 2, 3, 1, 1, 1]

# Per-gene scale applied to the mutation noise.
# One row per WEIGHT_SIZE group; a 0 leaves that gene untouched by mutation.
STEP_ARR = np.array([
    0.2, 50, 0.15, 0.15, 0.04, 100,
    0, 0, 0, 0, 0, 0.3,
    0, 0, 0, 0, 0, 0,
    0.15, 0.2,
    0.2, 0.4, 0.4,
    30,
    0.05,
    1,
])

# Do not change: the three knockout rounds (4 bots per match) need 64 entrants.
POP = 64
# Number of training generations.
ITER = 200
# Base mutation half-width; too large for fine tuning.
STEP = 0.2
# NOTE(review): not referenced in the visible code - confirm it is still needed.
N_CPU = POP

In [4]:
def store_list_of_arr(arr,path):
    """Write every array in `arr` to `path`, one line of space-separated values each.

    Args:
        arr: iterable of objects exposing ``tolist()`` (e.g. 1-D NumPy arrays).
        path: destination file; it is created or truncated.
    """
    # The context manager closes the handle even when a write raises.
    with open(path, 'w') as out:
        for row in arr:
            out.write(" ".join(map(str, row.tolist())))
            out.write("\n")

In [5]:
def run(agents,size=21,steps=400,seed=1):
    """Play one Halite episode with `agents` and return the finished environment.

    Args:
        agents: the players handed to the environment.
        size: value stored under the "size" configuration key.
        steps: value stored under the "episodeSteps" configuration key.
        seed: value stored under the "randomSeed" configuration key.
    """
    settings = {
        "size": size,
        "startingHalite": 24000,
        "episodeSteps": steps,
        'randomSeed': seed,
    }
    env = make("halite", configuration=settings, debug=False)
    env.reset(len(agents))
    env.run(agents)
    return env

def fitness_halite(env):
    """Return the `reward` of the first four entries of `env.state` as a 4-tuple."""
    first_four = (env.state[0], env.state[1], env.state[2], env.state[3])
    return tuple(player.reward for player in first_four)

def double_fitness(agent, n, a):
    """Average the reward `agent` earns over `n` four-player games.

    The agent plays against three fixed opponent bots, once for every seed in
    ``range(a, a + n)``.

    Args:
        agent: the bot under evaluation; it is listed first in every game.
        n: number of games to play.
        a: first random seed.

    Returns:
        The mean of the agent's own reward over the `n` games (0 when n <= 0).
    """
    res = 0
    for randomSeed in range(a,n+a):
        print("Seed-",randomSeed)
        rewards = fitness_halite(run([agent,'old/py/badBotv1.0.py','old/py/geneticBotv1.3.py','old/py/geneticBotv1.2.py'],seed=randomSeed))
        # fitness_halite returns one reward per player; dividing that whole
        # tuple by n raised TypeError. `agent` is player 0, so use its reward.
        # NOTE(review): a None reward still raises here - confirm whether it
        # should be scored as a loss instead.
        res += rewards[0] / n
    return res

def test_fitness(weights):
    """Toy fitness function: the sum of all entries of `weights`."""
    total = sum(weights)
    return total


In [6]:
def getscore(agents,size=21,steps=400,seed=None):
    '''
    Play one four-player match and return the winning bot.

    Args:
        agents: the four bots (paths) taking part in the match.
        size: board size forwarded to run().
        steps: episode length forwarded to run().
        seed: match seed. When omitted, a fresh seed in [10, 100) is drawn for
            every call, so each match gets its own random seed.

    Returns:
        The entry of `agents` with the highest reward; on a tie the earliest
        entry wins. A reward of None counts as the worst possible score.
    '''
    INF = 9999999999
    # A default written as `seed=np.random.randint(...)` is evaluated only
    # once, when the function is defined, so every match would share one seed.
    if seed is None:
        seed = np.random.randint(10,100)
    # size and steps used to be accepted but silently ignored.
    rewards = fitness_halite(run(agents,size=size,steps=steps,seed=seed))
    scores = [-INF if reward is None else reward for reward in rewards]
    # list.index returns the first maximum, matching the a/b/c/d priority.
    return agents[scores.index(max(scores))]

In [7]:
def elimination(botlist):
    '''
    Run one knockout round and return the bots that advance.

    The entrants are split into matches of four: with m = len(botlist) / 4,
    match k is played by entrants k, k+m, k+2m and k+3m. Every match is scored
    with getscore() in a worker process.

    Args:
        botlist: bot paths; the total number of entries must be divisible by 4.

    Returns:
        A one-element list holding the list of match winners (bot paths).
        Callers rely on this nesting, so it is kept as is.
    '''
    workload = np.asarray(botlist).reshape(4, -1).T.tolist()
    pool = mp.Pool()
    try:
        winners = pool.map(getscore,workload)
    finally:
        # Shut the workers down even if a match raised.
        pool.close()
        pool.join()
    return [winners]

def tournament(botlist):
    '''
    Knockout tournament made of three elimination rounds.

    Returns two values, both in the shape elimination() produces: first the
    survivors of round one (the top 25% of the bots), then the overall
    winner. The entries are bot paths.
    '''
    survivors = elimination(botlist)
    champion = survivors
    # Two more rounds narrow the first-round survivors down to the winner.
    for _ in range(2):
        champion = elimination(champion)
    return survivors, champion

In [8]:
# NOTE(review): not referenced anywhere else in the visible code - confirm before removing.
init_weights = []

# Uniform crossover
def crossover(parent1,parent2):
    """Build a child that takes each gene from one of the two parents at random.

    Args:
        parent1: NumPy array; the child starts as a copy of it.
        parent2: NumPy array of the same shape.

    Returns:
        A new array; neither parent is modified.

    Raises:
        ValueError: if the parents have different shapes.
    """
    if parent1.shape != parent2.shape:
        # Printing and carrying on only postponed the failure to the masked
        # assignment below, where it surfaced as an unrelated IndexError.
        raise ValueError("Shapes must be the same!")
    child = parent1.copy()
    # True marks the positions that are taken from parent2.
    take_from_second = np.random.choice([True,False],parent1.shape)
    child[take_from_second] = parent2[take_from_second]
    return child

# Uniform mutation by step
def mutation(target,step,stepArr=None):
    """Return a float64 copy of `target` with uniform noise added to every gene.

    Args:
        target: the chromosome to mutate; it is not modified.
        step: noise is drawn uniformly from [-step, step) per gene.
        stepArr: optional per-gene scale multiplied into the noise. When
            omitted the noise is used unscaled (the old default of None made
            the multiplication raise TypeError).

    Returns:
        A new float64 array with the same shape as `target`.
    """
    res = np.asarray(target, dtype='float64')
    noise = np.random.uniform(-step,step,res.shape)
    if stepArr is not None:
        noise = noise * stepArr
    return res + noise

def reset():
    """Delete every entry found directly inside the trainweights directory."""
    stale_paths = glob.glob('trainweights/*')
    for stale in stale_paths:
        os.remove(stale)

def build(weights):
    """Save `weights` into the training target's weights.txt, then run build.py on it."""
    weights_file = TRAIN_TARGET+'/weights.txt'
    store_list_of_arr(weights,weights_file)
    command = "python3 build.py "+TRAIN_TARGET
    os.system(command)

def convert(weights, sizes=None):
    """Split a flat chromosome into consecutive groups.

    Args:
        weights: flat sequence (list or NumPy array) supporting slicing.
        sizes: length of each group, in order. Defaults to WEIGHT_SIZE.

    Returns:
        A list with one slice of `weights` per entry of `sizes`. If `weights`
        is shorter than sum(sizes), the trailing groups come out shorter or
        empty instead of raising.
    """
    if sizes is None:
        sizes = WEIGHT_SIZE
    groups = []
    start = 0
    for width in sizes:
        groups.append(weights[start:start+width])
        start += width
    return groups

# Load all weights in trainweights
def load():
    """Read every trainweights/*.txt file into a flat float64 array.

    Each file's whitespace-separated tokens become one chromosome. The values
    are parsed as floats: keeping them as strings made NumPy build
    string-typed arrays, into which later float assignments (e.g. during
    crossover) are silently stringified and possibly truncated.

    Returns:
        A list of 1-D float64 arrays, ordered by file path so the result is
        reproducible; an empty list when no file matches.

    Raises:
        ValueError: if a file contains a token that is not a number.
    """
    res = []
    for filepath in sorted(glob.iglob('trainweights/*.txt')):
        with open(filepath,'r') as file:
            tokens = file.read().split()
        res.append(np.array(tokens, dtype='float64'))
    return res
def _collect_bot_source():
    """Return the source text shared by every generated tournament bot.

    Layout: dependency.py verbatim, then every other *.py file of the training
    target except agent.py, then agent.py. For all files but dependency.py,
    lines that start with 'from' or 'import' are dropped.
    """
    import_prefixes = ('from', 'import')
    pieces = []
    with open(TRAIN_TARGET+"/dependency.py","r") as f:
        pieces.append(f.read())
    pieces.append("\n")
    for file in glob.glob(TRAIN_TARGET+"/*.py"):
        # Compare by file name so the check does not depend on which path
        # separator glob puts between the directory and the file.
        if os.path.basename(file) in ("agent.py", "dependency.py"):
            continue
        with open(file,"r") as f:
            pieces.extend(line for line in f if not line.startswith(import_prefixes))
        pieces.append("\n")
    with open(TRAIN_TARGET + "/agent.py","r") as f:
        pieces.extend(line for line in f if not line.startswith(import_prefixes))
    return "".join(pieces)


def tournament_build(weights):
    '''
    Generate one standalone bot per chromosome inside the tournament folder.

    For the chromosome at index i this writes tournament/<i>.txt (its grouped
    weights) and tournament/<i>.py (a `weights` string literal followed by the
    training target's sources).

    Args:
        weights: list of all chromosomes of the population.
    '''
    if len(weights) == 0:
        return
    path = "tournament/"
    os.makedirs(path, exist_ok=True)
    # The bot sources are identical for every chromosome: read them once.
    shared_source = _collect_bot_source()
    for i, chromosome in enumerate(weights):
        weights_file = path+str(i)+".txt"
        store_list_of_arr(convert(chromosome),weights_file)
        with open(weights_file,"r") as f:
            text = f.read().rstrip()
        with open(path+str(i)+'.py',"w") as output:
            output.write("weights='''"+text+"'''"+'\n')
            output.write(shared_source)

## Before Training: Disable all "print" functions

In [None]:
if __name__ == '__main__':
    # Load all parameters up front. The population is fixed at 64 for now
    # (keeps the tournament bracket simple).
    population = POP
    step = STEP
    iterations = ITER
    N = sum(WEIGHT_SIZE)
    #TODO: optimize generation of initial chromosome (generate from best solutions)
    initial = load()
    # load() returns a list (possibly empty), so `initial != None` was always
    # true; the None check is kept only as a guard.
    batch = list(initial) if initial is not None else []
    # Only `population` bots ever enter the tournament, so drop any surplus
    # chromosomes and top a short population up with random ones.
    batch = batch[:population]
    for _ in range(population - len(batch)):
        batch.append(np.random.uniform(-step*10,step*10,(N)))
    # The weights are saved below; make sure the target folder exists.
    os.makedirs('trainweights', exist_ok=True)

    # Training starts here.
    print("Start Training. Training population: %s Training iteration: %s" % (population,iterations))
    for i in range(iterations):
        # Save all weights.
        print("========================")
        print("Iteration", i, "starting")
        if i % 1 == 0: #Tunable
            print("Saving all weights")
            reset()
            for j,agent in enumerate(batch):
                store_list_of_arr(convert(agent),'trainweights/'+str(j)+".txt")

        # Knockout tournament: pick the surviving samples and the best sample.
        tournament_build(batch)
        path = 'tournament/'
        botlist = [path+str(j)+'.py' for j in range(population)]
        result,best = tournament(botlist)

        # Map each surviving bot path (tournament/<index>.py) back to its chromosome.
        stay = []
        for j in result[0]:
            index = int(j.replace('tournament/','').split(".")[0])
            stay.append(batch[index])
        print(best)

        # Next generation: survivors, mutated survivors, crossovers, and
        # mutated crossovers - four equally sized groups.
        mutate = [mutation(x,step,STEP_ARR) for x in stay]
        cross = [crossover(random.choice(stay),random.choice(stay)) for x in stay]
        both = [mutation(crossover(random.choice(stay),random.choice(stay)),step,STEP_ARR) for x in stay]
        batch = stay + mutate + cross + both

Start Training. Training population: 64 Training iteration: 200
Iteration 0 starting
Saving all weights
[['tournament/21.py']]
Iteration 1 starting
Saving all weights
[['tournament/52.py']]
Iteration 2 starting
Saving all weights
[['tournament/21.py']]
Iteration 3 starting
Saving all weights
[['tournament/21.py']]
Iteration 4 starting
Saving all weights
[['tournament/37.py']]
Iteration 5 starting
Saving all weights
[['tournament/36.py']]
Iteration 6 starting
Saving all weights
[['tournament/55.py']]
Iteration 7 starting
Saving all weights
[['tournament/43.py']]
Iteration 8 starting
Saving all weights
[['tournament/57.py']]
Iteration 9 starting
Saving all weights
[['tournament/53.py']]
Iteration 10 starting
Saving all weights
[['tournament/26.py']]
Iteration 11 starting
Saving all weights
[['tournament/10.py']]
Iteration 12 starting
Saving all weights
[['tournament/13.py']]
Iteration 13 starting
Saving all weights
[['tournament/23.py']]
Iteration 14 starting
Saving all weights
[['tourna

[['tournament/37.py']]
Iteration 92 starting
Saving all weights
[['tournament/30.py']]
Iteration 93 starting
Saving all weights
[['tournament/37.py']]
Iteration 94 starting
Saving all weights
[['tournament/25.py']]
Iteration 95 starting
Saving all weights
[['tournament/0.py']]
Iteration 96 starting
Saving all weights
[['tournament/35.py']]
Iteration 97 starting
Saving all weights
[['tournament/5.py']]
Iteration 98 starting
Saving all weights
[['tournament/1.py']]
Iteration 99 starting
Saving all weights
[['tournament/32.py']]
Iteration 100 starting
Saving all weights
[['tournament/47.py']]
Iteration 101 starting
Saving all weights
[['tournament/54.py']]
Iteration 102 starting
Saving all weights
[['tournament/27.py']]
Iteration 103 starting
Saving all weights


In [9]:
# Re-initialise all weights: wipe the stored weights and replace every one of
# them with a previously good chromosome.
# !!! Back up the existing weights before running this cell !!!
# NOTE(review): this vector holds 15 values while sum(WEIGHT_SIZE), as defined
# in the visible code, is 26 - confirm the grouping convert() is expected to
# produce here.
agent = np.array([
    2.0, -0.8236757404998933, 0.8020703007978108, 0.80062471268605204, 450,
    0.03212479148393029, 2.5914597642192163,
    0.5, 200.02848810312122,
    0.1, 1.0,
    0.9403425380141911, -3.1819320805078153, -3,
    120,
])
# 64 references to the same chromosome, one per population slot.
a = [agent] * 64
for j, chromosome in enumerate(a):
    store_list_of_arr(convert(chromosome),'trainweights/'+str(j)+".txt")
