# Imports and settings

In [1]:
%load_ext autoreload
%autoreload 1

import numpy as np
import os
from environments import HexGame
from networks import CNN, ANN
from mcts import MCTS
from agents import MCTSAgent, ANNAgent
from memory import Memory
import matplotlib.pyplot as plt
import ray

os.environ["WANDB_NOTEBOOK_NAME"] = "testing.ipynb"
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mda9elkh[0m (use `wandb login --relogin` to force relogin)


True

In [2]:
def preprocessing(filename="/Users/daniel/Documents/AIProg/Assignments/Assignment 2/cases/r_1500_mcts/train_samples"):
    """Load saved training samples and encode each cell of every state as two bits.

    Reads `<filename>_states.txt` (integers) and `<filename>_dists.txt` (floats)
    with `np.loadtxt`. Every state value is expanded into two consecutive
    floats: a value of 1 becomes `[0, 1]`, any other value becomes `[1, 0]`.

    Args:
        filename: Path prefix of the sample files, without the `_states.txt` /
            `_dists.txt` suffix. Defaults to the original hard-coded location
            so existing zero-argument calls keep working.

    Returns:
        A tuple `(encoded_states, dists)` where `encoded_states` is a float32
        array of shape `(n_samples, 2 * n_cells)` and `dists` is the float32
        array loaded from the distributions file.
    """
    # ndmin=2 keeps a file holding a single sample as one row instead of a flat vector.
    states = np.loadtxt(filename + '_states.txt', dtype=np.int32, ndmin=2)
    dists = np.loadtxt(filename + '_dists.txt', dtype=np.float32)

    # Vectorised form of the two-character binary strings "01" (value 1) and "10" (anything else).
    is_one = states == 1
    encoded = np.stack([~is_one, is_one], axis=-1)

    # The width follows the input instead of assuming 50 cells (100 bits).
    encoded = encoded.reshape(states.shape[0], 2 * states.shape[1])

    return encoded.astype(np.float32), dists

# Load the saved samples once; `states` holds the two-bit encoded boards and
# `dists` the matching distributions read from the *_dists.txt file.
states, dists = preprocessing()

In [None]:
# Setup: 4x4 Hex, an ANN used as the MCTS rollout policy, and an MCTS-driven agent.
env = HexGame(size=4)
ann = ANN.build(learning_rate=0.001, input_size=len(env.ann_state), output_size=len(env.legal_binary_moves), activation="relu", optimizer="adam", hidden_size=(100, 100))
annagent = ANNAgent(network=ann)
mcts = MCTS(environment=env, rollout_policy_agent=annagent, use_time_budget=False, rollouts=1500, c=1.4, epsilon=1)
agent = MCTSAgent(environment=env, mcts=mcts)
memory = Memory(sample_size=1.0, queue_size=10000, verbose=False)

## PLOTTING
# NOTE(review): these lists and axes are created but never updated by the loop
# below; the metrics are only sent to wandb. Kept so the cell's names are unchanged.
train_accuracies = []
train_loss = []

fig = plt.figure(figsize=(12, 5))
gs = fig.add_gridspec(1, 2)
ax1 = fig.add_subplot(gs[0, 0])
ax1.set_title("Accuracy")
ax2 = fig.add_subplot(gs[0, 1])
ax2.set_title("Loss")
## /PLOTTING

# Multiplicative per-episode decay: after 1000 episodes epsilon is 5% of its start value.
decay = 0.05**(1/1000)
i=0

def save_model():
    """Save the network, tagging the file with the board size and the episode counter `i`."""
    ann.save_model(suffix=f"S{env.size}_B{i}")

# Checkpoint the untrained network as batch 0.
save_model()

wandb.init(project="hex", config={"mcts": mcts.config, "ann": ann.config})

# The loop has no stop condition, so interrupting the kernel is the only way
# out. The try block makes that exit clean: the latest weights are saved and
# the wandb run is closed instead of being left open.
try:
    while True:
        # Play one full self-play game, recording every position.
        env.reset()
        while not env.is_game_over:
            best_move, distribution = agent.get_move(greedy=False)
            memory.register("player", env.current_player)
            memory.register("action", best_move)
            memory.register("state", env.ann_state)
            memory.register("distribution", distribution.flatten().tolist())
            env.play(best_move)

        memory.register_result(env.result)

        # NOTE(review): indices 2, 3, 4 presumably select state, distribution and
        # result in registration order; confirm against Memory.all().
        samples = memory.all()
        result = ann.train_on_batch(samples[2], samples[3], samples[4])
        i += 1

        wandb.log({"accuracy": result["accuracy"], "loss": result["loss"], "epsilon": mcts.epsilon})
        mcts.epsilon *= decay

        # Periodic checkpoint every 20 episodes.
        if i % 20 == 0:
            save_model()
except KeyboardInterrupt:
    # Keep the episodes trained since the last periodic checkpoint, then let
    # the interrupt propagate so the notebook still stops here.
    save_model()
    raise
finally:
    wandb.finish()


In [2]:
# Fresh environment, network, rollout agent, search and replay memory for the
# experiments in the following cells (same settings as the training cell).
env = HexGame(size=4)
ann = ANN.build(
    learning_rate=0.001,
    input_size=len(env.ann_state),
    output_size=len(env.legal_binary_moves),
    activation="relu",
    optimizer="adam",
    hidden_size=(100, 100),
)
annagent = ANNAgent(network=ann)
mcts = MCTS(
    environment=env,
    rollout_policy_agent=annagent,
    use_time_budget=False,
    rollouts=1500,
    c=1.4,
    epsilon=1,
)
agent = MCTSAgent(environment=env, mcts=mcts)
memory = Memory(sample_size=1.0, queue_size=10000, verbose=False)

2022-04-03 18:13:29.719788: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [36]:
import tensorflow as tf
import numpy as np


class LiteModel:
    """Thin wrapper that runs inference through a TensorFlow Lite interpreter.

    The wrapper allocates the interpreter's tensors once and caches the index,
    shape and dtype of the first input and first output tensor. Batches are
    evaluated one record at a time.

    NOTE(review): `__call__` returns a NumPy array. The notebook records
    "AttributeError: 'numpy.ndarray' object has no attribute 'numpy'" when this
    wrapper is passed to ANN; presumably that wrapper calls `.numpy()` on the
    result of `model(x)`. Confirm there before changing the return type here.
    """

    @classmethod
    def from_file(cls, model_path):
        """Build a wrapper around the TFLite model stored at `model_path`."""
        # `cls` instead of the hard-coded class name, so subclasses get their own type back.
        return cls(tf.lite.Interpreter(model_path=model_path))

    @classmethod
    def from_keras_model(cls, kmodel):
        """Convert the Keras model `kmodel` to TFLite in memory and wrap it."""
        converter = tf.lite.TFLiteConverter.from_keras_model(kmodel)
        tflite_model = converter.convert()
        return cls(tf.lite.Interpreter(model_content=tflite_model))

    def __init__(self, interpreter):
        """Allocate tensors on `interpreter` and cache its first input/output details."""
        self.interpreter = interpreter
        self.interpreter.allocate_tensors()
        input_det = self.interpreter.get_input_details()[0]
        output_det = self.interpreter.get_output_details()[0]
        self.input_index = input_det["index"]
        self.output_index = output_det["index"]
        self.input_shape = input_det["shape"]
        self.output_shape = output_det["shape"]
        self.input_dtype = input_det["dtype"]
        self.output_dtype = output_det["dtype"]

    def __call__(self, inp):
        """Alias for `predict`, so the wrapper can be called like a function."""
        return self.predict(inp)

    def predict(self, inp):
        """Run the model on a batch of records.

        Args:
            inp: Batch whose first axis indexes records. Anything
                `np.asarray` accepts works (arrays, nested lists, ...); it is
                converted to the interpreter's input dtype.

        Returns:
            Array of shape `(len(inp), output_shape[1])` in the output dtype.
        """
        # np.asarray instead of inp.astype: plain lists have no .astype method.
        inp = np.asarray(inp, dtype=self.input_dtype)
        count = inp.shape[0]
        out = np.zeros((count, self.output_shape[1]), dtype=self.output_dtype)
        for i in range(count):
            # The slice i:i+1 keeps the leading batch axis of size one.
            self.interpreter.set_tensor(self.input_index, inp[i:i+1])
            self.interpreter.invoke()
            out[i] = self.interpreter.get_tensor(self.output_index)[0]
        return out

    def predict_single(self, inp):
        """ Like predict(), but only for a single record. The input data can be a Python list. """
        inp = np.array([inp], dtype=self.input_dtype)
        self.interpreter.set_tensor(self.input_index, inp)
        self.interpreter.invoke()
        out = self.interpreter.get_tensor(self.output_index)
        return out[0]

In [37]:
from tensorflow.keras import Sequential 

In [38]:
# Build a second Keras model with the same architecture as `ann.model`, then
# copy the weights over so both models hold identical parameters.
model = Sequential.from_config(ann.model.get_config())
model.set_weights(ann.model.get_weights())

In [39]:
# Wrap the cloned Keras model in an ANN and bind an agent to the shared `env`.
agent1 = ANNAgent(environment=env, network=ANN(model=model))

In [40]:
# Ask the cloned-model agent for its greedy move; the cell displays the
# returned (move, distribution) pair.
agent1.get_move(greedy=True)

((1, 3),
 array([0.0650397 , 0.06574406, 0.06357822, 0.05789192, 0.05793533,
        0.06129165, 0.06438028, 0.06808408, 0.05823367, 0.06183377,
        0.06072819, 0.06303644, 0.06495878, 0.05859031, 0.06064562,
        0.06802795], dtype=float32))

In [41]:
# `annagent` was constructed with a network only, so attach the environment
# before querying it. Same greedy query as for agent1, presumably to check
# that the cloned model reproduces the original network's output.
annagent.environment = env
annagent.get_move(greedy=True)

((1, 3),
 array([0.0650397 , 0.06574406, 0.06357822, 0.05789192, 0.05793533,
        0.06129165, 0.06438028, 0.06808408, 0.05823367, 0.06183377,
        0.06072819, 0.06303644, 0.06495878, 0.05859031, 0.06064562,
        0.06802795], dtype=float32))

In [42]:
# Convert the cloned Keras model to TFLite and try it as a drop-in network.
# NOTE(review): the recorded run of this cell ends with
# "AttributeError: 'numpy.ndarray' object has no attribute 'numpy'", so the
# LiteModel output is not yet compatible with what ANN/ANNAgent expects.
lmodel = LiteModel.from_keras_model(model)
lagent = ANNAgent(environment=env, network=ANN(model=lmodel))
lagent.get_move(greedy=True)

INFO:tensorflow:Assets written to: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpbo1jyy5k/assets


INFO:tensorflow:Assets written to: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpbo1jyy5k/assets
2022-04-03 18:26:02.109114: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:363] Ignored output_format.
2022-04-03 18:26:02.109134: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:366] Ignored drop_control_dependency.
2022-04-03 18:26:02.109296: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpbo1jyy5k
2022-04-03 18:26:02.110264: I tensorflow/cc/saved_model/reader.cc:107] Reading meta graph with tags { serve }
2022-04-03 18:26:02.110276: I tensorflow/cc/saved_model/reader.cc:148] Reading SavedModel debug info (if present) from: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpbo1jyy5k
2022-04-03 18:26:02.114246: I tensorflow/cc/saved_model/loader.cc:210] Restoring SavedModel bundle.
2022-04-03 18:26:02.136176: I tensorflow/cc/saved_model/loader.cc:194] Running initializatio

AttributeError: 'numpy.ndarray' object has no attribute 'numpy'

In [None]:

@ray.remote
class ANNTrainer:
    """Ray actor that owns a network and trains it on samples held in a memory.

    NOTE(review): Ray serialises arguments passed to `.remote(...)`, so this
    actor works on its own copy of `memory`; samples registered elsewhere will
    not show up here unless the memory itself is shared (e.g. as an actor).
    """

    def __init__(self, memory, board_size=4):
        """Store the memory and build the network.

        Args:
            memory: Sample store providing `all()`.
            board_size: Hex board size used to derive the network's input and
                output sizes. Defaults to 4, the size used throughout the notebook.
        """
        # Was `__initialize__`, which Python never calls, so `.remote(memory)` failed.
        self.memory = memory
        # A local environment avoids depending on a driver-side global `env`.
        env = HexGame(size=board_size)
        self.ann = ANN.build(learning_rate=0.001, input_size=len(env.ann_state), output_size=len(env.legal_binary_moves), activation="relu", optimizer="adam", hidden_size=(100, 100))

    def train(self):
        """Train once on everything in the memory and return the training result."""
        # The original had no `self` and read undefined globals `ann` and `samples`.
        samples = self.memory.all()
        # Same column indices as the training-loop cell uses for train_on_batch.
        return self.ann.train_on_batch(samples[2], samples[3], samples[4])


@ray.remote
class ANETPredictor:
    """Ray actor placeholder for serving network predictions (not implemented yet)."""

    def __init__(self, storage):
        """Keep a handle to `storage` for later use.

        Was `__initialize__`, which Python never calls, so constructing the
        actor with an argument would have failed.
        """
        self.storage = storage


@ray.remote
class MCTSWorker:
    """Ray actor that plays self-play Hex games with MCTS and records them in a memory.

    NOTE(review): Ray serialises arguments passed to `.remote(...)`, so this
    actor records into its own copy of `memory`; confirm how the samples are
    meant to get back to the trainer.
    """

    def __init__(self, memory, anet_config):
        """Create the worker's own environment, network and MCTS agent.

        Args:
            memory: Sample store providing `register` and `register_result`.
            anet_config: Keras model config used to rebuild the network locally.
        """
        # Was `__initialize__`, which Python never calls.
        self.environment = HexGame(size=4)
        self.model = Sequential.from_config(anet_config)
        self.memory = memory

        # The worker needs its own agent bound to its own environment; the
        # original `simulate` used the driver's global `agent` and `env`.
        # Search settings mirror the notebook's single-process setup.
        rollout_agent = ANNAgent(network=ANN(model=self.model))
        mcts = MCTS(environment=self.environment, rollout_policy_agent=rollout_agent, use_time_budget=False, rollouts=1500, c=1.4, epsilon=1)
        self.agent = MCTSAgent(environment=self.environment, mcts=mcts)

    def simulate(self, weights=None):
        """Play one full game, registering every position and the final result.

        Args:
            weights: Optional Keras weights to load before playing.
        """
        self.set_weights(weights)
        self.environment.reset()
        while not self.environment.is_game_over:
            best_move, distribution = self.agent.get_move(greedy=False)

            self.memory.register("player", self.environment.current_player)
            self.memory.register("action", best_move)
            self.memory.register("state", self.environment.ann_state)
            self.memory.register("distribution", distribution.flatten().tolist())

            self.environment.play(best_move)

        self.memory.register_result(self.environment.result)

    def set_weights(self, weights=None):
        """Load `weights` into the local model; do nothing when none are given.

        The original read an undefined global `storage`. Passing the weights
        explicitly keeps the method usable; an object ref passed to
        `.remote(...)` is resolved by Ray before the method runs.
        """
        if weights is not None:
            self.model.set_weights(weights)


# Memory
memory = Memory(sample_size=1.0, queue_size=10000, verbose=False)
# NOTE(review): `ray_memory` is not used below; the actors receive `memory` directly.
ray_memory = ray.put(memory)

# Network architecture handed to the workers so each can rebuild the model locally.
# `anet_config` was previously undefined, which made this cell fail with a NameError.
anet_config = ann.model.get_config()

# Workers
trainer = ANNTrainer.remote(memory)
mcts_workers = [MCTSWorker.remote(memory, anet_config) for _ in range(2)]

In [None]:
import time
import random

@ray.remote
class Predictor:
    """Ray actor skeleton for a prediction service; no method is implemented yet."""

    def __init__(self):
        pass

    def loop(self):
        """Placeholder for the predictor's main loop."""
        pass

    def wait(self, state):
        """Placeholder for a request made with `state`; currently returns None.

        The original method had no body at all, which made the whole cell a
        syntax error.
        """
        pass



@ray.remote
class Workers:
    """Ray actor that sleeps a random time and then pings the predictor actor."""

    def __init__(self, pred):
        """Keep the handle of the predictor actor to call later."""
        self.predictor = pred

    def train(self):
        """Sleep 0-6 seconds, then send one request to the predictor."""
        time.sleep(random.randint(0,6))
        # Actor methods must be invoked through `.remote(...)`; calling them
        # directly on a handle raises an error.
        # NOTE(review): two arguments are sent, but the Predictor.wait visible in
        # this notebook takes a single `state`; confirm the intended signature.
        self.predictor.wait.remote(id(self), 10)

        
        




In [1]:
import time
import ray

# Without the decorator `function` is a plain Python function and has no
# `.remote` attribute, so the call at the bottom of this cell failed.
@ray.remote
def function(test):
    """Sleep four seconds, then return the argument unchanged (a slow echo task)."""
    time.sleep(4)
    return test

# ignore_reinit_error lets the cell be re-run in a kernel where Ray is already up.
ray.init(ignore_reinit_error=True)
# `test` is an object ref to the pending result, not the value itself.
test = function.remote("hei")

In [48]:
# ray.wait takes a list of object refs and returns (ready, not_ready) lists;
# passing a single ref raises a TypeError.
ray.wait([test])

0.15743726905844901