In [None]:
import logging
logging.basicConfig(level=logging.ERROR)

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

from matplotlib import gridspec

from helper import *
from peg_sim import PegSimulator
from manage import startSession, finishSession

In [None]:
# Global Settings
# Start the session before any model is created; 0.5 is passed as `memory`.
startSession(memory=0.5)

# Latent layouts. Element [0] is used throughout the notebook as the width of
# an encoded vector; the second element presumably is the number of categories
# per latent variable -- TODO confirm against the auto encoder classes.
STATE_ENCODING = 33, 2
ACTION_ENCODING = 8, 2

# Evaluates to False as long as the leading literal is False; flip that
# literal to True to use the simulator's perfect encoding (only then does the
# (33, 2) layout check matter).
PERFECT_ENCODING = False and (STATE_ENCODING == (33, 2))

<h2>State Auto Encoder</h2>

In [None]:
from sae import StateAutoEncoder

In [None]:
# Load/Create Train Data
#
# Reads the cached board samples from disk and prepends the full and the empty
# board; if that fails for any reason, a fresh set is sampled from the
# simulator instead.

try:
    # Uncomment the next line to skip the cache and always regenerate.
    #raise Exception('Enforce data generation')
    data = np.load('data/sae_data.npy')

    # Add important datapoints: resulting order is [full board, empty board, cached samples...]
    board_empty = np.expand_dims(PegSimulator.getBoardEmpty()[1], axis=0)
    board_full = np.expand_dims(PegSimulator.getBoardFull()[1], axis=0)
    data = np.concatenate((board_full, board_empty, data))
except Exception:
    # `except Exception` (rather than a bare `except`) so that interrupting the
    # kernel during the load does not silently start a 1e5-sample generation.
    #data = PegSimulator.sampleRandom(int(1e5))
    data = PegSimulator().sampleSequence(int(1e5), unique=True)
    # Uncomment to refresh the on-disk cache with the generated samples.
    #np.save('data/sae_data.npy', data)

In [None]:
# Initialize/Load Model

# Layer specification handed to the auto encoder as `network_shape`.
network = [[4, 7], 275]

sae_model = StateAutoEncoder(
    network_shape=network,
    latent_shape=STATE_ENCODING,
    domain=True,
    use_latent=PERFECT_ENCODING,
)
# Restore previously saved state for this model (behaviour when nothing has
# been saved yet is defined by StateAutoEncoder.load -- not visible here).
sae_model.load()

In [None]:
# Train Model

# Explicit latent targets are only supplied in perfect-encoding mode; they are
# stacked on a new last axis as (1 - bit, bit).
if PERFECT_ENCODING:
    data_latent = PegSimulator.perfectEncoding(data)
    data_latent = data_latent[:, :, np.newaxis]
    data_latent = np.concatenate((1 - data_latent, data_latent), axis=2)
else:
    data_latent = None

sae_model.setWeight(2.0, 1.0, 0.0)
loss, loss_eval = sae_model.train(data, data_latent=data_latent)

# Plot both loss curves into one figure and store it
for loss_curve, loss_label in ((loss, 'Loss'), (loss_eval, 'Loss Evaluation')):
    plt.plot(loss_curve, label=loss_label)
plt.legend(loc='best')
plt.savefig('image/loss_sae.png', dpi=100)

In [None]:
# Analyse Latent Output
#
# For a handful of boards, show side by side: the input, its latent code, the
# reconstruction from the raw latent code and the reconstruction from the
# rounded latent code.

# Use the last five samples of one simulated game sequence.
# (An earlier random/empty/full sample set was built here and then immediately
# overwritten by this line; that dead computation has been removed.)
data_input = PegSimulator().sampleSequence(50, one_game=True)[-5:]

latent_softmax = sae_model.encode(data_input)
latent_softmax_round = roundLatent(latent_softmax)

data_output = sae_model.decode(latent_softmax)
data_output_round = sae_model.decode(latent_softmax_round)

for i in range(data_input.shape[0]):
    plt.figure(figsize=(80, 20))

    # (title, 2-D image) for each of the four panels, left to right
    panels = (
        ('Input', data_input[i][:, :, 0]),
        ('Latent', np.transpose(latent_softmax[i])),
        ('Output', data_output[i][:, :, 0]),
        ('Output Latent Rounded', data_output_round[i][:, :, 0]),
    )
    for panel_position, (panel_title, panel_image) in enumerate(panels, start=1):
        ax = plt.subplot(1, 4, panel_position)
        plt.imshow(panel_image, cmap=cm.gray, vmin=0, vmax=1)
        plt.title(panel_title, fontsize=80)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    # save figure (disabled; uncomment to write one PNG per sample)
    #plt.savefig('image/sae_test_{}.png'.format(i), dpi=100)

In [None]:
# Analyse Latent Difference
#
# Compares the rounded latent code of each controlled sample against the
# rounded latent code of the empty board, which is appended as the last row.

controlled_boards = PegSimulator.sampleControlled()
empty_board = np.expand_dims(PegSimulator.getBoardEmpty()[1], axis=0)
data_input = np.concatenate((controlled_boards, empty_board))

latent_softmax = sae_model.encode(data_input)
latent_softmax_round = roundLatent(latent_softmax)

# Only using Rounded Latent Variables, Check for Good Encoding Above
sample_count = data_input.shape[0] - 1  # the trailing empty board is the reference, not a sample
for i in range(sample_count):
    plt.figure(figsize=(80, 20))
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 4])

    # left panel: the input board
    ax = plt.subplot(gs[0])
    plt.imshow(data_input[i][:, :, 0], cmap=cm.gray, vmin=0, vmax=1)
    plt.title('Input Difference', fontsize=80)
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

    # right panel: absolute difference to the reference code, drawn as one row
    ax = plt.subplot(gs[1])
    sample_code = latent_softmax_round[i][:, 0]
    reference_code = latent_softmax_round[-1][:, 0]
    latent_diff = np.abs(sample_code - reference_code)[:, np.newaxis]
    plt.imshow(latent_diff.T, cmap=cm.gray, vmin=0, vmax=1)
    plt.title('Encoding Difference', fontsize=80)
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)

    # save figure
    plt.savefig('image/sae_test2_{}.png'.format(i), dpi=100)

In [None]:
# Analyse Meaning of Latent Variables
#
# Takes one base board, flips each latent variable in turn and shows how the
# decoded board changes.

# Base board: one randomly sampled board. (The empty board that was assigned
# here first was overwritten before use, so that dead assignment is removed.)
image_base = PegSimulator.sampleRandom(1)
latent_softmax = sae_model.encode(image_base)
latent_base = roundLatent(latent_softmax)[0:1, :, 0] # Rounded code of the random board as Base Vector

# Alternative base vector: all ones
#latent_base = np.ones((1, STATE_ENCODING[0]))

# Decode Base Vector
image_base = sae_model.decode(buildLatentBinary(latent_base))[0]

for i in range(STATE_ENCODING[0]):
    # Switch one Value
    latent_value = latent_base.copy()
    latent_value[0, i] = 1 - latent_value[0, i]

    # Decode Latent Vector
    image = sae_model.decode(buildLatentBinary(latent_value))[0]
    image_diff = image - image_base

    plt.figure(figsize=(80, 20))

    # (image, vmin) per panel: base reconstruction, difference, flipped reconstruction.
    # Difference panel MEANING: Black deletes Peg, White sets Peg, Grey has no Effect
    panels = (
        (image_base[:, :, 0], 0),
        (image_diff[:, :, 0], -1),
        (image[:, :, 0], 0),
    )
    for panel_position, (panel_image, panel_vmin) in enumerate(panels, start=1):
        ax = plt.subplot(1, 3, panel_position)
        plt.imshow(panel_image, cmap=cm.gray, vmin=panel_vmin, vmax=1)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [None]:
# Finish Model

#sae_model.save()
#sae_model.finish()

<h2>Action Auto Encoder</h2>

In [None]:
from aae import ActionAutoEncoder

In [None]:
# Load/Create Train Data
#
# Builds (state, next state) pairs in latent space: aae_in[k] is the encoded
# board before a move and aae_out[k] the encoded board after it.

try:
    # Deliberately forces regeneration on every run; comment this line out to
    # reuse the cached file below.
    raise Exception('Enforce data generation')
    loaded = np.load('data/aae_data.npz')
    aae_in, aae_out = (loaded[i] for i in loaded.files)
except Exception:
    # `except Exception` (not a bare `except`) so a kernel interrupt is not
    # swallowed and turned into a regeneration.
    #
    # Per-game chunks are collected in lists and concatenated once afterwards;
    # concatenating onto the growing arrays in every iteration copied the whole
    # dataset each time.
    in_chunks, out_chunks = [], []
    collected_rows = 0
    while collected_rows < 2e5:
        aae_data = PegSimulator().sampleSequence(count=50, one_game=True)

        if PERFECT_ENCODING:
            if not in_chunks: # Only print once
                print('Using Perfect Encoding')
            aae_data_encoded = 1 - PegSimulator.perfectEncoding(aae_data)
        else:
            aae_data_encoded = sae_model.encode(aae_data)
            aae_data_encoded = roundLatent(aae_data_encoded)[:, :, 0] # No Batching

        # Row t is paired with row t+1; the last row has no successor.
        in_chunks.append(aae_data_encoded[:-1])
        out_chunks.append(aae_data_encoded[1:])
        collected_rows += in_chunks[-1].shape[0]

    aae_in = np.concatenate(in_chunks)
    aae_out = np.concatenate(out_chunks)

    # Prune duplicate entries (a pair is a duplicate only if both halves match)
    aae_data = np.concatenate((aae_in, aae_out), axis=1)
    aae_data = np.unique(aae_data, axis=0)
    aae_in = aae_data[:, :STATE_ENCODING[0]]
    aae_out = aae_data[:, STATE_ENCODING[0]:]

    # Save Data
    np.savez('data/aae_data.npz', aae_in, aae_out)

print(aae_in.shape[0])

In [None]:
# Initialize/Load Model

# Layer specification handed to the action auto encoder as `network_shape`.
network = [100, 140]

aae_model = ActionAutoEncoder(
    data_shape=(STATE_ENCODING[0],),
    latent_shape=ACTION_ENCODING,
    network_shape=network,
)
# Restore previously saved state for this model (details are defined by
# ActionAutoEncoder.load, which is not visible here).
aae_model.load()

In [None]:
# Train Model

aae_model.setWeight(5.0, 1.0, 0.0)
loss, loss_eval = aae_model.train(data_in=aae_in, data_out=aae_out)

# Plot both loss curves into one figure and store it
for loss_curve, loss_label in ((loss, 'Loss'), (loss_eval, 'Loss Evaluation')):
    plt.plot(loss_curve, label=loss_label)
plt.legend(loc='best')
plt.savefig('image/loss_aae.png', dpi=100)

In [None]:
# Analyse Action Encoding given before and after states
#
# For every consecutive pair of boards in one game: encode the action between
# them, round it, apply it again to the "before" state and decode the result.

aae_test_data = PegSimulator().sampleSequence(count=50, one_game=True)

# Encode the boards into one latent row per board
if not PERFECT_ENCODING:
    aae_test_data_encoded = roundLatent(sae_model.encode(aae_test_data))[:, :, 0] # No Batching
else:
    aae_test_data_encoded = 1 - PegSimulator.perfectEncoding(aae_test_data)

# Shift by one row so that index t holds the successor of row t; the final
# (wrapped-around) row is dropped from both arrays.
aae_test_data_encoded_result = np.roll(aae_test_data_encoded, -1, axis=0)
aae_test_in = aae_test_data_encoded[:-1]
aae_test_out = aae_test_data_encoded_result[:-1]

aae_test_action = aae_model.encode(aae_test_in, aae_test_out)
aae_test_action_round = roundLatent(aae_test_action)

aae_test_out_round = roundLatentBinary(aae_model.decode(aae_test_in, aae_test_action_round))
aae_test_out_round_decoded = sae_model.decode(buildLatentBinary(aae_test_out_round))

for i in range(aae_test_action.shape[0]):
    plt.figure(figsize=(80, 20))

    # (title, 2-D image) per panel: board before, action code, true board
    # after, board predicted from the rounded action
    panels = (
        ('Input', aae_test_data[i, :, :, 0]),
        ('Latent', np.transpose(aae_test_action[i])),
        ('Output', aae_test_data[i+1, :, :, 0]),
        ('Output Latent Rounded', aae_test_out_round_decoded[i, :, :, 0]),
    )
    for panel_position, (panel_title, panel_image) in enumerate(panels, start=1):
        ax = plt.subplot(1, 4, panel_position)
        plt.imshow(panel_image, cmap=cm.gray, vmin=0, vmax=1)
        plt.title(panel_title, fontsize=80)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    # save figure
    plt.savefig('image/aae_test_{}.png'.format(i), dpi=100)

In [None]:
# Control Action on multiple input states
#
# Applies the first ten action codes to one start state and shows the start
# board next to the decoded result for each of them.

start_state = PegSimulator().sampleSequence(count=1)

if not PERFECT_ENCODING:
    start_state_encoded = roundLatent(sae_model.encode(start_state))[:, :, 0] # No Batching
else:
    start_state_encoded = 1 - PegSimulator.perfectEncoding(start_state)

all_actions = allActionsBinary(ACTION_ENCODING)[:10]
for action in all_actions:
    # A leading axis of length one is added to the action before decoding
    after_state_encoded = roundLatentBinary(
        aae_model.decode(start_state_encoded, action[np.newaxis, :, :])
    )
    if not PERFECT_ENCODING:
        after_state = sae_model.decode(buildLatentBinary(after_state_encoded))
    else:
        after_state = PegSimulator.perfectDecoding(1 - after_state_encoded)

    plt.figure(figsize=(60, 15))

    # left: start board, right: board after applying the action
    for panel_position, panel_boards in enumerate((start_state, after_state), start=1):
        ax = plt.subplot(1, 2, panel_position)
        plt.imshow(panel_boards[0, :, :, 0], cmap=cm.gray, vmin=0, vmax=1)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

In [None]:
# Finish Model

#aae_model.save()
#aae_model.finish()

<h2>State Discriminator</h2>

In [None]:
from sd import StateDiscriminator

In [None]:
# Load/Create PU Train Data
#
# sd_in holds encoded states, sd_out their label: 1 for states sampled from the
# simulator, 0 for the "mixed" states generated below.

# True: mixed states are random bit vectors pushed repeatedly through the state
# auto encoder. False: mixed states are simulator states with one bit flipped.
DENOISE_RANDOM_STATES = True

try:
    # Uncomment the next line to skip the cache and always regenerate.
    #raise Exception('Enforce data generation')
    loaded = np.load('data/sd_data.npz')
    sd_in, sd_out = (loaded[i] for i in loaded.files)
except Exception:
    # `except Exception` (not a bare `except`) so a kernel interrupt during the
    # load is not swallowed and turned into a regeneration.
    sd_in = PegSimulator().sampleSequence(count=int(5e4), unique=True)

    if PERFECT_ENCODING:
        print('Using Perfect Encoding')
        sd_in = 1 - PegSimulator.perfectEncoding(sd_in)
    else:
        sd_in = sae_model.encode(sd_in)
        sd_in = roundLatent(sd_in)[:, :, 0] # No Batching

    # Mixed Train Data
    if DENOISE_RANDOM_STATES:
        sd_in_mixed = np.random.randint(2, size=(int(5e4), STATE_ENCODING[0]))

        print('Denoising ...')
        while True: # Denoise Random Data
            # One decode -> encode -> round pass through the state auto encoder
            sd_in_mixed_decoded = sae_model.decode(buildLatentBinary(sd_in_mixed))
            sd_in_mixed_encoded = sae_model.encode(sd_in_mixed_decoded)
            sd_in_mixed_encoded = roundLatent(sd_in_mixed_encoded)[:, :, 0]

            # Total number of entries the pass changed, summed over all rows
            difference = np.sum(np.abs(sd_in_mixed - sd_in_mixed_encoded))
            print('Difference:', difference)
            if not difference > 1:
                # At most one entry still changes: keep the current vectors
                break
            sd_in_mixed = sd_in_mixed_encoded
        print('Denoising Done')
    else:
        sd_in_mixed = PegSimulator().sampleSequence(count=int(5e4), unique=True)
        sd_in_mixed = sae_model.encode(sd_in_mixed)
        sd_in_mixed = roundLatent(sd_in_mixed)[:, :, 0] # No Batching

        # Randomly switch single peg, should cause mostly illegal states
        mixed_rows = np.arange(sd_in_mixed.shape[0])
        random_delete = np.random.randint(STATE_ENCODING[0], size=(sd_in_mixed.shape[0],))
        sd_in_mixed[mixed_rows, random_delete] = 1 - sd_in_mixed[mixed_rows, random_delete]

    # Combine, Prune duplicate entries.
    # np.unique reports the index of the first occurrence, and the simulator
    # states come first, so a state present in both sets keeps label 1.
    sd_out = np.zeros((sd_in.shape[0] + sd_in_mixed.shape[0], 1))
    sd_out[:sd_in.shape[0]] = 1
    sd_in = np.concatenate((sd_in, sd_in_mixed))
    sd_in, indices = np.unique(sd_in, axis=0, return_index=True)
    sd_out = sd_out[indices]

    # Shuffle (inputs and labels together, so rows stay aligned)
    sd_data = np.concatenate((sd_in, sd_out), axis=1)
    np.random.shuffle(sd_data)
    sd_in = sd_data[:, :STATE_ENCODING[0]]
    sd_out = sd_data[:, STATE_ENCODING[0]:]

    # Save Data
    np.savez('data/sd_data.npz', sd_in, sd_out)

print('True:', np.sum(sd_out==1), 'Mixed:', np.sum(sd_out==0))

In [None]:
# Initialize/Load Model

sd_model = StateDiscriminator(
    data_shape=(STATE_ENCODING[0],),
    network_shape=[80],
)
# Loading of saved state is disabled here; uncomment to restore it.
#sd_model.load()

# Last calculated C. Used as the scaling constant until the "Calculate C" cell
# replaces it with a freshly measured value.
sd_C = 0.9

In [None]:
# Train Model

loss, loss_eval = sd_model.train(data_in=sd_in, data_out=sd_out, epochs=50)

# Plot both loss curves into one figure and store it
for loss_curve, loss_label in ((loss, 'Loss'), (loss_eval, 'Loss Evaluation')):
    plt.plot(loss_curve, label=loss_label)
plt.legend(loc='best')
plt.savefig('image/loss_sd.png', dpi=100)

In [None]:
# Calculate C
#
# C is the average prediction of the discriminator on the positively labelled
# rows among the last 10% of the data.
import math

# 0.1 set in train function (per the original note; the train function is not
# visible here, so keep both values in sync by hand)
test_split = math.ceil(sd_in.shape[0] * 0.1)

holdout_in = sd_in[-test_split:]
holdout_is_positive = sd_out[-test_split:, 0] == 1
positive_tests = holdout_in[holdout_is_positive]
print(test_split, positive_tests.shape)

predicted = sd_model.predict(positive_tests)
sd_C = np.average(predicted)
print('C:', sd_C)

In [None]:
# Test performance
#
# Encodes one simulated game, flips one entry per encoded state and prints the
# discriminator output (scaled by sd_C) for these corrupted states.

test_data = PegSimulator().sampleSequence(count=50, one_game=True)

if not PERFECT_ENCODING:
    test_data = roundLatent(sae_model.encode(test_data))[:, :, 0] # No Batching
else:
    print('Using Perfect Encoding')
    test_data = 1 - PegSimulator.perfectEncoding(test_data)

# Randomly switch single peg, should cause mostly illegal states
random_delete = np.random.randint(STATE_ENCODING[0], size=(test_data.shape[0],))
test_rows = np.arange(test_data.shape[0])
test_data[test_rows, random_delete] = 1 - test_data[test_rows, random_delete]

predicted = sd_model.predict(test_data) * sd_C
print(predicted)
print(np.average(predicted))

In [None]:
# Finish Model

#sd_model.save()
#sd_model.finish()

<h2>Action Discriminator</h2>

In [None]:
from ad import ActionDiscriminator

In [None]:
# Load/Create PU Train Data (Takes some time)
#
# ad_in / ad_in_result hold encoded (state, result state) pairs, ad_out their
# label: 1 for transitions observed in simulated games, 0 for the "mixed"
# transitions produced below with the action auto encoder.

try:
    # Uncomment the next line to skip the cache and always regenerate.
    #raise Exception('Enforce data generation')
    loaded = np.load('data/ad_data.npz')
    ad_in, ad_in_result, ad_out = (loaded[i] for i in loaded.files)
except Exception:
    # `except Exception` (not a bare `except`) so a kernel interrupt during the
    # load is not swallowed and turned into a regeneration.
    #
    # Chunks are collected in lists and concatenated once; concatenating onto
    # the growing arrays in every iteration copied the whole dataset each time.
    state_chunks, result_chunks = [], []
    collected_rows = 0
    while collected_rows < 1e5:
        ad_data = PegSimulator().sampleSequence(count=50, one_game=True)

        if PERFECT_ENCODING:
            if not state_chunks: # Only print once
                print('Using Perfect Encoding')
            ad_data_encoded = 1 - PegSimulator.perfectEncoding(ad_data)
        else:
            ad_data_encoded = sae_model.encode(ad_data)
            ad_data_encoded = roundLatent(ad_data_encoded)[:, :, 0] # No Batching

        # Row t is paired with row t+1; the last row has no successor.
        state_chunks.append(ad_data_encoded[:-1])
        result_chunks.append(ad_data_encoded[1:])
        collected_rows += state_chunks[-1].shape[0]

    ad_in = np.concatenate(state_chunks)
    ad_in_result = np.concatenate(result_chunks)

    # Prune duplicate entries (a pair is a duplicate only if both halves match)
    ad_in = np.concatenate((ad_in, ad_in_result), axis=1)
    ad_in = np.unique(ad_in, axis=0)
    ad_in_result = ad_in[:, STATE_ENCODING[0]:]
    ad_in = ad_in[:, :STATE_ENCODING[0]]
    ad_in_count = ad_in.shape[0]  # number of observed (label 1) transitions

    # Mixed Train Data: for every observed start state, decode all actions and
    # keep at most one result that the state discriminator rates above half of C.
    all_actions = allActionsBinary(ACTION_ENCODING)
    mixed_states, mixed_results = [], []
    for i in range(ad_in_count):
        states = np.repeat(ad_in[i:i+1], all_actions.shape[0], axis=0)
        results = aae_model.decode(states, all_actions)
        results = roundLatentBinary(results)
        results_eval = sd_model.predict(results)[:, 0]
        results_eval = results_eval > (0.5 * sd_C)

        # Only select possibly legal result states
        states = states[results_eval]
        results = results[results_eval]

        # Randomly select datapoints (one per start state, none if nothing passed)
        selection = np.arange(states.shape[0])
        np.random.shuffle(selection)
        selection = selection[:min(1, states.shape[0])]

        mixed_states.append(states[selection])
        mixed_results.append(results[selection])

    ad_in = np.concatenate([ad_in] + mixed_states)
    ad_in_result = np.concatenate([ad_in_result] + mixed_results)

    # Combine, Prune duplicate entries.
    # np.unique reports the index of the first occurrence, and the observed
    # transitions come first, so a pair present in both sets keeps label 1.
    ad_out = np.zeros((ad_in.shape[0], 1))
    ad_out[:ad_in_count] = 1
    ad_in = np.concatenate((ad_in, ad_in_result), axis=1)
    ad_in, indices = np.unique(ad_in, axis=0, return_index=True)
    ad_out = ad_out[indices]

    # Shuffle (states, results and labels together, so rows stay aligned)
    ad_data = np.concatenate((ad_in, ad_out), axis=1)
    np.random.shuffle(ad_data)
    ad_in = ad_data[:, :STATE_ENCODING[0]]
    ad_in_result = ad_data[:, STATE_ENCODING[0]:-1]
    ad_out = ad_data[:, -1:]

    # Save Data
    np.savez('data/ad_data.npz', ad_in, ad_in_result, ad_out)

print('True:', np.sum(ad_out==1), 'Mixed:', np.sum(ad_out==0))

In [None]:
# Initialize/Load Model

ad_model = ActionDiscriminator(
    data_shape=(STATE_ENCODING[0],),
    network_shape=[140],
)
# Restore previously saved state for this model (details are defined by
# ActionDiscriminator.load, which is not visible here).
ad_model.load()

# Last calculated C. Used as the scaling constant until the "Calculate C" cell
# replaces it with a freshly measured value.
ad_C = 0.9

In [None]:
# Train Model

loss, loss_eval = ad_model.train(
    data_s=ad_in,
    data_t=ad_in_result,
    data_out=ad_out,
    epochs=50,
)

# Plot both loss curves into one figure and store it
for loss_curve, loss_label in ((loss, 'Loss'), (loss_eval, 'Loss Evaluation')):
    plt.plot(loss_curve, label=loss_label)
plt.legend(loc='best')
plt.savefig('image/loss_ad.png', dpi=100)

In [None]:
# Calculate C
#
# C is the average prediction of the discriminator on the positively labelled
# transitions among the last 10% of the data.
import math

# 0.1 set in train function (per the original note; the train function is not
# visible here, so keep both values in sync by hand)
test_split = math.ceil(ad_in.shape[0] * 0.1)

holdout_is_positive = ad_out[-test_split:, 0] == 1
positive_tests_in = ad_in[-test_split:][holdout_is_positive]
positive_tests_result = ad_in_result[-test_split:][holdout_is_positive]

predicted = ad_model.predict(positive_tests_in, positive_tests_result)
ad_C = np.average(predicted)
print('C:', ad_C)

In [None]:
# Test performance
#
# Encodes one simulated game, pairs every state with its successor, flips one
# entry of each successor and prints the average discriminator output (scaled
# by ad_C) for these corrupted transitions.

test_data = PegSimulator().sampleSequence(count=50, one_game=True)

if not PERFECT_ENCODING:
    test_data = roundLatent(sae_model.encode(test_data))[:, :, 0] # No Batching
else:
    print('Using Perfect Encoding')
    test_data = 1 - PegSimulator.perfectEncoding(test_data)

# Shift by one row so that index t holds the successor of row t, then drop the
# final (wrapped-around) row from both arrays.
test_data_result = np.roll(test_data, -1, axis=0)
test_data, test_data_result = test_data[:-1], test_data_result[:-1]

# Randomly switch single peg, makes all transitions illegal
random_delete = np.random.randint(STATE_ENCODING[0], size=(test_data.shape[0],))
test_rows = np.arange(test_data.shape[0])
test_data_result[test_rows, random_delete] = 1 - test_data_result[test_rows, random_delete]

predicted = ad_model.predict(test_data, test_data_result) * ad_C
print(np.average(predicted))

In [None]:
# Finish Model

#ad_model.save()
#ad_model.finish()