In [1]:
import axelrod as axl

import pandas as pd
from pathlib import Path
import sequence_sensei

In [2]:
import tqdm

In [4]:
import keras
from keras.layers import LSTM, Dense, Dropout, Bidirectional
from keras.models import Sequential

In [5]:
import matplotlib.pyplot as plt

In [6]:
import importlib.util
import sys

# Load the local `player_class.py` file as a module named `player_class`.
# `imp.load_source` is deprecated since Python 3.4 and the `imp` module was removed in
# Python 3.12; this is the importlib recipe for importing a source file directly.
_player_class_spec = importlib.util.spec_from_file_location(
    'player_class', 'player_class.py'
)
player_class = importlib.util.module_from_spec(_player_class_spec)
# Register the module before executing it so it is visible in `sys.modules`,
# as it was with `imp.load_source`.
sys.modules['player_class'] = player_class
_player_class_spec.loader.exec_module(player_class)

In [7]:
import numpy as np

In [8]:
from keras.layers.embeddings import Embedding

**Padded Classification**

In [9]:
# Read the padded input sequences and the classification targets from CSV;
# the first column of each file is used as the row index.
inputs = pd.read_csv("data/basic_padded_inputs_classification.csv", index_col=0)
outputs = pd.read_csv("data/basic_classification_output.csv", index_col=0)

In [10]:
# Feature matrix: every column of the inputs frame as a NumPy array.
X = inputs.values
# Label vector: the `target` column of the outputs frame.
y = outputs['target'].values

In [11]:
# Pair each input row with its label so the two stay aligned when the data is split.
data = list(zip(X, y))

In [12]:
X.shape

(17136, 204)

In [13]:
y.shape

(17136,)

In [14]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the (input, label) pairs. The seed is fixed so the split is
# reproducible after a kernel restart, consistent with the `random_state=0` used by the
# other `train_test_split` calls in this notebook.
train, test = train_test_split(data, test_size=0.2, random_state=0)

In [15]:
# Unzip the (input, label) pairs of each split back into separate NumPy arrays.
X_train, y_train = (np.array(column) for column in zip(*train))
X_test, y_test = (np.array(column) for column in zip(*test))

In [24]:
# Hyperparameters for the padded classifier.
max_length = len(X[0])  # number of entries in one input row
batch_size = 128
num_of_epochs = 1

num_cells = 204  # units passed to the LSTM layer
drop_out_rate = 0.2  # rate passed to the Dropout layers of the models in this notebook

In [17]:
# Arguments for the Embedding layer: its first positional argument (input dimension)
# and its second (size of each embedding vector).
top_words = 3
# NOTE(review): "vecor" is a typo for "vector"; a rename must also update the cell that builds the model.
embedding_vecor_length = 1

In [20]:
# Embedding -> bidirectional LSTM -> dropout -> one sigmoid unit (binary classification).
model = Sequential(
    [
        Embedding(top_words, embedding_vecor_length, input_length=max_length),
        Bidirectional(LSTM(num_cells)),
        Dropout(rate=drop_out_rate),
        Dense(1, activation="sigmoid"),
    ]
)

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [25]:
# Train on the training split; the held-out split is passed as validation data.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=batch_size,
    epochs=num_of_epochs,
    verbose=1,
)

Train on 13708 samples, validate on 3428 samples
Epoch 1/1


In [26]:
model.layers[0].get_weights()

[array([[-0.00981959],
        [-0.04510924],
        [-0.08965405]], dtype=float32)]

In [27]:
X.shape

(17136, 204)

**check for unknown**

**Unknown length**

In [28]:
from sklearn.model_selection import train_test_split

In [37]:
def batch_generator(inputs, outputs, sizes=range(1, 205)):
    """Yield, forever, one (inputs, targets) batch per sequence length.

    All sequences in a batch have the same length, so each batch can be stacked
    into a dense array even though lengths differ between batches.

    Parameters
    ----------
    inputs : iterable of sequences
        Input sequences; ``len(sequence)`` decides which batch a pair belongs to.
    outputs : iterable
        Targets aligned with ``inputs`` (one per input sequence).
    sizes : iterable of int, optional
        Sequence lengths to cycle through, in order. Defaults to ``range(1, 205)``.
        Lengths for which there are no samples are skipped.

    Yields
    ------
    (numpy.ndarray, numpy.ndarray)
        The stacked inputs and the stacked targets, each reshaped to
        ``(batch, -1, 1)``: a trailing feature axis of size 1 is appended, and
        1-D stacks become ``(batch, 1, 1)``.

    Raises
    ------
    ValueError
        If none of the requested ``sizes`` has any sample.
    """
    # Bucket the pairs by length once instead of rescanning the whole dataset
    # for every size on every pass.
    buckets = {}
    for sequence, target in zip(inputs, outputs):
        buckets.setdefault(len(sequence), []).append((sequence, target))

    # Skip lengths with no samples: unpacking `zip(*[])` would raise otherwise.
    present_sizes = [size for size in sizes if size in buckets]
    if not present_sizes:
        raise ValueError("no sequences match any of the requested sizes")

    while True:
        for size in present_sizes:
            x, y = zip(*buckets[size])
            batch = np.array(x)
            output_batch = np.array(y)

            # `-1` keeps the time axis of a 2-D stack and collapses to 1 for a 1-D stack.
            batch = batch.reshape((batch.shape[0], -1, 1))
            output_batch = output_batch.reshape((output_batch.shape[0], -1, 1))

            yield batch, output_batch


def format_sequences_to_input(sequences, drop_columns=("opponent", "gene_204")):
    """Expand every row of ``sequences`` into all of its prefixes.

    Parameters
    ----------
    sequences : pandas.DataFrame
        One sequence per row.
    drop_columns : iterable of str, optional
        Columns removed before the prefixes are taken. Defaults to
        ``("opponent", "gene_204")``.

    Returns
    -------
    numpy.ndarray
        1-D object array of length ``rows * columns``. Prefixes are ordered by
        length first (all prefixes of length 1, then length 2, ...) and by row
        within each length. An empty frame gives an empty array.
    """
    inputs = sequences.drop(columns=list(drop_columns)).values
    number_of_rows, max_length = inputs.shape

    # The prefixes have different lengths. Recent NumPy refuses to build an array
    # from such a ragged list implicitly, so fill an object array element by element.
    prep_X_train = np.empty(number_of_rows * max_length, dtype=object)
    position = 0
    for histories in range(1, max_length + 1):
        for sequence in inputs:
            prep_X_train[position] = sequence[:histories]
            position += 1

    return prep_X_train

def format_sequences_to_output(sequences, drop_columns=("opponent", "gene_0")):
    """Expand every row of ``sequences`` into all of its prefixes (sequence targets).

    Parameters
    ----------
    sequences : pandas.DataFrame
        One target sequence per row.
    drop_columns : iterable of str, optional
        Columns removed before the prefixes are taken. Defaults to
        ``("opponent", "gene_0")``.

    Returns
    -------
    numpy.ndarray
        1-D object array of length ``rows * columns``. Prefixes are ordered by
        length first (all prefixes of length 1, then length 2, ...) and by row
        within each length. An empty frame gives an empty array.
    """
    inputs = sequences.drop(columns=list(drop_columns)).values
    number_of_rows, max_length = inputs.shape

    # The prefixes have different lengths. Recent NumPy refuses to build an array
    # from such a ragged list implicitly, so fill an object array element by element.
    prep_y_train = np.empty(number_of_rows * max_length, dtype=object)
    position = 0
    for histories in range(1, max_length + 1):
        for sequence in inputs:
            prep_y_train[position] = sequence[:histories]
            position += 1

    return prep_y_train

In [38]:
# Targets: every prefix of each row of the targets file.
# NOTE: `outputs`, `y` and `inputs` from the padded-classification section are overwritten here.
outputs = pd.read_csv("data/basic_targets.csv", index_col=0)
y = format_sequences_to_output(outputs)

# Inputs: every prefix of each row of the sequences file, in the same order as the targets.
sequences = pd.read_csv("data/basic_sequences.csv", index_col=0)
inputs = format_sequences_to_input(sequences)

In [39]:
# 80/20 split of the prefix pairs, seeded so it is reproducible.
input_train, input_test, output_train, output_test = train_test_split(
    inputs, y, test_size=0.2, random_state=0
)

In [40]:
# Endless generators that yield one batch per sequence length for each split.
trainGen = batch_generator(input_train, output_train)
testGen = batch_generator(input_test, output_test)

In [41]:
number_of_epochs = 1

In [42]:
from keras.layers import LSTM, Dense, TimeDistributed

In [43]:
# Sequence-to-sequence model: one sigmoid prediction per time step.
model = Sequential()

# `input_shape=(None, 1)` leaves the number of time steps open (one feature per step);
# `return_sequences=True` makes the LSTM emit an output at every step.
model.add(LSTM(
            100, return_sequences=True, input_shape=(None, 1)
        )
    )

model.add(Dropout(rate=drop_out_rate))

# Apply the same single-unit sigmoid layer to each time step.
model.add(TimeDistributed(Dense(1, activation="sigmoid")))

model.compile(
    loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"]
)

# 204 steps per epoch: one batch for each sequence length in `range(1, 205)` that
# `batch_generator` cycles through.
# NOTE(review): `fit_generator` is deprecated in newer Keras releases, where `fit`
# accepts generators directly - confirm against the installed version before changing.
history = model.fit_generator(
    trainGen,
    steps_per_epoch=204,
    epochs=number_of_epochs,
    verbose=1,
    validation_data=testGen,
    validation_steps=204,
)

Epoch 1/1


In [45]:
trainGen = batch_generator(input_train, output_train)

In [49]:
# Pull a single batch from the generator to inspect it.
test_x, test_y = next(trainGen)

In [50]:
test_x[0], test_y[0]

(array([[1],
        [0]]), array([[0],
        [0]]))

**Classification with no fixed length**

In [90]:
def batch_generator(inputs, outputs, sizes=range(1, 3)):
    """Yield, forever, one (inputs, targets) batch per sequence length.

    Variant for one scalar target per sequence: targets are returned as a column
    of shape ``(batch, 1)``.

    Parameters
    ----------
    inputs : iterable of sequences
        Input sequences; ``len(sequence)`` decides which batch a pair belongs to.
    outputs : iterable
        One target per input sequence, aligned with ``inputs``.
    sizes : iterable of int, optional
        Sequence lengths to cycle through, in order. Lengths for which there are
        no samples are skipped.
        NOTE(review): the default ``range(1, 3)`` keeps the original behaviour of
        only ever yielding lengths 1 and 2, which looks like a debugging
        leftover; pass ``range(1, 205)`` to cover every length.

    Yields
    ------
    (numpy.ndarray, numpy.ndarray)
        Inputs reshaped to ``(batch, -1, 1)`` and targets reshaped to ``(batch, 1)``.

    Raises
    ------
    ValueError
        If none of the requested ``sizes`` has any sample.
    """
    # Bucket the pairs by length once instead of rescanning the whole dataset
    # for every size on every pass.
    buckets = {}
    for sequence, target in zip(inputs, outputs):
        buckets.setdefault(len(sequence), []).append((sequence, target))

    # Skip lengths with no samples: unpacking `zip(*[])` would raise otherwise.
    present_sizes = [size for size in sizes if size in buckets]
    if not present_sizes:
        raise ValueError("no sequences match any of the requested sizes")

    while True:
        for size in present_sizes:
            x, y = zip(*buckets[size])
            batch = np.array(x)
            output_batch = np.array(y)

            # `-1` keeps the time axis of a 2-D stack and collapses to 1 for a 1-D stack.
            batch = batch.reshape((batch.shape[0], -1, 1))
            output_batch = output_batch.reshape((output_batch.shape[0], 1))

            yield batch, output_batch

In [78]:
# Scratch check: collect the training pairs whose input sequence has length 2,
# using the same filter as `batch_generator`.
batches = [
                (sequence, target)
                for sequence, target in zip(input_train, output_train)
                if len(sequence) == 2
            ]

In [80]:
x, y = zip(*batches)

In [87]:
output_batch = np.array(y)

In [89]:
output_batch.shape

(66,)

In [86]:
# NOTE(review): `batch` is not assigned at top level in any visible cell (it is only a
# local inside `batch_generator`), so this cell depends on leftover kernel state and
# will fail after Restart & Run All.
batch.reshape((batch.shape[0], batch.shape[1], 1))

array([[[1],
        [0]],

       [[1],
        [0]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [0]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [0]],

       [[1],
        [1]],

       [[0],
        [0]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [0]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [0]],

       [[0],
        [1]],

       [[1],
        [1]],

       [[1],
        [1]],

       [[1],
        [0]],

       [[1],
        [0]],

       [[1],
        [1]],

       [[1],
       

In [91]:
def format_sequences_to_output(sequences, drop_columns=("opponent", "gene_0")):
    """Return one scalar target per (prefix length, row) pair.

    For prefix length ``h`` the target of a row is its ``h``-th remaining value
    (index ``h - 1``) once ``drop_columns`` have been removed.

    Note: this redefines the ``format_sequences_to_output`` from the earlier
    "unknown length" section, which returned whole prefixes instead of scalars.

    Parameters
    ----------
    sequences : pandas.DataFrame
        One target sequence per row.
    drop_columns : iterable of str, optional
        Columns removed before the targets are taken. Defaults to
        ``("opponent", "gene_0")``.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``rows * columns``, ordered by prefix length first
        and by row within each length. An empty frame gives an empty array.
    """
    inputs = sequences.drop(columns=list(drop_columns)).values
    # Read the width from the shape so a frame without rows does not raise.
    max_length = inputs.shape[1]

    return np.array(
        [
            sequence[histories - 1]
            for histories in range(1, max_length + 1)
            for sequence in inputs
        ]
    )

In [92]:
# Rebuild the targets with the scalar-target `format_sequences_to_output` defined above.
outputs = pd.read_csv("data/basic_targets.csv", index_col=0)
y = format_sequences_to_output(outputs)

# Inputs are still every prefix of each row of the sequences file.
sequences = pd.read_csv("data/basic_sequences.csv", index_col=0)
inputs = format_sequences_to_input(sequences)

In [93]:
# 80/20 split of the (prefix, scalar target) pairs, seeded so it is reproducible.
input_train, input_test, output_train, output_test = train_test_split(
    inputs, y, test_size=0.2, random_state=0
)

In [94]:
# Endless per-length batch generators for each split (scalar-target variant).
trainGen = batch_generator(input_train, output_train)
testGen = batch_generator(input_test, output_test)

In [97]:
# Two stacked LSTMs: the first emits an output per time step for the second to consume,
# the second emits only its final output, which feeds a single sigmoid unit.
model = Sequential(
    [
        LSTM(100, return_sequences=True, input_shape=(None, 1)),
        LSTM(100),
        Dropout(rate=drop_out_rate),
        Dense(1, activation="sigmoid"),
    ]
)

model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [98]:
# NOTE(review): the `batch_generator` defined in this section only yields batches for
# sequence lengths 1 and 2, so these 204 steps keep cycling through those two batches.
# Validation is switched off here (the two arguments are commented out).
history = model.fit_generator(
    trainGen,
    steps_per_epoch=204,
    epochs=number_of_epochs,
    verbose=1,
#     validation_data=testGen,
#     validation_steps=204,
)

Epoch 1/1
