In [1]:
import pandas as pd

In [2]:
from pathlib import Path

import sequence_sensei
import numpy as np

In [3]:
import tqdm

In [4]:
import axelrod as axl

In [6]:
import keras

In [7]:
# Display the installed Axelrod version so the run is traceable to a library release.
axl.__version__

'4.2.0'

In [8]:
# Check the integer encoding of actions: the action with value 1 displays as C.
axl.Action(1)

C

**Outputs: best-response sequences (training targets)**

In [9]:
# Load every best-response CSV into its own frame (first column is the index).
# The paths are sorted because the order in which `Path.glob` yields files is
# filesystem-dependent; sorting keeps the row order of the combined frame the
# same on every machine.
dfs = [
    pd.read_csv(file, index_col=0)
    for file in sorted(Path("best_responses/").glob("*.csv"))
]

In [10]:
# Stack all loaded frames and renumber the rows 0..n-1 in one step.
df = pd.concat(dfs, ignore_index=True)

In [11]:
# Discard the 'index' column carried over from the CSV files.
df = df.drop('index', axis=1)

In [12]:
# Number of rows loaded.
df.shape[0]

5760

**Inputs: opponents' moves against each best-response sequence**

In [13]:
import axelrod as axl

In [14]:
# Names of the strategies kept for this data set; they are compared against
# the `name` of each instantiated Axelrod strategy.
top_twenty_strategies = [
    "EvolvedLookerUp2_2_2",
    "Evolved HMM 5",
    "PSO Gambler 2_2_2",
    "Evolved FSM 16",
    "Evolved FSM 16 Noise 05",
    "Evolved ANN 5",
    "Evolved ANN",
    "PSO Gambler 1_1_1",
    "Evolved FSM 4",
    "PSO Gambler Mem1",
    "Evolved ANN 5 Noise 05",
    "DBS",
    "Winner12",
    "Omega TFT",
    "DoubleCrosser",
    "Fool Me Once",
    "PSO Gambler 2_2_2 Noise 05",
    "BackStabber",
    "Gradual",
    "Meta Winner",
]

In [15]:
# Map each selected strategy's name to a player instance. Every strategy class
# is instantiated exactly once (rather than separately for the filter, the key
# and the value), so each key is the name of the very instance it maps to.
strategies = {
    player.name: player
    for player in (strategy() for strategy in axl.strategies)
    if player.name in top_twenty_strategies
}

In [16]:
# Names of the strategies that were actually found (the dict's keys, in order).
names = list(strategies)

In [17]:
# Keep only the rows whose opponent is one of the selected strategies.
df = df.loc[df['opponent'].isin(names)]

In [18]:
# Replay each stored sequence (the last 205 values of a row of `df`) against
# the row's opponent and record what the opponent played.
# NOTE(review): no random seed is set in this cell; if any selected strategy is
# stochastic the recorded moves may differ between runs — confirm.
opponents_moves = []
majority = 'Majority'
for row_number, label in enumerate(tqdm.tqdm_notebook(df['opponent'])):

    if majority not in label:
        # Plain label: everything from the first ':' onwards is dropped.
        key = label.split(':')[0]
    else:
        # Label containing 'Majority': re-join the text around it with a ':'
        # and strip that ':' again if nothing follows it.
        pieces = label.split(majority)
        key = pieces[0] + 'Majority:' + pieces[-1]
        if key[-1] == ":":
            key = key[:-1]
    player = strategies[key]

    genes = df.iloc[row_number].values[-205:]
    sequence = sequence_sensei.get_sequence_str(genes)
    cycler = axl.Cycler(sequence)

    match = axl.Match([player, cycler], turns=205)
    _ = match.play()

    moves_opp, moves_cycler = zip(*match.result)

    # The cycler must have played back exactly the stored sequence.
    assert sequence_sensei.get_sequence_str(moves_cycler) == sequence

    opponents_moves.append((player, *moves_opp))

HBox(children=(IntProgress(value=0, max=714), HTML(value='')))




**Transform Data for RNN**

In [19]:
# One list per match: the leading opponent entry is kept as is, and every
# recorded move after it is replaced by its `.value`.
sequences = [
    [player, *(move.value for move in moves)]
    for player, *moves in opponents_moves
]

In [20]:
# Tabulate the per-match lists: one row per entry of `sequences`.
data = pd.DataFrame(sequences)

In [21]:
# Reuse the column labels of `df`; the assignment only succeeds when both
# frames have the same number of columns.
data.columns = df.columns

In [22]:
# Save the opponents' moves (`data`) and the corresponding rows of `df` as two
# CSV files. The writes are separate statements, so the cell no longer
# evaluates to a throwaway `(None, None)` tuple, and the output directory is
# created first so a fresh checkout does not fail here.
Path('data').mkdir(parents=True, exist_ok=True)
data.to_csv('data/top_twenty_sequences.csv')
df.to_csv('data/top_twenty_targets.csv')

In [23]:
# Preview the first rows of the opponents'-moves table.
data.head()

Unnamed: 0,opponent,gene_0,gene_1,gene_2,gene_3,gene_4,gene_5,gene_6,gene_7,gene_8,...,gene_195,gene_196,gene_197,gene_198,gene_199,gene_200,gene_201,gene_202,gene_203,gene_204
0,PSO Gambler Mem1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,1,1,0,0
1,PSO Gambler Mem1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,PSO Gambler Mem1,1,1,1,1,1,1,1,1,1,...,0,0,0,0,0,0,0,0,1,1
3,Evolved ANN 5,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
4,Evolved ANN 5,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


**Transform the data into padded prefix sequences for classification**

In [24]:
# Reload both tables from disk; the first CSV column holds the saved index.
sequences = pd.read_csv('data/top_twenty_sequences.csv', index_col=0)
targets = pd.read_csv('data/top_twenty_targets.csv', index_col=0)

In [25]:
# Recode every 0 in the table as 2.
# NOTE(review): presumably this frees 0 to act as the padding value when the
# sequences are padded later — confirm.
inputs = sequences.replace({0: 2})

In [26]:
# Offset the two tables by one step: inputs lose the last gene column and
# outputs lose the first, so column k of `outputs` sits one gene later than
# column k of `inputs`. Both become plain arrays via `.values`.
inputs = inputs.drop(columns=["opponent", "gene_204"]).values
outputs = targets.drop(columns=["opponent", "gene_0"]).values

In [27]:
# Number of time steps in each input row. Reading the column count from the
# array's shape also works when `inputs` has zero rows, where `inputs[0]`
# would raise IndexError.
max_length = inputs.shape[1]

In [28]:
# Expand every input row into all of its prefixes (length 1 .. max_length).
# The label of a prefix is the entry of the matching `outputs` row at the
# prefix's last position.
prep_X_train, prep_y_train = [], []

for row, sequence in enumerate(inputs):
    assert len(sequence) == max_length
    for last in range(max_length):
        prep_X_train.append(sequence[:last + 1])
        prep_y_train.append(outputs[row][last])

In [29]:
# Pad every prefix at its end ('post') up to the full sequence length.
padded_X_train = keras.preprocessing.sequence.pad_sequences(
    prep_X_train,
    padding='post',
    maxlen=max_length,
)

In [30]:
# Wrap the padded array in a frame whose columns are numbered 0..max_length-1.
padded_X = pd.DataFrame(padded_X_train, columns=list(range(max_length)))

In [31]:
# Persist the padded prefixes (the classification inputs) to disk.
padded_X.to_csv('data/top_twenty_padded_inputs_classification.csv')

In [32]:
# Single-column frame holding the label of every prefix.
y = pd.DataFrame({'target': prep_y_train})

In [33]:
# Persist the prefix labels (the classification targets) to disk.
y.to_csv('data/top_twenty_classification_output.csv')

In [34]:
# Row counts of labels and inputs; the two numbers should be equal.
y.shape[0], padded_X.shape[0]

(145656, 145656)