# Import timeframe data

In [7]:
import numpy as np
from keras.utils.np_utils import to_categorical

def first_x_sec(seconds, filepath, timecol):
    ''' Load the data rows of a csv file, optionally keeping only the early rows.

        The first line of the file is treated as a header and is skipped.
        Lines are split on ',' (no quote handling) and fields stay strings.

        param: seconds, int or None, keep only rows whose time value is
               <= seconds; None keeps every row
        param: filepath, string, the filepath of the csv datafile
        param: timecol, int, the index of the column that contains time
               (not used when seconds is None)
        return: numpy array with one entry per kept row
    '''
    # The context manager closes the file handle even when parsing fails.
    with open(filepath) as f:
        next(f, None)  # skip the header line
        # Strip the line terminator so the last field of a row does not carry
        # a trailing newline, and ignore blank lines (e.g. at end of file).
        rows = [line.rstrip('\r\n').split(',') for line in f if line.strip()]
    if seconds is None:
        return np.array(rows)
    return np.array([row for row in rows if int(row[timecol]) <= seconds])

# Keep only the first five minutes of each game (5 * 60 = 300 seconds).
# The first line of every csv is a header; the index of the time column differs per file.
# Other time-stamped tables that can be loaded the same way (currently disabled):
#ability_upgrade = first_x_sec(300, 'dataset/ability_upgrades.csv', 2)
#objectives = first_x_sec(300, 'dataset/objectives.csv', 7)
player_time_data = first_x_sec(seconds=300, filepath='dataset/player_time.csv', timecol=1)

# The match file contains more than just the label, so it is loaded without any time filter.
# The True/False value of Radiant win is the 10th column, i.e. index 9.
matches_data = first_x_sec(seconds=None, filepath='dataset/match.csv', timecol=None)



# Import players.csv data

In [11]:
def read_columns(filepath, start_col, stop_col):
    ''' Read a contiguous range of columns from a csv file as integers.

        The first line of the file is treated as a header and is skipped.
        Lines are split on ',' (no quote handling).

        param: filepath, string, the filepath of the csv datafile
        param: start_col, int, index of the first column to keep (inclusive)
        param: stop_col, int, index of the column to stop at (exclusive)
        return: 2D integer numpy array, one row per data line
    '''
    # The context manager closes the file handle even when parsing fails.
    with open(filepath) as f:
        next(f, None)  # skip the header line
        # Blank lines are ignored so they cannot produce ragged rows.
        rows = [line.strip().split(',')[start_col:stop_col] for line in f if line.strip()]
    # np.int was only an alias of the builtin int and has been removed from
    # NumPy, so the builtin is used for the conversion.
    return np.array(rows).astype(int)
    
# Only column index 2 of players.csv is read, so hero_data has a single column per row.
hero_data = read_columns('dataset/players.csv', start_col=2, stop_col=3)

[[ 86]
 [ 51]
 [ 83]
 [ 11]
 [ 67]
 [106]
 [102]
 [ 46]
 [  7]
 [ 73]
 [  7]
 [ 82]
 [ 71]
 [ 39]
 [ 21]
 [ 73]
 [ 22]
 [  5]
 [ 67]
 [106]]
[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [19]:
# One binary label per match: 1 when the value at index 9 of the match row is
# the string 'True' (Radiant win), otherwise 0.
labels = np.array([int(match[9] == 'True') for match in matches_data])
print('Shape of player time data matrix')
print(player_time_data.shape)
# np.int was only an alias of the builtin int and has been removed from NumPy,
# so the builtin is used for the conversion.
player_time_data = player_time_data.astype(int)

## Separate data matches. Each index in games is one match. One match is a 32*x matrix.
## x is the number of logs. There is a log every 60 seconds, so the first 5 min
## (t = 0, 60, ..., 300) give x=6.
num_matches = 50000
num_columns = player_time_data.shape[1]  # 32 for player_time.csv
# Collect the rows of each match first and stack them once per match. Column 0
# of a row is used as the match index. Separate lists per match avoid sharing
# one object between all entries.
frames_by_match = [[] for _ in range(num_matches)]
for row in player_time_data:
    frames_by_match[row[0]].append(row)
# Every row becomes one column of its match matrix; matches without any row
# keep an empty (num_columns, 0) matrix.
games = [
    np.stack(frames, axis=1) if frames else np.empty(shape=(num_columns, 0), dtype=int)
    for frames in frames_by_match
]
#Print the first match log
print("First game data")
print(games[0])

Shape of player time data matrix
(299994, 32)
First game data
[[   0    0    0    0    0    0]
 [   0   60  120  180  240  300]
 [   0  409  546  683  956 1056]
 [   0    0    0    1    1    1]
 [   0   63  283  314  485  649]
 [   0  142  622  927 1264 1451]
 [   0    1    4    9   11   13]
 [   0  186  645 1202 1583 1810]
 [   0  168  330  430  530  630]
 [   0    0    0    0    0    0]
 [   0  125  376  376  391  504]
 [   0  200  345  644  919 1102]
 [   0    0    1    6   11   15]
 [   0  193  698 1172 1610 1888]
 [   0  194  628  806 1281 1708]
 [   0    1    5    7   10   17]
 [   0  125  374  570 1216 1633]
 [   0  174  354  614 1082 1300]
 [   0    2    4    8    8   11]
 [   0   77  437  829 1318 1901]
 [   0  138  673  895 1087 1233]
 [   0    1    5    8   10   11]
 [   0   62  543  842 1048 1352]
 [   0  345  684  958 1500 1841]
 [   0    6   12   16   26   32]
 [   0  351  805 1135 1842 2162]
 [   0  100  200  300  400  500]
 [   0    0    0    0    0    0]
 [   0   77  2

# Normalize Player_time Data

In [20]:
from sklearn.preprocessing import normalize
# Row 0 of every game matrix is kept untouched; all remaining rows are rescaled
# by sklearn's normalize called with its default arguments.
for match_index in range(len(games)):
    untouched_row = games[match_index][0]
    scaled_rows = normalize(games[match_index][1:, :])
    games[match_index] = np.vstack((untouched_row, scaled_rows))



# Merge Data

In [21]:
print(games[0])
# Append the hero data to every game matrix: the slice of ten hero_data rows
# for a game is repeated once per time frame and stacked underneath the matrix.
# Assumes players.csv lists the ten players of each match consecutively and in
# match order -- TODO confirm.
players_per_match = 10
for match_index in range(len(games)):
    current = games[match_index]
    first_player = players_per_match * match_index
    match_heroes = hero_data[first_player:first_player + players_per_match]
    hero_rows = np.repeat(match_heroes, current.shape[1], axis=1)
    games[match_index] = np.vstack((current, hero_rows))
print("asd")
print(games[0])

[[ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.13483997  0.26967994  0.40451992  0.53935989  0.67419986]
 [ 0.          0.23768796  0.31730471  0.39692146  0.55557382  0.61368823]
 [ 0.          0.          0.          0.57735027  0.57735027  0.57735027]
 [ 0.          0.06877631  0.30894755  0.34278986  0.52946841  0.70850515]
 [ 0.          0.06369908  0.2790199   0.41583833  0.56701149  0.6508969 ]
 [ 0.          0.05076731  0.20306923  0.45690577  0.55844039  0.65997501]
 [ 0.          0.06712827  0.23278352  0.43380743  0.57131212  0.65323748]
 [ 0.          0.16801479  0.33002904  0.43003784  0.53004665  0.63005545]
 [ 0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.14884391  0.44772249  0.44772249  0.46558376  0.60013865]
 [ 0.          0.12326142  0.21262595  0.39690177  0.56638622  0.67917042]
 [ 0.          0.          0.05109761  0.30658568  0.56207374  0.7664642 ]
 [ 0.          0.06800379

In [22]:
labels_binary = to_categorical(labels)

# The number of logged frames can differ between games, so every frame (one
# column of a game matrix) becomes its own sample and the label of its game is
# repeated for each of those frames. The first entry of a frame is left out of
# the features (frame[1:]).
data = np.array([frame[1:] for game in games for frame in game.T])
labels_binary_length_corrected = np.array(
    [labels_binary[index] for index, game in enumerate(games) for _ in game.T]
)

# Unshuffled split: the first 90% of the frames are used for training, the
# remaining frames for testing.
num_train_examples = int(len(data) * 0.9)

x_train, x_test = data[:num_train_examples, :], data[num_train_examples:, :]
y_train = labels_binary_length_corrected[:num_train_examples, :]
y_test = labels_binary_length_corrected[num_train_examples:, :]

# Add a trailing axis of length 1 so the samples are 3D, as the LSTM input expects.
x_train = x_train[..., np.newaxis]
x_test = x_test[..., np.newaxis]

print(x_train.shape)

(269994, 41, 1)


In [23]:
from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
import keras

# Two stacked LSTM layers followed by a two-unit softmax output.
model = Sequential([
    # The first recurrent layer returns the whole sequence so it can feed the next LSTM.
    LSTM(units=50, input_shape=x_train.shape[1:], return_sequences=True),
    Dropout(0.2),
    # The second recurrent layer only returns its last output.
    LSTM(units=100, return_sequences=False),
    Dense(units=2),
    Activation('softmax'),
])

# Alternative compile configurations kept for reference:
#model.compile(loss='mse', optimizer='rmsprop')
#model.compile(loss=keras.losses.categorical_crossentropy,
#             optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.0, nesterov=False),
#             metrics=[keras.metrics.mae, keras.metrics.categorical_accuracy])
tracked_metrics = [keras.metrics.mae, keras.metrics.categorical_accuracy]
model.compile(loss='mse', optimizer='rmsprop', metrics=tracked_metrics)

In [24]:
# Train for 50 epochs in batches of 1000 samples; 20% of the training data is
# held out for validation. The value returned by fit is kept in res.
res = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=1000,
    validation_split=0.2,
)

Train on 215995 samples, validate on 53999 samples
Epoch 1/50

KeyboardInterrupt: 

In [None]:
# Test loss and accuracy: score the model on the held-out test frames and
# print whatever evaluate returns.
loss_and_metrics = model.evaluate(x=x_test, y=y_test)
print(loss_and_metrics)