# Import timeframe data

In [None]:
import numpy as np

def first_x_sec(seconds, filepath, timecol):
    ''' Load a csv file as an array of strings, optionally keeping only the
        rows logged within the first `seconds` seconds.

        The first line of the file is treated as a header and discarded.
        Every remaining non-blank line is split on ',' (no quoting support),
        so all returned values are strings.

        param: seconds, int or None, keep only rows whose time value is
               <= seconds; None keeps every row
        param: filepath, string, the filepath of the csv datafile
        param: timecol, int, the index of the column that contains time
               (not used when seconds is None)
        return: numpy array of strings, one row per kept line
    '''
    # Context manager guarantees the file handle is closed, even on error.
    with open(filepath) as f:
        raw_lines = f.readlines()

    # Drop the header, remove the line terminator so the last field does not
    # carry a trailing newline, and skip blank lines (e.g. an empty last line)
    # that would otherwise break the time-column lookup.
    rows = [
        line.rstrip('\r\n').split(',')
        for line in raw_lines[1:]
        if line.strip()
    ]
    if seconds is None:
        return np.array(rows)
    return np.array([row for row in rows if int(row[timecol]) <= seconds])

# Keep only the rows logged within the first 5 minutes (5 * 60 = 300 seconds).
# `timecol` is the index of the time column; the header line of each file is
# skipped inside first_x_sec.
#ability_upgrade = first_x_sec(300, 'dataset/ability_upgrades.csv', 2)
#objectives = first_x_sec(300, 'dataset/objectives.csv', 7)
player_time_data = first_x_sec(
    seconds=300,
    filepath='dataset/player_time.csv',
    timecol=1,
)

## The match file contains more than just the label, so every row is loaded
## unfiltered (seconds=None, hence no time column is needed).
# Column index 9 (the 10th column) holds the True/False value of Radiant win.
matches_data = first_x_sec(
    seconds=None,
    filepath='dataset/match.csv',
    timecol=None,
)



# Import players.csv data

In [None]:
def read_columns(filepath, start_col, stop_col):
    ''' Read a slice of columns from a csv file as an integer array.

        The first line of the file is treated as a header and discarded.
        Lines are split on ',' (no quoting support) and blank lines are
        skipped.

        param: filepath, string, the filepath of the csv datafile
        param: start_col, int, index of the first column to keep (inclusive)
        param: stop_col, int, index of the column to stop at (exclusive)
        return: 2-D numpy integer array, one row per data line
        raises: ValueError if a selected field is not an integer literal
    '''
    # Context manager guarantees the file handle is closed, even on error.
    with open(filepath) as f:
        raw_lines = f.readlines()

    rows = [
        line.strip().split(',')[start_col:stop_col]
        for line in raw_lines[1:]
        if line.strip()  # ignore blank lines such as an empty last line
    ]
    # The builtin `int` is what the removed `np.int` alias used to refer to.
    return np.array(rows).astype(int)
    
# Columns [2, 3) of players.csv as an (n_rows, 1) integer array.
# NOTE(review): presumably this is the hero id of each player -- confirm
# against the dataset schema.
asd = read_columns(filepath='dataset/players.csv', start_col=2, stop_col=3)
# Show the first 20 rows as a quick sanity check.
print(asd[:20])

In [None]:
# One binary label per match: 1 when column index 9 of the match row is the
# string 'True', otherwise 0.
labels = np.array([int(match[9] == 'True') for match in matches_data])
print('Shape of player time data matrix')
print(player_time_data.shape)
# `np.int` was only an alias of the builtin `int` and no longer exists in
# current NumPy releases, so the builtin is used directly.
player_time_data = player_time_data.astype(int)

## Separate data matches. Each index in data is one match. One match is a 32*x.
## x is the number of logs. We have data every 60 seconds, so for 5 min x=5
# Every slot initially refers to the same empty (32, 0) array; that is safe
# because np.concatenate always returns a new array and the slot is rebound.
games = [np.empty(shape=(32, 0), dtype=int)] * 50000
for x in player_time_data:
    # x[0] is used as the match index; the 32-value row becomes one new column.
    games[x[0]] = np.concatenate((games[x[0]], x.reshape(32, 1)), axis=1)
# Print the first match log
print("First game data")
print(games[0])

# Merge Data

In [None]:
# Stack the players.csv values underneath each match's time-series rows.
# Rows 10*i .. 10*i+9 of `asd` are taken for match i and repeated once per
# logged frame so the result lines up column-wise with the match matrix.
# NOTE(review): assumes players.csv holds 10 consecutive rows per match in
# match order -- confirm against the dataset.
for match_idx, match_log in enumerate(games):
    frame_count = match_log.shape[1]
    player_rows = asd[10 * match_idx:10 * (match_idx + 1)]
    repeated_rows = np.repeat(player_rows, frame_count, axis=1)
    games[match_idx] = np.vstack((match_log, repeated_rows))

print(games[0])

# Normalize Data

In [None]:
from sklearn.preprocessing import normalize

# Row 0 (the value used as the match index when the games were grouped) is
# kept untouched; every other row is passed through sklearn's `normalize`
# with its default settings.
for index, game in enumerate(games):
    untouched_first_row = game[0]
    normalized_rows = normalize(game[1:, :])
    games[index] = np.vstack((untouched_first_row, normalized_rows))

In [None]:
# `keras.utils.np_utils` is not available in recent Keras releases;
# `keras.utils.to_categorical` is the public import path in old and new ones.
from keras.utils import to_categorical
# One-hot encode the 0/1 match labels.
labels_binary = to_categorical(labels)

# We don't know how long the game existed for, therefore we append labels_binary for each frame.
data = []
labels_binary_length_corrected = []
for index, game in enumerate(games):
    # Each column of a game matrix is one logged frame.
    for frame in game.T:
        # Drop the first entry (row 0 of the game matrix) from the features.
        data.append(frame[1:])
        labels_binary_length_corrected.append(labels_binary[index])


data = np.array(data)
labels_binary_length_corrected = np.array(labels_binary_length_corrected)

# Sequential (unshuffled) 90/10 split into train and test frames.
num_train_examples = int(len(data) * 0.9)

x_train = data[:num_train_examples,:]
y_train = labels_binary_length_corrected[:num_train_examples,:]
x_test = data[num_train_examples:,:]
y_test = labels_binary_length_corrected[num_train_examples:,:]

# Add a trailing axis of length 1 so each sample is (n_features, 1), the
# 2-D per-sample shape passed to the LSTM as input_shape.
x_train = np.reshape(x_train, x_train.shape + (1,))
x_test = np.reshape(x_test, x_test.shape + (1,))

print(x_train.shape)

In [None]:
from keras.layers import Dense, Activation
from keras.models import Sequential
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
import keras

# Two stacked LSTM layers followed by a 2-unit softmax output.
model = Sequential([
    # The first recurrent layer returns the full sequence so that the second
    # LSTM receives one vector per step.
    LSTM(units=50, return_sequences=True, input_shape=x_train.shape[1:]),
    Dropout(0.2),
    # The second recurrent layer returns only its final output.
    LSTM(units=100, return_sequences=False),
    Dense(units=2),
    Activation('softmax'),
])

# Mean squared error against the one-hot labels, optimised with RMSprop;
# MAE and categorical accuracy are reported as additional metrics.
#model.compile(loss='mse', optimizer='rmsprop')
model.compile(
    loss='mse',
    optimizer='rmsprop',
    metrics=[keras.metrics.mae, keras.metrics.categorical_accuracy],
)
# Alternative configuration kept for reference:
#model.compile(loss=keras.losses.categorical_crossentropy,
#             optimizer=keras.optimizers.SGD(lr=0.0001, momentum=0.0, nesterov=False),
#             metrics=[keras.metrics.mae, keras.metrics.categorical_accuracy])

In [None]:
# Train for 50 epochs in batches of 1000 frames, holding out 20% of the
# training data for validation; the returned history object is kept in `res`.
res = model.fit(
    x=x_train,
    y=y_train,
    batch_size=1000,
    epochs=50,
    validation_split=0.2,
)

In [None]:
# Evaluate on the held-out test frames; the result holds the loss followed by
# the metrics configured in model.compile.
loss_and_metrics = model.evaluate(x=x_test, y=y_test)
print(loss_and_metrics)