In [40]:
import numpy as np

class sklearn_dataset:
    def __init__(self, data_array, target_array):
        self.data = np.array(data_array)
        self.target = np.array(target_array)

def load_original_data(file_name='origin.txt'):
    matches = []
    with open(file_name, 'r') as f:
        for line in f.readlines():
            json_str = line.strip()
            matches.append(json.loads(json_str))
    return matches

def load_10m_original_data():
    matches = []
    for i in range(1, 21):
        print('loading ' + str(i))
        with open(str(i)+'_origin.txt', 'r') as f:
            for line in f.readlines():
                json_str = line.strip()
                matches.append(json.loads(json_str))
    return matches

import json
from sklearn import metrics
from sklearn.model_selection import train_test_split
#matches = load_original_data(file_name='origin.txt')
#matches = load_10m_original_data()

In [44]:
import numpy as np
import mxnet as mx
import logging

total_hero_count = 114

def to4d(img):
    print(img.shape)
    return img.reshape(img.shape[0], 1, 1, total_hero_count)

# Create a place holder variable for the input data
data = mx.symbol.Variable('data')
fc1 = mx.symbol.FullyConnected(data=data, num_hidden=total_hero_count)
act1 = mx.symbol.Activation(data=fc1, act_type="relu")
fc2 = mx.symbol.FullyConnected(data=act1, num_hidden=total_hero_count)
act2 = mx.symbol.Activation(data=fc2, act_type="relu")
fc3 = mx.symbol.FullyConnected(data=data, num_hidden=total_hero_count)
act3 = mx.symbol.Activation(data=fc2, act_type="relu")
fc4 = mx.symbol.FullyConnected(data=act3, num_hidden=total_hero_count)
act4 = mx.symbol.Activation(data=fc3, act_type="relu")
fcn = mx.symbol.FullyConnected(data=act4, num_hidden=2)
mlp = mx.symbol.SoftmaxOutput(data=fcn, name='softmax')
model = mx.mod.Module(symbol=mlp)

In [51]:
import json
from sklearn import metrics
from sklearn.model_selection import train_test_split

batch_size = 100

def process_data(matches, duration_min=0, duration_max=7200, mmr_min=0, mmr_max=10000):
    teams_win = []
    matches_heroes = []
    for match in matches:
        duration = match['duration']
        mmr = match['avg_mmr']
        if isinstance(duration, int) is False or isinstance(mmr, int) is False:
            continue
        if duration < duration_min or duration > duration_max:
            continue
        if mmr > mmr_max or mmr < mmr_min:
            continue
        #if match['lobby_type'] is not 7:
        #    continue
        match_heroes = [0 for i in range(total_hero_count)]
        radiant_team = match['radiant_team'].split(',')
        dire_team = match['dire_team'].split(',')
        if match['radiant_win']:
            team_win = 1
        else:
            team_win = 0
        if len(radiant_team) is not 5 or len(dire_team) is not 5:
            continue
        for hero_id in radiant_team:
            match_heroes[int(hero_id)-1] = 1
        for hero_id in dire_team:
            match_heroes[int(hero_id)-1] = -1
        teams_win.append(team_win)
        matches_heroes.append(match_heroes)
    return sklearn_dataset(matches_heroes, teams_win)

checkpoint = mx.callback.do_checkpoint('mx_mlp')
logging.getLogger().setLevel(logging.DEBUG)

matches_all = None

for i in range(8,21):
    print('training ' + str(i))
    matches = load_original_data(file_name=str(i)+'_origin.txt')
    #matches_train, matches_test = train_test_split(matches, test_size=0.2)
    #matches_data_train = process_data(matches_train)
    #matches_data_test = process_data(matches_test)
    matches = process_data(matches)
    if matches_all is None:
        matches_all = matches
    else:
        matches_all.data = np.concatenate((matches_all.data, matches.data), axis=0)
        matches_all.target = np.concatenate((matches_all.target, matches.target), axis=0)
    
train_iter = mx.io.NDArrayIter(to4d(matches_all.data), matches_all.target, batch_size, shuffle=True)
#val_iter = mx.io.NDArrayIter(to4d(matches_all.data), matches_all.target, batch_size)

model.fit(
    train_iter,       # training data
    #eval_data=val_iter,
    optimizer='sgd',
    optimizer_params={'learning_rate':0.1},
    eval_metric='acc',
    num_epoch=10,
    epoch_end_callback=checkpoint
)

training 8
training 9
training 10
training 11
training 12
training 13
training 14
training 15
training 16
training 17
training 18
training 19
training 20
(6499848, 114)


INFO:root:Epoch[0] Train-accuracy=0.591381
INFO:root:Epoch[0] Time cost=35.623
INFO:root:Saved checkpoint to "mx_mlp-0001.params"
INFO:root:Epoch[1] Train-accuracy=0.595353
INFO:root:Epoch[1] Time cost=35.482
INFO:root:Saved checkpoint to "mx_mlp-0002.params"
INFO:root:Epoch[2] Train-accuracy=0.598305
INFO:root:Epoch[2] Time cost=35.290
INFO:root:Saved checkpoint to "mx_mlp-0003.params"
INFO:root:Epoch[3] Train-accuracy=0.600206
INFO:root:Epoch[3] Time cost=35.428
INFO:root:Saved checkpoint to "mx_mlp-0004.params"
INFO:root:Epoch[4] Train-accuracy=0.601447
INFO:root:Epoch[4] Time cost=35.314
INFO:root:Saved checkpoint to "mx_mlp-0005.params"
INFO:root:Epoch[5] Train-accuracy=0.602368
INFO:root:Epoch[5] Time cost=36.338
INFO:root:Saved checkpoint to "mx_mlp-0006.params"
INFO:root:Epoch[6] Train-accuracy=0.602953
INFO:root:Epoch[6] Time cost=35.372
INFO:root:Saved checkpoint to "mx_mlp-0007.params"
INFO:root:Epoch[7] Train-accuracy=0.603544
INFO:root:Epoch[7] Time cost=35.282
INFO:root:S

In [52]:
matches = load_original_data(file_name='20_origin.txt')
matches = process_data(matches)

val_iter = mx.io.NDArrayIter(to4d(matches.data), matches.target, batch_size)


(499987, 114)


In [62]:
score = model.score(val_iter, ['acc'])
print(score)

[('accuracy', 0.606528)]
