In [1]:
import seaborn as sns
import pandas as pd
import pylab as plt
import numpy as np
from steam import WebAPI
from datetime import datetime
import os
import glob
import json
import tensorflow as tf
import sys
from IPython import display
import requests
import tensorflow.keras.backend as K
from sklearn import decomposition

#import logging
#logging.getLogger("tensorflow").setLevel(logging.ERROR)

#from tensorflow.python.framework.ops import disable_eager_execution
#disable_eager_execution()
# Silence AutoGraph conversion messages and record the TensorFlow version in use.
tf.autograph.set_verbosity(0)
print(tf.__version__)

# The Steam API key is read from a file three directories up so that it is
# never committed alongside the notebook.
api_file = os.path.join('..', '..', '..', 'apikeys', 'steam_api_key.txt')
with open(api_file, 'r') as fin:
    # readline() keeps the trailing newline; strip it so only the key itself
    # is handed to the API client.
    api = WebAPI(key=fin.readline().strip())

2.1.0


In [2]:
# Select seaborn's 'ticks' plotting style.
sns.set_style('ticks')

In [5]:
# App ids used to build the Steam Web API interface name.
# Original note: the beta id should be used while testing so the API is not
# hit aggressively; the request below uses the main id.
dota2_id = '570'
dota2_beta_id = '205790'

# Ask the API for the hero list and keep only the integer hero ids.
hero_list = api.call(''.join(('IEconDOTA2_', dota2_id, '.GetHeroes')))
hero_vocab = list(map(int, (hero['id'] for hero in hero_list['result']['heroes'])))
print(len(hero_vocab))

119


# Build TFRecord input pipeline

## Parse function

In [25]:
def _parse_function_extra_info(example_proto, num_heroes=119, heroes_per_team=5):
    """Parse one serialized record into one-hot hero features and a target.

    Args:
        example_proto: One serialized `tf.train.Example`, as yielded by a
            `tf.data.TFRecordDataset`.
        num_heroes: Depth of the one-hot encoding. Defaults to 119, the number
            of heroes returned by the Steam API when the notebook was run.
        heroes_per_team: Number of hero slots stored per team in each record.
            Defaults to 5.

    Returns:
        A tuple `(features, target)`:
          features: A dict with keys 'radiant_heroes' and 'dire_heroes'. Each
            value is a float32 tensor of shape
            `(heroes_per_team, num_heroes, 1)` holding one one-hot column per
            hero slot.
          target: The int64 tensor of shape `(1,)` stored under 'target'.
    """
    teams = ('radiant', 'dire')

    # Every hero slot and the target are stored as a single int64 value.
    feature_spec = {'target': tf.io.FixedLenFeature(1, dtype=tf.int64)}
    for team in teams:
        for slot in range(heroes_per_team):
            feature_spec[team + '_hero' + str(slot)] = tf.io.FixedLenFeature(
                1, dtype=tf.int64)

    parsed_features = tf.io.parse_single_example(example_proto, feature_spec)

    # Stack the per-slot encodings so every hero on a team is treated the same
    # way by the model.
    # NOTE(review): tf.one_hot yields an all-zero vector for values outside
    # [0, num_heroes). If the records hold raw Steam hero ids rather than
    # vocabulary indices, ids >= num_heroes would be silently dropped --
    # confirm against the code that wrote the TFRecords.
    feature_dict = {}
    for team in teams:
        feature_dict[team + '_heroes'] = tf.stack([
            tf.one_hot(parsed_features[team + '_hero' + str(slot)],
                       num_heroes, axis=0)
            for slot in range(heroes_per_team)])

    target = parsed_features['target']

    return feature_dict, target

#### Check the parse function worked

In [27]:
# Read the GZIP-compressed training records and parse each one into
# (features, target) in a single chained expression.
ds = tf.data.TFRecordDataset(
    os.path.join(
        'Dota_data',
        'mixed_skill',
        'dota2_training_data',
        'radiant_win_indivhero_dota2_training_data.tfrecords',
    ),
    compression_type="GZIP",
).map(_parse_function_extra_info)

In [28]:
# Shuffle with a 10000-element buffer, then pull and print one parsed example
# as a sanity check of the parse function.
ds = ds.shuffle(10000)
sample_example = next(iter(ds))
print(sample_example)

({'radiant_heroes': <tf.Tensor: shape=(5, 119, 1), dtype=float32, numpy=
array([[[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
   

## Input functions

In [46]:
# Create an input_fn that parses the tf.Examples from the given files,
# and split them into features and targets.
def _input_fn_extra_info(input_filenames, num_epochs=None, 
              shuffle=True, batch_size=50,compression_type=""):
    """Build the batched input dataset for training or evaluation.

    Args:
        input_filenames: TFRecord file name(s) passed to
            `tf.data.TFRecordDataset`.
        num_epochs: Number of passes over the data. `None` (the default)
            repeats the data indefinitely.
        shuffle: Whether to shuffle examples (10000-element buffer) before
            batching.
        batch_size: Number of examples per batch.
        compression_type: Compression of the TFRecord files, e.g. "GZIP", or
            "" for uncompressed files.

    Returns:
        A `tf.data.Dataset` yielding batches of `(features, target)` as
        produced by `_parse_function_extra_info`.
    """
    # Same pipeline as the check above: read the records and parse each one.
    ds = tf.data.TFRecordDataset(input_filenames,compression_type=compression_type)
    ds = ds.map(_parse_function_extra_info)

    if shuffle:
        ds = ds.shuffle(10000)
    ds = ds.batch(batch_size)
    # Honour num_epochs instead of always repeating forever; repeat(None)
    # still repeats indefinitely, so the default behaviour is unchanged.
    ds = ds.repeat(num_epochs)

    return ds

## define feature column constructor functions

In [37]:
def construct_numeric_columns(keys,shapes):
    """Build one numeric feature column per (key, shape) pair.

    Args:
        keys: Names of the input features.
        shapes: Shapes of those features, paired positionally with `keys`.

    Returns:
        A list of `tf.feature_column.numeric_column` objects, one per pair.
        Pairing stops at the shorter of the two inputs.
    """
    numeric_columns = []
    for column_key, column_shape in zip(keys, shapes):
        numeric_columns.append(
            tf.feature_column.numeric_column(column_key, shape=column_shape))
    return numeric_columns

# Train the model

## define model training function
The training function also writes logs to the model directory and saves the training parameters there as JSON.

In [56]:
def train_model(
        optimizer,
        model_type,
        steps,
        batch_size,
        feature_columns,
        training_file,
        validation_file,
        compression_type='',
        temp_log_flag=True,
        hidden_units=None,
        dnn_feature_columns=None):
    """Trains a classification model and evaluates it on the validation file.

    After training, the validation metrics are printed and the training
    parameters are written to `training_params.json` in the log directory.

    Args:
        optimizer: The optimizer handed to the estimator. For 'dnn_linear' the
          same optimizer is used for both the linear and the DNN part.
        model_type: A `string`, one of 'linear', 'dnn' or 'dnn_linear',
          selecting `LinearClassifier`, `DNNClassifier` or
          `DNNLinearCombinedClassifier`.
        steps: A non-zero `int`, the total number of training steps. A training step
          consists of a forward and backward pass using a single batch. The
          same number of batches is used for evaluation.
        batch_size: A `int` specifying the number of examples for each training step.
        feature_columns: The input feature columns to use (the linear columns
          for 'dnn_linear').
        training_file: A `string`  pointing to the training file.
        validation_file: A `string` pointing to the validation file.
        compression_type: A `string` specifying compression type
        temp_log_flag: A `bool` specifying whether to save logs in temp folder
        hidden_units: Layer sizes for the 'dnn' and 'dnn_linear' models.
          `None` (the default) is treated as an empty list.
        dnn_feature_columns: Feature columns for the DNN part of 'dnn_linear';
          defaults to `feature_columns` when `None`.

    Returns:
        The trained estimator.

    Raises:
        KeyError: If `model_type` is not one of the supported values.
    """
    # Fail fast, before any log directory or estimator is set up. str() keeps
    # the error a KeyError even when model_type is not a string.
    if model_type not in ('linear', 'dnn', 'dnn_linear'):
        raise KeyError('model_type not found. Got: ' + str(model_type))

    # Use a None sentinel instead of a shared mutable default list.
    hidden_units = [] if hidden_units is None else list(hidden_units)

    #allow logging in temp directory or directory to be included in git
    #  useful if tuning hyperparams or testing
    if temp_log_flag:
        logdir = os.path.join('..','..','..','tmp_log_dir_dota',
                          datetime.now().strftime("dota_%Y%m%d-%H%M%S"))
    else:
        logdir = os.path.join('log_dir',
                          datetime.now().strftime("%Y%m%d-%H%M%S"))

    #set config options
    config=tf.estimator.RunConfig(model_dir=logdir,
                                  save_summary_steps=20)

    # Create a classifier object.
    if model_type=='linear':
        classifier = tf.estimator.LinearClassifier(
              feature_columns=feature_columns,
              optimizer=optimizer,
              config=config)
    elif model_type=='dnn':
        classifier = tf.estimator.DNNClassifier(
              hidden_units=hidden_units,
              feature_columns=feature_columns,
              optimizer=optimizer,
              config=config)
    else:
        # 'dnn_linear' is the only remaining value after the check above.
        if dnn_feature_columns is None:
            dnn_feature_columns=feature_columns

        classifier = tf.estimator.DNNLinearCombinedClassifier(
              dnn_hidden_units=hidden_units,
              linear_feature_columns=feature_columns,
              dnn_feature_columns=dnn_feature_columns,
              linear_optimizer=optimizer,
              dnn_optimizer=optimizer,
              config=config)

    #define training and validation inputs
    training_input_fn =lambda: _input_fn_extra_info([training_file],
                                         batch_size=batch_size,
                                         compression_type=compression_type)
    validation_input_fn =lambda: _input_fn_extra_info([validation_file],
                                           batch_size=batch_size,
                                          compression_type=compression_type)

    print("Training model...")

    # Train the model, starting from the prior state.
    classifier.train(input_fn=training_input_fn,
                                steps=steps)

    # Compute validation metrics.
    # NOTE(review): the input pipeline repeats its data by default, so this
    # evaluates `steps` batches rather than exactly one pass over the
    # validation file -- confirm that is intended.
    evaluation_metrics = classifier.evaluate(
          input_fn=validation_input_fn,
          steps=steps)

    #save params for this training to a json in the logdir
    training_params = {'model_type':'single_target_'+model_type,
                       'steps':steps,
                       'batch_size':batch_size}
    if len(hidden_units)>0:
        training_params['hidden_units']=hidden_units

    # Make sure the directory exists before writing into it.
    os.makedirs(logdir, exist_ok=True)
    with open(os.path.join(logdir,'training_params.json'),'w') as fp:
        json.dump(training_params,fp)

    print("validation set metrics:")
    for m in evaluation_metrics:
        print(m, evaluation_metrics[m])
    print("---")

    return classifier


### Define training/validation files

In [53]:
# Both record files live under the same mixed-skill data directory.
_data_root = ('Dota_data', 'mixed_skill')

training_file_novocab = os.path.join(
    *_data_root,
    'dota2_training_data',
    'radiant_win_indivhero_dota2_training_data.tfrecords')
validation_file_novocab = os.path.join(
    *_data_root,
    'dota2_validation_data',
    'radiant_win_indivhero_dota2_validation_data.tfrecords')

### define feature columns for all info, no_stats, no info/tags, or hero only

In [40]:
# Shape of each hero feature column: five hero slots per team, each encoded
# over the hero vocabulary.
hero_shape = (5, len(hero_vocab))

In [43]:
# Hero-only feature set: one numeric column per team, both with the same shape.
feat_list_heroonly = ['radiant_heroes', 'dire_heroes']
feat_shape_list_heroonly = [hero_shape] * len(feat_list_heroonly)

feature_columns_heroonly = construct_numeric_columns(
    feat_list_heroonly, feat_shape_list_heroonly)

### Train models

#### Linear test cases (for hero only, no_stats and full extra_info)

In [57]:
# Linear baseline on the hero-only columns.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
classifier = train_model(
    optimizer=optimizer,
    model_type='linear',
    steps=5000,
    batch_size=1000,
    feature_columns=feature_columns_heroonly,
    training_file=training_file_novocab,
    validation_file=validation_file_novocab,
    compression_type='GZIP',
)

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir_dota/dota_20200327-211604', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSa

#### DNN model testing

In [58]:
# Small DNN (three hidden layers) on the hero-only columns.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
classifier = train_model(
    optimizer=optimizer,
    model_type='dnn',
    steps=5000,
    batch_size=500,
    feature_columns=feature_columns_heroonly,
    training_file=training_file_novocab,
    validation_file=validation_file_novocab,
    compression_type='GZIP',
    hidden_units=[8, 8, 4],
)

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir_dota/dota_20200327-214021', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSa

In [59]:
# Deeper DNN with AMSGrad, larger batches and more steps.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, amsgrad=True)
classifier = train_model(
    optimizer=optimizer,
    model_type='dnn',
    steps=10000,
    batch_size=5000,
    feature_columns=feature_columns_heroonly,
    training_file=training_file_novocab,
    validation_file=validation_file_novocab,
    compression_type='GZIP',
    hidden_units=[16, 8, 8, 4, 2],
)

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir_dota/dota_20200327-215440', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSa

INFO:tensorflow:global_step/sec: 1.46222
INFO:tensorflow:loss = 0.5245123, step = 6300 (68.389 sec)
INFO:tensorflow:global_step/sec: 1.47942
INFO:tensorflow:loss = 0.53203094, step = 6400 (67.594 sec)
INFO:tensorflow:global_step/sec: 1.46156
INFO:tensorflow:loss = 0.51924026, step = 6500 (68.420 sec)
INFO:tensorflow:global_step/sec: 1.49078
INFO:tensorflow:loss = 0.51823914, step = 6600 (67.079 sec)
INFO:tensorflow:Saving checkpoints for 6620 into ../../../tmp_log_dir_dota/dota_20200327-215440/model.ckpt.
INFO:tensorflow:global_step/sec: 1.44298
INFO:tensorflow:loss = 0.5201907, step = 6700 (69.302 sec)
INFO:tensorflow:global_step/sec: 1.47876
INFO:tensorflow:loss = 0.52669144, step = 6800 (67.625 sec)
INFO:tensorflow:global_step/sec: 1.47887
INFO:tensorflow:loss = 0.5274484, step = 6900 (67.618 sec)
INFO:tensorflow:global_step/sec: 1.48462
INFO:tensorflow:loss = 0.51525617, step = 7000 (67.358 sec)
INFO:tensorflow:global_step/sec: 1.49164
INFO:tensorflow:loss = 0.52772826, step = 7100

#### DNN + Linear (wide and deep) no_stats
This model has the best performance of any model tested.

In [60]:
# Wide-and-deep model: the hero-only columns feed both the linear and DNN parts.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, amsgrad=True)
classifier = train_model(
    optimizer=optimizer,
    model_type='dnn_linear',
    steps=15000,
    batch_size=5000,
    feature_columns=feature_columns_heroonly,
    training_file=training_file_novocab,
    validation_file=validation_file_novocab,
    compression_type='GZIP',
    hidden_units=[16, 8, 4, 4, 2],
)

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir_dota/dota_20200328-015416', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSa

INFO:tensorflow:global_step/sec: 2.72982
INFO:tensorflow:global_step/sec: 2.79633
INFO:tensorflow:loss = 0.5442089, step = 9400 (71.793 sec)
INFO:tensorflow:global_step/sec: 2.80711
INFO:tensorflow:global_step/sec: 2.81218
INFO:tensorflow:loss = 0.53056914, step = 9600 (71.217 sec)
INFO:tensorflow:global_step/sec: 2.77687
INFO:tensorflow:global_step/sec: 2.7739
INFO:tensorflow:loss = 0.5392291, step = 9800 (71.993 sec)
INFO:tensorflow:global_step/sec: 2.74977
INFO:tensorflow:global_step/sec: 2.77853
INFO:tensorflow:loss = 0.50402963, step = 10000 (71.208 sec)
INFO:tensorflow:Saving checkpoints for 10084 into ../../../tmp_log_dir_dota/dota_20200328-015416/model.ckpt.
INFO:tensorflow:global_step/sec: 2.81352
INFO:tensorflow:global_step/sec: 2.80869
INFO:tensorflow:loss = 0.5376838, step = 10200 (72.284 sec)
INFO:tensorflow:global_step/sec: 2.81279
INFO:tensorflow:loss = 0.51814324, step = 10400 (71.400 sec)
INFO:tensorflow:global_step/sec: 2.78684
INFO:tensorflow:global_step/sec: 2.80219