In [1]:
import seaborn as sns
import pandas as pd
import pylab as plt
import numpy as np
from steam import WebAPI
from datetime import datetime
import os
import glob
import json
import tensorflow as tf
import sys
from IPython import display
import requests
import tensorflow.keras.backend as K
from sklearn import decomposition

#import logging
#logging.getLogger("tensorflow").setLevel(logging.ERROR)

#from tensorflow.python.framework.ops import disable_eager_execution
#disable_eager_execution()
tf.autograph.set_verbosity(0)
print(tf.__version__)

2.1.0


In [2]:
sns.set_style('ticks')

In [3]:
champfile=os.path.join('lol_data','version9.10.1_champion.json')
with open(champfile,'r') as fin:
    champdata=json.load(fin)
champ_vocab=[]
for name,data in champdata['data'].items():
    champ_vocab.append(data['key'])
hero_vocab = [int(i) for i in champ_vocab]
vocab_len=len(hero_vocab)
print(len(hero_vocab))

144


# Build TFRecord input pipeline

## Parse function

In [4]:
def _parse_function_extra_info(example_proto):
    """Extracts features and labels.
  
    Args:
        example_proto: tf.Example protocol (unsure what this is)    
      Returns:
    A `tuple` `(labels, features)`:
      features: A dict of tensors representing the features
      labels: A tensor with the corresponding labels.
    """
    features = {}
    #there are 5 heroes per team 
    heroes_per_team=5
    #there are 4 pieces of info for a hero in LOL
    num_info_fields=4
    num_tags=6 #6 tags
    num_valid_stats=14 #14 valid stats (after removing mana and crit)
    #loop through five times to capture each hero on each team
    for i in range(heroes_per_team):
        #64 bit int for hero
        features["radiant_hero"+str(i)]=tf.io.FixedLenFeature(1, dtype=tf.int64)

        features["radiant_hero_info"+str(i)]=tf.io.FixedLenFeature(num_info_fields,
                                                                   dtype=tf.float32) 
        features["radiant_hero_tags"+str(i)] =tf.io.FixedLenFeature(num_tags,
                                                                    dtype=tf.int64) 
        features["radiant_hero_stats"+str(i)] =tf.io.FixedLenFeature(num_valid_stats,
                                                                     dtype=tf.float32) 
        
        features["dire_hero"+str(i)] =tf.io.FixedLenFeature(1, dtype=tf.int64) 
        features["dire_hero_info"+str(i)] =tf.io.FixedLenFeature(num_info_fields,
                                                                 dtype=tf.float32) 
        features["dire_hero_tags"+str(i)] =tf.io.FixedLenFeature(num_tags,
                                                                 dtype=tf.int64) 
        features["dire_hero_stats"+str(i)] =tf.io.FixedLenFeature(num_valid_stats,
                                                                  dtype=tf.float32) 
 
    #there are 11 different regions (at least in my most current dataset)
    num_regions=11
    #two skill levels in the lol api
    num_skill_levels=2
    #get num_heroes from hero_vocab
    num_heroes=len(hero_vocab)

    features["region"] = tf.io.FixedLenFeature(num_regions, dtype=tf.int64)
    features["skill_level"] = tf.io.FixedLenFeature(1, dtype=tf.int64)
    
    features["targets"] = tf.io.FixedLenFeature(1, dtype=tf.int64)
    
    #parse features
    parsed_features = tf.io.parse_single_example(example_proto, features)
    
    #load features into feature dict
    feature_dict={}
    #stack hero input so that each hero is treated the same in model
    feature_dict['radiant_heroes']=tf.stack([
                    tf.one_hot(parsed_features['radiant_hero0'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['radiant_hero1'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['radiant_hero2'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['radiant_hero3'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['radiant_hero4'],num_heroes,axis=0)])
    feature_dict['radiant_hero_info']=tf.stack([
                    parsed_features['radiant_hero_info0'],
                    parsed_features['radiant_hero_info1'],
                    parsed_features['radiant_hero_info2'],
                    parsed_features['radiant_hero_info3'],
                    parsed_features['radiant_hero_info4']])
    feature_dict['radiant_hero_tags']=tf.stack([
                    parsed_features['radiant_hero_tags0'],
                    parsed_features['radiant_hero_tags1'],
                    parsed_features['radiant_hero_tags2'],
                    parsed_features['radiant_hero_tags3'],
                    parsed_features['radiant_hero_tags4']])
    feature_dict['radiant_hero_stats']=tf.stack([
                    parsed_features['radiant_hero_stats0'],
                    parsed_features['radiant_hero_stats1'],
                    parsed_features['radiant_hero_stats2'],
                    parsed_features['radiant_hero_stats3'],
                    parsed_features['radiant_hero_stats4']])
    feature_dict['dire_heroes']=tf.stack([
                    tf.one_hot(parsed_features['dire_hero0'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['dire_hero1'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['dire_hero2'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['dire_hero3'],num_heroes,axis=0),
                    tf.one_hot(parsed_features['dire_hero4'],num_heroes,axis=0)])
    feature_dict['dire_hero_info']=tf.stack([
                    parsed_features['dire_hero_info0'],
                    parsed_features['dire_hero_info1'],
                    parsed_features['dire_hero_info2'],
                    parsed_features['dire_hero_info3'],
                    parsed_features['dire_hero_info4']])
    feature_dict['dire_hero_tags']=tf.stack([
                    parsed_features['dire_hero_tags0'],
                    parsed_features['dire_hero_tags1'],
                    parsed_features['dire_hero_tags2'],
                    parsed_features['dire_hero_tags3'],
                    parsed_features['dire_hero_tags4']])
    feature_dict['dire_hero_stats']=tf.stack([
                    parsed_features['dire_hero_stats0'],
                    parsed_features['dire_hero_stats1'],
                    parsed_features['dire_hero_stats2'],
                    parsed_features['dire_hero_stats3'],
                    parsed_features['dire_hero_stats4']])
    feature_dict['skill_level']= tf.one_hot(parsed_features['skill_level'],
                                            num_skill_levels)
    feature_dict['region']=parsed_features['region']
    target = parsed_features['targets']
    
    return feature_dict, target

#### Check the parse function worked

In [5]:
# Create the Dataset object.
ds = tf.data.TFRecordDataset(os.path.join('LOL_data',
                                          'mix_tier',
                                          'lol_training_data',
                                          'blue_win_extra_info_dota2_compat_lol_training_data.tfrecords'),
                                          compression_type="GZIP")
# Map features and labels with the parse function.
ds = ds.map(_parse_function_extra_info)

In [6]:
ds = ds.shuffle(10000)
print(next(iter(ds)))

({'radiant_heroes': <tf.Tensor: shape=(5, 144, 1), dtype=float32, numpy=
array([[[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
   

## Input functions

In [7]:
# Create an input_fn that parses the tf.Examples from the given files,
# and split them into features and targets.
def _input_fn_extra_info(input_filenames, num_epochs=None, 
              shuffle=True, batch_size=50,compression_type=""):
   
   # Same code as above; create a dataset and map features and labels.
    ds = tf.data.TFRecordDataset(input_filenames,compression_type=compression_type)
    ds = ds.map(_parse_function_extra_info)

    if shuffle:
        ds = ds.shuffle(10000)
    ds = ds.batch(batch_size)
    ds = ds.repeat()
    
    # Return the dataset.
    return ds

## define feature column constructor functions

In [8]:
def construct_numeric_columns(keys,shapes):
    """Construct categorical features for all features
        Args: 
            input_features: names of input feature columns to use
        returns:
            a set of categorical columns
    """
    return [tf.feature_column.numeric_column(key,shape=shape)
                                    for key,shape in zip(keys,shapes)]                                          

# Train the model

## define model training function
also includes saving and logging

In [9]:
def train_model(
        optimizer,
        model_type,
        steps,
        batch_size,
        feature_columns,
        training_file,
        validation_file,
        compression_type='',
        temp_log_flag=True,
        hidden_units=[],
        dnn_feature_columns=None):
    """Trains a classification model.
  
    In addition to training, this function also prints training progress information,
    as well as a plot of the training and validation loss over time.
      
    Args:
        optimizer: A `tf.Optimizer`, the optimizer to use for gradient descent
        steps: A non-zero `int`, the total number of training steps. A training step
          consists of a forward and backward pass using a single batch.
        batch_size: A `int` specifying the number of examples for each training step.
        feature_columns: A `set` specifying the input feature columns to use.
        training_file: A `string`  pointing to the training file.
        validation_file: A `string` pointing to the validation file.
        compression_type: A `string` specifying compression type
        temp_log_flag: A `bool` specifying whether to save logs in temp folder
        
    Returns:
        A `Classifier` object trained on the training data.
    """

    #allow logging in temp directory or directory to be included in git
    #  useful if tuning hyperparams or testing
    if temp_log_flag:
        logdir = os.path.join('..','..','..','tmp_log_dir',
                          datetime.now().strftime("lol_%Y%m%d-%H%M%S"))
    else:
        logdir = os.path.join('log_dir',
                          datetime.now().strftime("%Y%m%d-%H%M%S"))
    
    #set config options
    config=tf.estimator.RunConfig(model_dir=logdir,
                                  save_summary_steps=20)
    # Create a classifier object.
    if model_type=='linear':
        classifier = tf.estimator.LinearClassifier(
              feature_columns=feature_columns,
              optimizer=optimizer,
              config=config)
    elif model_type=='dnn':
        classifier = tf.estimator.DNNClassifier(
              hidden_units=hidden_units,
              feature_columns=feature_columns,
              optimizer=optimizer,
              config=config)
    elif model_type=='dnn_linear':
        if dnn_feature_columns==None:
            dnn_feature_columns=feature_columns

        classifier = tf.estimator.DNNLinearCombinedClassifier(
              dnn_hidden_units=hidden_units,
              linear_feature_columns=feature_columns,
              dnn_feature_columns=dnn_feature_columns,
              linear_optimizer=optimizer,
              dnn_optimizer=optimizer,
              config=config)       
    else:
        raise KeyError('model_type not found. Got: '+ model_type)
        
    #define training and validation inputs
    training_input_fn =lambda: _input_fn_extra_info([training_file], 
                                         batch_size=batch_size,
                                         compression_type=compression_type)
    validation_input_fn =lambda: _input_fn_extra_info([validation_file], 
                                           batch_size=batch_size,
                                          compression_type=compression_type)

    # Train the model, but do so inside a loop so that we can periodically assess
    # loss metrics.
    print("Training model...")
    
    training_loss = []
    validation_loss = []
    # Train the model, starting from the prior state.
    classifier.train(input_fn=training_input_fn,
                                steps=steps)
        
    ## Compute predictions.
    evaluation_metrics = classifier.evaluate(
          input_fn=training_input_fn,
          steps=steps)

    #save params for this training to a json in the logdir

    training_params = dict({'model_type':'single_target_'+model_type,
                            'steps':steps,
                            'batch_size':batch_size})
                            #'embedding_dims':embedding_dims,
                            #'optimizer':optimizer._name,
                            #'hyper_parameters':optimizer.get_config()})
    if len(hidden_units)>0:
        training_params['hidden_units']=hidden_units

    with open(os.path.join(logdir,'training_params.json'),'w') as fp:
        json.dump(training_params,fp)

    #save trained model
    #serving_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(
    #                      tf.feature_column.make_parse_example_spec(feature_columns))
    #export_path = classifier.export_saved_model(log_dir, serving_input_fn)
    
    
    print("validation set metrics:")
    for m in evaluation_metrics:
        print(m, evaluation_metrics[m])
    print("---")
    
    return classifier


### Define training/validation files

In [10]:
training_file_novocab = os.path.join('LOL_data',
                             'mix_tier',
                             'lol_training_data',
                             'blue_win_extra_info_dota2_compat_lol_training_data.tfrecords')
validation_file_novocab = os.path.join('LOL_data',
                             'mix_tier',
                             'lol_validation_data',
                             'blue_win_extra_info_dota2_compat_lol_validation_data.tfrecords')

### define feature columns for all info, no_stats, no info/tags, or hero only

In [11]:
#shapes of each feature column  (five axis is for heroes per team)
hero_shape=(5,vocab_len);shape_info=(5,4);
shape_tags=(5,6);shape_stats=(5,14);
shape_skills=2;shape_region=11;

In [14]:
#construct categorical columns for each hero
feat_list_all = ['radiant_heroes','radiant_hero_info','radiant_hero_stats',
                 'radiant_hero_tags','dire_heroes','dire_hero_info',
                 'dire_hero_stats','dire_hero_tags']

feat_list_all.extend(['skill_level','region'])
feat_shape_list_all = [hero_shape,shape_info,shape_stats,shape_tags,
                   hero_shape,shape_info,shape_stats,shape_tags]
feat_shape_list_all.extend([shape_skills,shape_region])

feature_columns_all=construct_numeric_columns(feat_list_all,
                                              feat_shape_list_all)

In [15]:
feat_list_no_stats = ['radiant_heroes','radiant_hero_info','radiant_hero_tags',
                      'dire_heroes','dire_hero_info','dire_hero_tags']
feat_list_no_stats.extend(['skill_level','region'])
feat_shape_list_no_stats = [hero_shape,shape_info,shape_tags,
                           hero_shape,shape_info,shape_tags]
feat_shape_list_no_stats.extend([shape_skills,shape_region])
feature_columns_no_stats=construct_numeric_columns(feat_list_no_stats,
                                                   feat_shape_list_no_stats)

In [16]:
feat_list_no_info_tags = ['radiant_heroes','radiant_hero_stats',
                      'dire_heroes','dire_hero_stats']
feat_list_no_info_tags.extend(['skill_level','region'])
feat_shape_list_no_info_tags = [hero_shape,shape_stats,
                           hero_shape,shape_stats]
feat_shape_list_no_info_tags.extend([shape_skills,shape_region])
feature_columns_no_info_tags=construct_numeric_columns(feat_list_no_info_tags,
                                                   feat_shape_list_no_info_tags)

In [17]:
feat_list_hero_only = ['radiant_heroes','dire_heroes']
feat_list_hero_only.extend(['skill_level','region'])

feat_shape_list_hero_only = [hero_shape,hero_shape]
feat_shape_list_hero_only.extend([shape_skills,shape_region])

feature_columns_hero_only=construct_numeric_columns(feat_list_hero_only,
                                                   feat_shape_list_hero_only)

### Train models

#### Linear test cases (for hero only, no_stats and full extra_info)

In [139]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
classifier = train_model(
            optimizer,
            model_type='linear',
            batch_size=1000, 
            steps=2000,
            feature_columns=feature_columns_all,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP')

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200317-203639', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

In [138]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
classifier = train_model(
            optimizer,
            model_type='linear',
            batch_size=1000, 
            steps=2000,
            feature_columns=feature_columns_no_stats,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP')

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200317-202039', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

In [143]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
classifier = train_model(
            optimizer,
            model_type='linear',
            batch_size=1000, 
            steps=2000,
            feature_columns=feature_columns_no_info_tags,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP')

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200317-221539', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

In [144]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
classifier = train_model(
            optimizer,
            model_type='linear',
            batch_size=1000, 
            steps=2000,
            feature_columns=feature_columns_hero_only,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP')

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200317-223113', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

#### DNN model testing

In [145]:
#p good accuracy of 55.8 (but not nearly as good as linear dnn)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
classifier = train_model(
            optimizer,
            model_type='dnn',
            batch_size=500, 
            steps=2000,
            feature_columns=feature_columns_all,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[8,8,4])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200317-224635', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

In [109]:
#p good accuracy of 55.8 (but not nearly as good as linear dnn)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
classifier = train_model(
            optimizer,
            model_type='dnn',
            batch_size=500, 
            steps=2000,
            feature_columns=feature_columns_no_stats,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[8,8,4])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200315-150423', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

In [151]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
classifier = train_model(
            optimizer,
            model_type='dnn',
            batch_size=500, 
            steps=5000,
            feature_columns=feature_columns_no_info_tags,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[8,8,4])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200318-122328', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

In [19]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
classifier = train_model(
            optimizer,
            model_type='dnn',
            batch_size=500, 
            steps=10000,
            feature_columns=feature_columns_hero_only,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[8,8,4])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200319-183927', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

INFO:tensorflow:loss = 0.6687241, step = 6900 (10.974 sec)
INFO:tensorflow:global_step/sec: 9.20905
INFO:tensorflow:loss = 0.6589246, step = 7000 (10.859 sec)
INFO:tensorflow:global_step/sec: 9.17665
INFO:tensorflow:loss = 0.6761679, step = 7100 (10.897 sec)
INFO:tensorflow:global_step/sec: 9.20453
INFO:tensorflow:loss = 0.67023975, step = 7200 (10.864 sec)
INFO:tensorflow:global_step/sec: 9.10138
INFO:tensorflow:loss = 0.690635, step = 7300 (10.987 sec)
INFO:tensorflow:global_step/sec: 9.23294
INFO:tensorflow:loss = 0.6619476, step = 7400 (10.831 sec)
INFO:tensorflow:global_step/sec: 9.20834
INFO:tensorflow:loss = 0.65514064, step = 7500 (10.860 sec)
INFO:tensorflow:global_step/sec: 9.1936
INFO:tensorflow:loss = 0.65394634, step = 7600 (10.877 sec)
INFO:tensorflow:global_step/sec: 9.06219
INFO:tensorflow:loss = 0.6626881, step = 7700 (11.035 sec)
INFO:tensorflow:global_step/sec: 9.17601
INFO:tensorflow:loss = 0.6809902, step = 7800 (10.898 sec)
INFO:tensorflow:global_step/sec: 10.9043

In [18]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
classifier = train_model(
            optimizer,
            model_type='dnn',
            batch_size=500, 
            steps=15000,
            feature_columns=feature_columns_hero_only,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[8,8,4])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200319-155147', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Va

INFO:tensorflow:global_step/sec: 7.50642
INFO:tensorflow:loss = 0.67255676, step = 6200 (13.322 sec)
INFO:tensorflow:global_step/sec: 7.94433
INFO:tensorflow:loss = 0.67606133, step = 6300 (12.588 sec)
INFO:tensorflow:global_step/sec: 7.88066
INFO:tensorflow:loss = 0.67152256, step = 6400 (12.689 sec)
INFO:tensorflow:global_step/sec: 8.36407
INFO:tensorflow:loss = 0.6722382, step = 6500 (11.956 sec)
INFO:tensorflow:global_step/sec: 8.12651
INFO:tensorflow:loss = 0.64441675, step = 6600 (12.305 sec)
INFO:tensorflow:global_step/sec: 8.55719
INFO:tensorflow:loss = 0.6848858, step = 6700 (11.686 sec)
INFO:tensorflow:global_step/sec: 8.36158
INFO:tensorflow:loss = 0.65720856, step = 6800 (11.960 sec)
INFO:tensorflow:global_step/sec: 7.65256
INFO:tensorflow:loss = 0.64851016, step = 6900 (13.067 sec)
INFO:tensorflow:global_step/sec: 7.69634
INFO:tensorflow:loss = 0.66937166, step = 7000 (12.993 sec)
INFO:tensorflow:global_step/sec: 7.34388
INFO:tensorflow:loss = 0.6736476, step = 7100 (13.61

INFO:tensorflow:global_step/sec: 8.02704
INFO:tensorflow:loss = 0.6752995, step = 14300 (12.458 sec)
INFO:tensorflow:global_step/sec: 7.69328
INFO:tensorflow:loss = 0.64210236, step = 14400 (12.999 sec)
INFO:tensorflow:global_step/sec: 8.3197
INFO:tensorflow:loss = 0.6399539, step = 14500 (12.020 sec)
INFO:tensorflow:global_step/sec: 8.23883
INFO:tensorflow:loss = 0.650269, step = 14600 (12.138 sec)
INFO:tensorflow:global_step/sec: 8.02397
INFO:tensorflow:loss = 0.674077, step = 14700 (12.463 sec)
INFO:tensorflow:global_step/sec: 7.98719
INFO:tensorflow:loss = 0.6656219, step = 14800 (12.520 sec)
INFO:tensorflow:global_step/sec: 7.93487
INFO:tensorflow:loss = 0.64961386, step = 14900 (12.603 sec)
INFO:tensorflow:Saving checkpoints for 15000 into ../../../tmp_log_dir/lol_20200319-155147/model.ckpt.
INFO:tensorflow:Loss for final step: 0.64294213.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-03-19T16:21:44Z
INFO:tens

#### DNN + Linear (wide and deep) no_stats
this has the best performance of any model tested

In [126]:
#NO STATS MODEL WITH 65% accuracy!!!
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,amsgrad=True)
classifier = train_model(
            optimizer,
            model_type='dnn_linear',
            batch_size=5000, 
            steps=10000,
            feature_columns=feature_columns_no_stats,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[16,8,4,4,2])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200315-223444', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

INFO:tensorflow:global_step/sec: 1.9
INFO:tensorflow:loss = 0.60612327, step = 9000 (104.781 sec)
INFO:tensorflow:global_step/sec: 1.94034
INFO:tensorflow:global_step/sec: 1.94512
INFO:tensorflow:loss = 0.62423235, step = 9200 (102.944 sec)
INFO:tensorflow:global_step/sec: 1.93931
INFO:tensorflow:global_step/sec: 1.94427
INFO:tensorflow:loss = 0.60941267, step = 9400 (102.810 sec)
INFO:tensorflow:global_step/sec: 1.92747
INFO:tensorflow:global_step/sec: 1.9157
INFO:tensorflow:loss = 0.6274685, step = 9600 (104.373 sec)
INFO:tensorflow:global_step/sec: 1.91451
INFO:tensorflow:global_step/sec: 1.91809
INFO:tensorflow:loss = 0.61424446, step = 9800 (104.312 sec)
INFO:tensorflow:global_step/sec: 1.94831
INFO:tensorflow:global_step/sec: 1.93772
INFO:tensorflow:loss = 0.598904, step = 10000 (101.803 sec)
INFO:tensorflow:Saving checkpoints for 10002 into ../../../tmp_log_dir/lol_20200315-223444/model.ckpt.
INFO:tensorflow:Loss for final step: 0.598904.
INFO:tensorflow:Calling model_fn.
INFO:t

In [128]:
#NO STATS MODEL WITH 62.6% ACCURACY!!!
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,amsgrad=True)
classifier = train_model(
            optimizer,
            model_type='dnn_linear',
            batch_size=5000, 
            steps=15000,
            feature_columns=feature_columns_no_stats,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[16,8,4,4,2])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200316-053854', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

INFO:tensorflow:global_step/sec: 1.78988
INFO:tensorflow:global_step/sec: 1.79296
INFO:tensorflow:loss = 0.62016493, step = 9000 (112.062 sec)
INFO:tensorflow:global_step/sec: 1.78187
INFO:tensorflow:global_step/sec: 1.7519
INFO:tensorflow:loss = 0.6333469, step = 9200 (113.009 sec)
INFO:tensorflow:global_step/sec: 1.79627
INFO:tensorflow:global_step/sec: 1.75889
INFO:tensorflow:loss = 0.63056785, step = 9400 (112.288 sec)
INFO:tensorflow:global_step/sec: 1.79626
INFO:tensorflow:global_step/sec: 1.79307
INFO:tensorflow:loss = 0.6313123, step = 9600 (111.713 sec)
INFO:tensorflow:Saving checkpoints for 9612 into ../../../tmp_log_dir/lol_20200316-053854/model.ckpt.
INFO:tensorflow:global_step/sec: 1.78055
INFO:tensorflow:global_step/sec: 1.77007
INFO:tensorflow:loss = 0.64383703, step = 9800 (112.688 sec)
INFO:tensorflow:global_step/sec: 1.78366
INFO:tensorflow:global_step/sec: 1.77999
INFO:tensorflow:loss = 0.62179923, step = 10000 (110.990 sec)
INFO:tensorflow:global_step/sec: 1.79408
I

#### Testing wide and deep (dnn/linear) with other features

In [21]:
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001,amsgrad=True)
classifier = train_model(
            optimizer,
            model_type='dnn_linear',
            batch_size=5000, 
            steps=15000,
            feature_columns=feature_columns_hero_only,
            training_file=training_file_novocab, 
            validation_file=validation_file_novocab,
            compression_type='GZIP',
            hidden_units=[16,8,4,4,2])

INFO:tensorflow:Using config: {'_model_dir': '../../../tmp_log_dir/lol_20200321-221727', '_tf_random_seed': None, '_save_summary_steps': 20, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Training model...
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHoo

KeyboardInterrupt: 