# DATASET DEFINITION

In [1]:
import tensorflow as tf
import pandas as pd
import os
import json
import shutil
from tensorflow.python.lib.io.file_io import FileIO as open_file

  from ._conv import register_converters as _register_converters


In [2]:
DATASET_CSV="gs://ml-research-injenia/estimators/datasets/superhero-set/preproc_trainset.csv"
EVALSET_CSV="gs://ml-research-injenia/estimators/datasets/superhero-set/preproc_evalset.csv"
OUT_DIR="gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07"
MODEL_DIR=OUT_DIR

BATCH_SIZE = 100
TRAIN_STEPS= 100000

HIDDEN_UNITS=[1024, 512, 256]
EMBEDDING_COLUMNS_SIZE=4

In [3]:
with open_file(DATASET_CSV, "r") as f:
    df = pd.read_csv(f)
for c in df.columns:
    if df[c].dtypes.name == 'bool':
        df[c]=df[c].map({True: 'TRUE', False: 'FALSE'})
df

Unnamed: 0,index,Gender,Eye_color,Race,Hair_color,Height,Publisher,Skin_color,Alignment,Weight,...,Web_Creation,Reality_Warping,Odin_Force,Symbiote_Costume,Speed_Force,Phoenix_Force,Molecular_Dissipation,Vision_Cryo,Omnipresent,Omniscient
0,Molten Man,Male,gold,-,Gold,196.0,Marvel Comics,-,bad,248.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
1,Killer Frost,Female,blue,Human,Blond,-99.0,DC Comics,blue,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
2,Raven,Female,indigo,Human,Black,165.0,DC Comics,-,neutral,50.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
3,Vegeta,Male,-,Saiyan,Black,168.0,Shueisha,-,bad,73.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
4,Gog,Male,-,-,-,-99.0,DC Comics,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
5,Redeemer II,Male,-,-,-,-99.0,Image Comics,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
6,Captain Marvel II,Male,blue,Human,Black,175.0,DC Comics,-,good,74.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
7,Proto-Goblin,Male,green,-,Blond,-99.0,Marvel Comics,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
8,Sentry,Male,blue,Mutant,Blond,188.0,Marvel Comics,-,neutral,87.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
9,Kylo Ren,Male,-,Human,-,-99.0,George Lucas,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE


In [4]:
with open_file(EVALSET_CSV, "r") as f:
    df_eval = pd.read_csv(f)
for c in df_eval.columns:
    if df_eval[c].dtypes.name == 'bool':
        df_eval[c]=df_eval[c].map({True: 'TRUE', False: 'FALSE'})
df_eval

Unnamed: 0,index,Gender,Eye_color,Race,Hair_color,Height,Publisher,Skin_color,Alignment,Weight,...,Web_Creation,Reality_Warping,Odin_Force,Symbiote_Costume,Speed_Force,Phoenix_Force,Molecular_Dissipation,Vision_Cryo,Omnipresent,Omniscient
0,Phantom,Male,-,-,-,-99.0,DC Comics,-,good,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
1,Absorbing Man,Male,blue,Human,No Hair,193.0,Marvel Comics,-,bad,122.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
2,Deadshot,Male,brown,Human,Brown,185.0,DC Comics,-,bad,91.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
3,Hal Jordan,Male,brown,Human,Brown,188.0,DC Comics,-,good,90.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
4,Black Flash,Male,-,God / Eternal,-,-99.0,DC Comics,-,neutral,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
5,Zatanna,Female,blue,Human,Black,170.0,DC Comics,-,good,57.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
6,Captain America,Male,blue,Human,blond,188.0,Marvel Comics,-,good,108.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
7,Captain Epic,Male,blue,-,Brown,188.0,Team Epic TV,-,good,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
8,Vulture,Male,brown,Human,No Hair,180.0,Marvel Comics,-,bad,79.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
9,Hyperion,Male,blue,Eternal,Red,183.0,Marvel Comics,-,good,207.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE


In [5]:
with open_file(os.path.join(MODEL_DIR,"trainset.csv"), "w") as f:
    df.to_csv(f,index=False)
with open_file(os.path.join(MODEL_DIR,"evalset.csv"), "w") as f:
    df_eval.to_csv(f,index=False)

In [6]:
exclude_columns=["Publisher","Height","Weight","Skin_color","Race"]
INDEX=df.columns[0]
COLUMNS=[x for x in df.columns[1:] if x not in exclude_columns]
LABEL_FIELD="Alignment"

# TRAINING

### Gather all the required informations from the dataset

In [7]:
FIELD_DEFAULTS=[]
FIELD_TYPES={}
FIELD_CATEGORIES={}
dtypes=dict(df.dtypes)
for c in COLUMNS:
    if(str(dtypes[c])=="bool"):
        FIELD_DEFAULTS.append([0])
        FIELD_TYPES[c]="bool"
    elif(str(dtypes[c])=="object"):
        FIELD_DEFAULTS.append(["NA"])
        FIELD_TYPES[c]="string"
        FIELD_CATEGORIES[c]=list(sorted(set(list(df[c].unique())+["NA"])))
    else:  
        FIELD_DEFAULTS.append([0.0])
        FIELD_TYPES[c]="number"
FIELD_CATEGORIES[LABEL_FIELD]=[x for x in FIELD_CATEGORIES[LABEL_FIELD] if x != "NA"]

### Save dataset informations

In [8]:
#if(not os.path.isdir(MODEL_DIR)):
#    os.makedirs(MODEL_DIR)

with open_file(os.path.join(MODEL_DIR,"data","dataset_fields.json"), "w") as f:
    json.dump(
        {
            "fields":{
                "columns" : [x for x in COLUMNS if x != LABEL_FIELD],
                "types" : {x:FIELD_TYPES[x] for x in FIELD_TYPES if x != LABEL_FIELD},
                "categories" : {x:FIELD_CATEGORIES[x] for x in FIELD_CATEGORIES if x != LABEL_FIELD}
            },
            "label":{
                "column" : LABEL_FIELD,
                "type" : FIELD_TYPES[LABEL_FIELD],
                "categories" : FIELD_CATEGORIES[LABEL_FIELD],
            }
        },
        f
    )

### Create a dataset object

In [9]:
with open_file(os.path.join(MODEL_DIR,"data","actual_trainset.csv"), "w") as f:
    df[[INDEX]+COLUMNS].to_csv(f, index=False)
with open_file(os.path.join(MODEL_DIR,"data","actual_evalset.csv"), "w") as f:
    df_eval[[INDEX]+COLUMNS].to_csv(f, index=False)

In [10]:
#tf.reset_default_graph()
with open_file(os.path.join(MODEL_DIR,"data","tf_trainset.csv"), "w") as f:
    df[COLUMNS].to_csv(f, index=False)

def create_trainset():
    ds = tf.data.TextLineDataset(os.path.join(MODEL_DIR,"data","tf_trainset.csv")).skip(1)
    def _parse_line(line):
        # Decode the line into its fields
        fields = tf.decode_csv(line, FIELD_DEFAULTS)

        # Pack the result into a dictionary
        features = dict(zip(COLUMNS,fields))

        # Separate the label from the features
        label = features.pop(LABEL_FIELD)

        return features, label

    parsed_ds = ds.map(_parse_line)
    
    return parsed_ds.shuffle(TRAIN_STEPS).repeat().batch(BATCH_SIZE)




with open_file(os.path.join(MODEL_DIR,"data","tf_evalset.csv"), "w") as f:
    df_eval[COLUMNS].to_csv(f, index=False)

def create_evalset():
    ds = tf.data.TextLineDataset(os.path.join(MODEL_DIR,"data","tf_evalset.csv")).skip(1)
    def _parse_line(line):
        # Decode the line into its fields
        fields = tf.decode_csv(line, FIELD_DEFAULTS)

        # Pack the result into a dictionary
        features = dict(zip(COLUMNS,fields))

        # Separate the label from the features
        label = features.pop(LABEL_FIELD)

        return features, label

    parsed_ds = ds.map(_parse_line)
    
    return parsed_ds.shuffle(TRAIN_STEPS).repeat().batch(BATCH_SIZE)

### build and train the estimator

In [11]:
print(list(FIELD_CATEGORIES[LABEL_FIELD]))
feature_columns=[]
for c in COLUMNS:
    if c == LABEL_FIELD:
        continue
        
    if FIELD_TYPES[c]=="string":
        feature_columns.append(tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_vocabulary_list(
                key=c,
                vocabulary_list=list(FIELD_CATEGORIES[c])
            ),
            EMBEDDING_COLUMNS_SIZE
        ))
    if FIELD_TYPES[c]=="number":
        feature_columns.append(tf.feature_column.numeric_column(key=c))
        
est = tf.estimator.DNNClassifier(
    HIDDEN_UNITS,
    feature_columns,
    n_classes=len(FIELD_CATEGORIES[LABEL_FIELD]),
    label_vocabulary=list(FIELD_CATEGORIES[LABEL_FIELD]),
    model_dir=MODEL_DIR
)
# Train the estimator
#est.train(
#    steps=TRAIN_STEPS,
#    input_fn=create_dataset)

# Train and evaluate the estimator
tf.estimator.train_and_evaluate(
    est,
    tf.estimator.TrainSpec(input_fn=create_trainset, max_steps=TRAIN_STEPS),
    tf.estimator.EvalSpec(input_fn=create_evalset)
)

['bad', 'good', 'neutral']
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff04ba80fd0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow

INFO:tensorflow:global_step/sec: 17.9915
INFO:tensorflow:loss = 0.7095619, step = 42225 (5.559 sec)
INFO:tensorflow:global_step/sec: 18.0514
INFO:tensorflow:loss = 0.0151975285, step = 42325 (5.539 sec)
INFO:tensorflow:global_step/sec: 17.8705
INFO:tensorflow:loss = 0.87894845, step = 42425 (5.596 sec)
INFO:tensorflow:global_step/sec: 17.7994
INFO:tensorflow:loss = 1.3967955, step = 42525 (5.619 sec)
INFO:tensorflow:global_step/sec: 18.0556
INFO:tensorflow:loss = 0.0087587815, step = 42625 (5.538 sec)
INFO:tensorflow:global_step/sec: 17.908
INFO:tensorflow:loss = 0.011664378, step = 42725 (5.584 sec)
INFO:tensorflow:global_step/sec: 17.699
INFO:tensorflow:loss = 0.0079541365, step = 42825 (5.650 sec)
INFO:tensorflow:global_step/sec: 17.8306
INFO:tensorflow:loss = 0.008432875, step = 42925 (5.608 sec)
INFO:tensorflow:global_step/sec: 17.9535
INFO:tensorflow:loss = 0.012036237, step = 43025 (5.570 sec)
INFO:tensorflow:global_step/sec: 17.7044
INFO:tensorflow:loss = 0.011482999, step = 43

INFO:tensorflow:global_step/sec: 8.66174
INFO:tensorflow:loss = 0.6978102, step = 48818 (10.330 sec)
INFO:tensorflow:global_step/sec: 8.9016
INFO:tensorflow:loss = 0.00872105, step = 48918 (11.234 sec)
INFO:tensorflow:global_step/sec: 15.6662
INFO:tensorflow:loss = 0.7229447, step = 49018 (6.382 sec)
INFO:tensorflow:global_step/sec: 17.4698
INFO:tensorflow:loss = 0.010766565, step = 49118 (5.725 sec)
INFO:tensorflow:global_step/sec: 17.1345
INFO:tensorflow:loss = 0.7041786, step = 49218 (5.836 sec)
INFO:tensorflow:global_step/sec: 17.6122
INFO:tensorflow:loss = 0.6689156, step = 49318 (5.677 sec)
INFO:tensorflow:global_step/sec: 17.8264
INFO:tensorflow:loss = 0.009813773, step = 49418 (5.610 sec)
INFO:tensorflow:global_step/sec: 17.6617
INFO:tensorflow:loss = 0.6884022, step = 49518 (5.662 sec)
INFO:tensorflow:global_step/sec: 17.68
INFO:tensorflow:loss = 0.7035912, step = 49618 (5.656 sec)
INFO:tensorflow:global_step/sec: 17.4321
INFO:tensorflow:loss = 0.009657765, step = 49718 (5.736

INFO:tensorflow:loss = 0.7983488, step = 55288 (5.620 sec)
INFO:tensorflow:global_step/sec: 17.8286
INFO:tensorflow:loss = 0.0070031174, step = 55388 (5.609 sec)
INFO:tensorflow:global_step/sec: 18.1262
INFO:tensorflow:loss = 0.7360921, step = 55488 (5.517 sec)
INFO:tensorflow:global_step/sec: 18.0414
INFO:tensorflow:loss = 0.0054350933, step = 55588 (5.543 sec)
INFO:tensorflow:global_step/sec: 18.0076
INFO:tensorflow:loss = 0.006956601, step = 55688 (5.553 sec)
INFO:tensorflow:global_step/sec: 17.49
INFO:tensorflow:loss = 0.0033472416, step = 55788 (5.718 sec)
INFO:tensorflow:global_step/sec: 18.029
INFO:tensorflow:loss = 0.851715, step = 55888 (5.546 sec)
INFO:tensorflow:global_step/sec: 17.9032
INFO:tensorflow:loss = 0.006102522, step = 55988 (5.586 sec)
INFO:tensorflow:global_step/sec: 17.9635
INFO:tensorflow:loss = 0.005139879, step = 56088 (5.567 sec)
INFO:tensorflow:global_step/sec: 17.9613
INFO:tensorflow:loss = 0.008087066, step = 56188 (5.568 sec)
INFO:tensorflow:global_step/

INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-06-07-13:34:20
INFO:tensorflow:Saving dict for global step 62699: accuracy = 0.5935, average_loss = 6.145805, global_step = 62699, loss = 614.5805
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model.ckpt-62699
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 62700 into gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model.ckpt.
INFO:tensorflow:loss = 0.0

INFO:tensorflow:global_step/sec: 17.7776
INFO:tensorflow:loss = 0.7189366, step = 69900 (5.625 sec)
INFO:tensorflow:global_step/sec: 18.0566
INFO:tensorflow:loss = 0.0050117425, step = 70000 (5.538 sec)
INFO:tensorflow:global_step/sec: 17.9501
INFO:tensorflow:loss = 0.004777463, step = 70100 (5.571 sec)
INFO:tensorflow:global_step/sec: 18.0933
INFO:tensorflow:loss = 0.003405476, step = 70200 (5.527 sec)
INFO:tensorflow:global_step/sec: 17.7099
INFO:tensorflow:loss = 0.71280247, step = 70300 (5.646 sec)
INFO:tensorflow:global_step/sec: 17.8958
INFO:tensorflow:loss = 0.004815519, step = 70400 (5.588 sec)
INFO:tensorflow:global_step/sec: 18.0022
INFO:tensorflow:loss = 0.00514838, step = 70500 (5.554 sec)
INFO:tensorflow:global_step/sec: 17.8288
INFO:tensorflow:loss = 0.0047084056, step = 70600 (5.609 sec)
INFO:tensorflow:global_step/sec: 17.5884
INFO:tensorflow:loss = 0.009188017, step = 70700 (5.685 sec)
INFO:tensorflow:global_step/sec: 17.8344
INFO:tensorflow:loss = 0.7214104, step = 70

INFO:tensorflow:loss = 0.004657271, step = 76378 (5.537 sec)
INFO:tensorflow:global_step/sec: 18.1409
INFO:tensorflow:loss = 0.004955592, step = 76478 (5.512 sec)
INFO:tensorflow:global_step/sec: 18.2769
INFO:tensorflow:loss = 0.0021248823, step = 76578 (5.471 sec)
INFO:tensorflow:global_step/sec: 18.1611
INFO:tensorflow:loss = 0.0025922165, step = 76678 (5.506 sec)
INFO:tensorflow:global_step/sec: 18.1821
INFO:tensorflow:loss = 0.0017152586, step = 76778 (5.500 sec)
INFO:tensorflow:global_step/sec: 17.868
INFO:tensorflow:loss = 0.0035074335, step = 76878 (5.597 sec)
INFO:tensorflow:global_step/sec: 17.8154
INFO:tensorflow:loss = 0.0041415384, step = 76978 (5.613 sec)
INFO:tensorflow:global_step/sec: 17.9936
INFO:tensorflow:loss = 1.389756, step = 77078 (5.557 sec)
INFO:tensorflow:global_step/sec: 17.9769
INFO:tensorflow:loss = 0.0024488962, step = 77178 (5.563 sec)
INFO:tensorflow:global_step/sec: 17.882
INFO:tensorflow:loss = 0.0031472617, step = 77278 (5.592 sec)
INFO:tensorflow:glo

INFO:tensorflow:global_step/sec: 18.0398
INFO:tensorflow:loss = 0.0044699707, step = 82962 (5.544 sec)
INFO:tensorflow:global_step/sec: 17.893
INFO:tensorflow:loss = 0.0036426785, step = 83062 (5.588 sec)
INFO:tensorflow:global_step/sec: 17.8312
INFO:tensorflow:loss = 0.0028453525, step = 83162 (5.608 sec)
INFO:tensorflow:global_step/sec: 17.8543
INFO:tensorflow:loss = 0.80150706, step = 83262 (5.602 sec)
INFO:tensorflow:global_step/sec: 17.7506
INFO:tensorflow:loss = 0.0018182433, step = 83362 (5.634 sec)
INFO:tensorflow:global_step/sec: 18.0855
INFO:tensorflow:loss = 0.0030403053, step = 83462 (5.529 sec)
INFO:tensorflow:global_step/sec: 17.5673
INFO:tensorflow:loss = 0.0020772032, step = 83562 (5.692 sec)
INFO:tensorflow:global_step/sec: 17.9003
INFO:tensorflow:loss = 0.002232374, step = 83662 (7.525 sec)
INFO:tensorflow:global_step/sec: 13.2676
INFO:tensorflow:loss = 0.0058173756, step = 83762 (5.598 sec)
INFO:tensorflow:global_step/sec: 17.8471
INFO:tensorflow:loss = 0.0013857321,

INFO:tensorflow:loss = 0.6904542, step = 90962 (5.547 sec)
INFO:tensorflow:global_step/sec: 17.9904
INFO:tensorflow:loss = 0.0020138163, step = 91062 (5.559 sec)
INFO:tensorflow:global_step/sec: 18.0718
INFO:tensorflow:loss = 0.0028685525, step = 91162 (5.533 sec)
INFO:tensorflow:global_step/sec: 18.1209
INFO:tensorflow:loss = 0.002155644, step = 91262 (5.519 sec)
INFO:tensorflow:global_step/sec: 17.838
INFO:tensorflow:loss = 0.0032614977, step = 91362 (5.607 sec)
INFO:tensorflow:global_step/sec: 17.8548
INFO:tensorflow:loss = 0.817647, step = 91462 (5.601 sec)
INFO:tensorflow:global_step/sec: 18.1475
INFO:tensorflow:loss = 0.7258201, step = 91562 (5.510 sec)
INFO:tensorflow:global_step/sec: 18.0728
INFO:tensorflow:loss = 0.00261384, step = 91662 (5.534 sec)
INFO:tensorflow:Saving checkpoints for 91686 into gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model.ckpt.
INFO:tensorflow:Loss for final step: 0.0051587857.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 18.0158
INFO:tensorflow:loss = 0.0026377884, step = 97487 (7.678 sec)
INFO:tensorflow:global_step/sec: 12.9725
INFO:tensorflow:loss = 0.0014358463, step = 97587 (5.581 sec)
INFO:tensorflow:global_step/sec: 17.755
INFO:tensorflow:loss = 0.0024661606, step = 97687 (5.632 sec)
INFO:tensorflow:global_step/sec: 17.7648
INFO:tensorflow:loss = 0.0027166451, step = 97787 (5.629 sec)
INFO:tensorflow:global_step/sec: 17.7002
INFO:tensorflow:loss = 0.76771516, step = 97887 (5.649 sec)
INFO:tensorflow:global_step/sec: 17.8205
INFO:tensorflow:loss = 0.76660883, step = 97987 (5.611 sec)
INFO:tensorflow:global_step/sec: 17.818
INFO:tensorflow:loss = 0.79133683, step = 98087 (5.613 sec)
INFO:tensorflow:global_step/sec: 18.1483
INFO:tensorflow:loss = 0.0030063405, step = 98187 (5.510 sec)
INFO:tensorflow:global_step/sec: 17.8995
INFO:tensorflow:loss = 0.002097207, step = 98287 (5.586 sec)
INFO:tensorflow:global_step/sec: 17.8078
INFO:tensorflow:loss = 0.0019056873, step

In [12]:
df[COLUMNS]

Unnamed: 0,Gender,Eye_color,Hair_color,Alignment,Agility,Accelerated_Healing,Lantern_Power_Ring,Dimensional_Awareness,Cold_Resistance,Durability,...,Web_Creation,Reality_Warping,Odin_Force,Symbiote_Costume,Speed_Force,Phoenix_Force,Molecular_Dissipation,Vision_Cryo,Omnipresent,Omniscient
0,Male,gold,Gold,bad,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
1,Female,blue,Blond,bad,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
2,Female,indigo,Black,neutral,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
3,Male,-,Black,bad,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
4,Male,-,-,bad,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
5,Male,-,-,bad,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
6,Male,blue,Black,good,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
7,Male,green,Blond,bad,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
8,Male,blue,Blond,neutral,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
9,Male,-,-,bad,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE


In [13]:
print()
tf.estimator.train_and_evaluate(
    est,
    tf.estimator.TrainSpec(input_fn=create_trainset, max_steps=TRAIN_STEPS),
    tf.estimator.EvalSpec(input_fn=create_evalset, steps=df_eval.shape[0])
)

()
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Skipping training since max_steps has already saved.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-07-14:18:09
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model.ckpt-100000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [6/64]
INFO:tensorflow:Evaluation [12/64]
INFO:tensorflow:Evaluation [18/64]
INFO:tensorflow:Evaluation [24/64]
INFO:tensorflow:Evaluation [30/64]
INFO:tensorflow:Evaluation [36/64]
INFO:tensorflow:Evaluation [42/64]
INFO:tensorflow:Evaluation [48/64]
INFO:tensorflow:Evaluation [54/64]
INFO:tensorflow:Evalu

In [14]:
tf.estimator.train_and_evaluate(
    est,
    tf.estimator.TrainSpec(input_fn=create_trainset, max_steps=TRAIN_STEPS),
    tf.estimator.EvalSpec(input_fn=create_trainset)
)

INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Skipping training since max_steps has already saved.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-07-14:18:37
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model.ckpt-100000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflo

In [15]:
COLUMNS

['Gender',
 'Eye_color',
 'Hair_color',
 'Alignment',
 'Agility',
 'Accelerated_Healing',
 'Lantern_Power_Ring',
 'Dimensional_Awareness',
 'Cold_Resistance',
 'Durability',
 'Stealth',
 'Energy_Absorption',
 'Flight',
 'Danger_Sense',
 'Underwater_breathing',
 'Marksmanship',
 'Weapons_Master',
 'Power_Augmentation',
 'Animal_Attributes',
 'Longevity',
 'Intelligence',
 'Super_Strength',
 'Cryokinesis',
 'Telepathy',
 'Energy_Armor',
 'Energy_Blasts',
 'Duplication',
 'Size_Changing',
 'Density_Control',
 'Stamina',
 'Astral_Travel',
 'Audio_Control',
 'Dexterity',
 'Omnitrix',
 'Super_Speed',
 'Possession',
 'Animal_Oriented_Powers',
 'Weapon-based_Powers',
 'Electrokinesis',
 'Darkforce_Manipulation',
 'Death_Touch',
 'Teleportation',
 'Enhanced_Senses',
 'Telekinesis',
 'Energy_Beams',
 'Magic',
 'Hyperkinesis',
 'Jump',
 'Clairvoyance',
 'Dimensional_Travel',
 'Power_Sense',
 'Shapeshifting',
 'Peak_Human_Condition',
 'Immortality',
 'Camouflage',
 'Element_Control',
 'Phasing',
 

### export the model

In [16]:
# attempt 2
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
#export_input_fn = tf.contrib.learn.build_parsing_serving_input_fn(feature_spec)
servable_model_path=est.export_savedmodel(os.path.join(MODEL_DIR,"model"),export_input_fn)
servable_model_path

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model.ckpt-100000
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model/temp-1528381144/saved_model.pb


'gs://ml-research-injenia/estimators/trainings/dnn-classifier/test07/model/1528381144'

In [17]:
with open_file(os.path.join(MODEL_DIR,"data","latest_model.txt"), "w") as f:
    f.write(servable_model_path)

In [18]:
raise

TypeError: exceptions must be old-style classes or derived from BaseException, not NoneType

# INFERENCE: TENSORFLOW MODEL SERVER

In [None]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import pandas as pd

import numpy as np
import tensorflow as tf
from tensorflow.python.lib.io.file_io import FileIO as open_file

In [None]:
from tensorflow_model_server.tensorflow_model_server import TensorflowModelServer
from tensorflow_model_server.inout.requests import build_estimator_request, EstimatorRequest

In [None]:
MODEL_PATH=servable_model_path
with open_file(os.path.join(MODEL_DIR,"data","dataset_fields.json"), "r") as f:
    inputs_definition=json.load(f)
    
COLUMNS=inputs_definition["fields"]["columns"]
FIELD_TYPES=inputs_definition["fields"]["types"]
FIELD_CATEGORIES=inputs_definition["fields"]["categories"]
LABELS=inputs_definition["label"]["categories"]

In [None]:
with open_file(os.path.join(MODEL_DIR,"data","actual_evalset.csv"), "r") as f:
    df = pd.read_csv(f)

In [None]:
def extract_outcomes(predictions):
    for p in predictions:
        if(not p["net"]["success"]):
            print(p)
            raise
    
    return [p["net"]["result"]["classes"][np.argmax(
        p["net"]["result"]["scores"]
    )] for p in predictions]

In [None]:
BATCH_SIZE=15
heroes=list(df["index"].values)
requests=[]
expected_results=[]
results=[]
for i,h in enumerate(heroes):
    feats=list(df[df["index"]==h].drop([LABEL_FIELD], axis=1).values[0][1:])
    requests.append({
        "COLUMNS":COLUMNS,
        "FIELD_TYPES":FIELD_TYPES,
        "features":feats
    })
    expected_results.append(df[df["index"]==h][LABEL_FIELD].values[0])
chunks = [requests[x:x+BATCH_SIZE] for x in xrange(0, len(requests), BATCH_SIZE)]

with TensorflowModelServer(
    {"net":MODEL_PATH}, 
    request_builder=build_estimator_request
) as tms:
    for c in chunks:
        results.extend(extract_outcomes(tms.predict(c)))

In [None]:
correct=0
total=0
for i,result in enumerate(results):
    if result==expected_results[i]:
        correct+=1
    total+=1
print("Accuracy:")
print(float(correct)/float(total))

In [None]:
BATCH_SIZE=15
heroes=list(df["index"].values)
requests=[]
expected_results=[]
results=[]
for i,h in enumerate(heroes):
    feats=list(df[df["index"]==h].drop([LABEL_FIELD], axis=1).values[0][1:])
    requests.append(feats)
    expected_results.append(df[df["index"]==h][LABEL_FIELD].values[0])
    
chunks = [requests[x:x+BATCH_SIZE] for x in xrange(0, len(requests), BATCH_SIZE)]

with TensorflowModelServer(
    {"net":MODEL_PATH}, 
    request_builder=EstimatorRequest(COLUMNS,FIELD_TYPES)
) as tms:
    for c in chunks:
        results.extend(extract_outcomes(tms.predict(c)))

In [None]:
correct=0
total=0
for i,result in enumerate(results):
    if result==expected_results[i]:
        correct+=1
    total+=1
print("Accuracy:")
print(float(correct)/float(total))

In [None]:
# THIS IS TO STOP THE "RESTART AND RUN ALL" COMMAND
raise

# EXPERIMENTS

In [None]:
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

In [None]:
est.get_variable_names()

In [None]:
weights_name="linear/linear_model/Accelerated_Healing/weights"
est.get_variable_value(weights_name)

In [None]:
weights_name="linear/linear_model/Accelerated_Healing/weights/part_0/Ftrl"
est.get_variable_value(weights_name)

In [None]:
weights_name="linear/linear_model/Accelerated_Healing/weights/part_0/Ftrl_1"
est.get_variable_value(weights_name)

In [None]:
feature="bias_weights"
weights={x:est.get_variable_value(x) for x in est.get_variable_names() if feature in x}
weights

In [None]:
feature="Omniscient"
weights={x:est.get_variable_value(x) for x in est.get_variable_names() if feature in x}
weights

# INFERENCE: GOOGLE CLOUD ML ENGINE

In [None]:
PROJECT="injenia-ricerca"
MODEL=""
VERSION=None

In [None]:
from oauth2client.client import GoogleCredentials
import googleapiclient
import json
import os
import pandas as pd
import base64
import tensorflow as tf
from tensorflow.python.lib.io.file_io import FileIO as open_file

In [None]:
with open_file(os.path.join(MODEL_DIR,"data","latest_model.txt"), "r") as f:
    MODEL_PATH=f.read()
MODEL_PATH
with open_file(os.path.join(MODEL_DIR,"data","dataset_fields.json"), "r") as f:
    inputs_definition=json.load(f)
    
COLUMNS=inputs_definition["fields"]["columns"]
FIELD_TYPES=inputs_definition["fields"]["types"]
FIELD_CATEGORIES=inputs_definition["fields"]["categories"]
LABELS=inputs_definition["label"]["categories"]

In [None]:
with open_file(os.path.join(MODEL_DIR,"data","actual_dataset.csv"), "r") as f:
    df = pd.read_csv(f)
df.columns=[x.strip().replace(" ", "_").replace("_-_", "_") for x in df.columns]
df = df[df["Alignment"].isin(["good","bad","neutral"]) ]
#df = df.drop(["Publisher"], axis=1)
for c in COLUMNS:
    if df[c].dtypes.name == 'bool':
        df[c]=df[c].map({True: 'TRUE', False: 'FALSE'})

In [None]:
superman_feats = list(df[df["index"]=="Superman"].drop(["Alignment"], axis=1).values[0][1:])
feats=superman_feats

In [None]:
def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def build_cmle_request(COLUMNS, FIELD_TYPES, feats):    
    #COLUMNS=data["COLUMNS"]
    #FIELD_TYPES=data["FIELD_TYPES"]
    #feats=data["features"]
    
    feature_dict={}

    for i,c in enumerate(COLUMNS):
        if FIELD_TYPES[c]=="string":
            feature_dict[c]=_bytes_feature(value=feats[i].encode())
        if FIELD_TYPES[c]=="number":
            feature_dict[c]=_float_feature(value=feats[i])


    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    serialized = example.SerializeToString()

    return {"inputs":serialized}
    #return serialized

instances=[build_cmle_request(COLUMNS, FIELD_TYPES, feats)]
instances

In [None]:
def execute_request(project,model,version=None,instances=[]):
    service = googleapiclient.discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}'.format(project, model)
    if version is not None:
        name += '/versions/{}'.format(version)

    response = service.projects().predict(
        name=name,
        body={'instances': instances}
    ).execute()

    return response

resp=execute_request(PROJECT,MODEL,VERSION,instances)
resp