# DATASET DEFINITION

In [1]:
import tensorflow as tf
import pandas as pd
import os
import json
import shutil
from tensorflow.python.lib.io.file_io import FileIO as open_file

  from ._conv import register_converters as _register_converters


In [2]:
DATASET_CSV="gs://ml-research-injenia/estimators/datasets/superhero-set/preproc_trainset.csv"
EVALSET_CSV="gs://ml-research-injenia/estimators/datasets/superhero-set/preproc_evalset.csv"
OUT_DIR="gs://ml-research-injenia/estimators/trainings/linear-classifier/test08"
MODEL_DIR=OUT_DIR

BATCH_SIZE = 100
TRAIN_STEPS= 100000

HIDDEN_UNITS=[1024, 512, 256]
EMBEDDING_COLUMNS_SIZE=4

In [3]:
with open_file(DATASET_CSV, "r") as f:
    df = pd.read_csv(f)
for c in df.columns:
    if df[c].dtypes.name == 'bool':
        df[c]=df[c].map({True: 'TRUE', False: 'FALSE'})
df

Unnamed: 0,index,Gender,Eye_color,Race,Hair_color,Height,Publisher,Skin_color,Alignment,Weight,...,Web_Creation,Reality_Warping,Odin_Force,Symbiote_Costume,Speed_Force,Phoenix_Force,Molecular_Dissipation,Vision_Cryo,Omnipresent,Omniscient
0,Molten Man,Male,gold,-,Gold,196.0,Marvel Comics,-,bad,248.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
1,Killer Frost,Female,blue,Human,Blond,-99.0,DC Comics,blue,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
2,Raven,Female,indigo,Human,Black,165.0,DC Comics,-,neutral,50.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
3,Vegeta,Male,-,Saiyan,Black,168.0,Shueisha,-,bad,73.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
4,Gog,Male,-,-,-,-99.0,DC Comics,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
5,Redeemer II,Male,-,-,-,-99.0,Image Comics,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
6,Captain Marvel II,Male,blue,Human,Black,175.0,DC Comics,-,good,74.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
7,Proto-Goblin,Male,green,-,Blond,-99.0,Marvel Comics,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
8,Sentry,Male,blue,Mutant,Blond,188.0,Marvel Comics,-,neutral,87.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
9,Kylo Ren,Male,-,Human,-,-99.0,George Lucas,-,bad,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE


In [4]:
with open_file(EVALSET_CSV, "r") as f:
    df_eval = pd.read_csv(f)
for c in df_eval.columns:
    if df_eval[c].dtypes.name == 'bool':
        df_eval[c]=df_eval[c].map({True: 'TRUE', False: 'FALSE'})
df_eval

Unnamed: 0,index,Gender,Eye_color,Race,Hair_color,Height,Publisher,Skin_color,Alignment,Weight,...,Web_Creation,Reality_Warping,Odin_Force,Symbiote_Costume,Speed_Force,Phoenix_Force,Molecular_Dissipation,Vision_Cryo,Omnipresent,Omniscient
0,Phantom,Male,-,-,-,-99.0,DC Comics,-,good,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
1,Absorbing Man,Male,blue,Human,No Hair,193.0,Marvel Comics,-,bad,122.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
2,Deadshot,Male,brown,Human,Brown,185.0,DC Comics,-,bad,91.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
3,Hal Jordan,Male,brown,Human,Brown,188.0,DC Comics,-,good,90.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
4,Black Flash,Male,-,God / Eternal,-,-99.0,DC Comics,-,neutral,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
5,Zatanna,Female,blue,Human,Black,170.0,DC Comics,-,good,57.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
6,Captain America,Male,blue,Human,blond,188.0,Marvel Comics,-,good,108.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
7,Captain Epic,Male,blue,-,Brown,188.0,Team Epic TV,-,good,-99.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
8,Vulture,Male,brown,Human,No Hair,180.0,Marvel Comics,-,bad,79.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
9,Hyperion,Male,blue,Eternal,Red,183.0,Marvel Comics,-,good,207.0,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE


In [5]:
with open_file(os.path.join(MODEL_DIR,"trainset.csv"), "w") as f:
    df.to_csv(f,index=False)
with open_file(os.path.join(MODEL_DIR,"evalset.csv"), "w") as f:
    df_eval.to_csv(f,index=False)

In [6]:
exclude_columns=["Publisher","Height","Weight","Skin_color","Race"]
INDEX=df.columns[0]
COLUMNS=[x for x in df.columns[1:] if x not in exclude_columns]
LABEL_FIELD="Alignment"

# TRAINING

### Gather all the required informations from the dataset

In [7]:
FIELD_DEFAULTS=[]
FIELD_TYPES={}
FIELD_CATEGORIES={}
dtypes=dict(df.dtypes)
for c in COLUMNS:
    if(str(dtypes[c])=="bool"):
        FIELD_DEFAULTS.append([0])
        FIELD_TYPES[c]="bool"
    elif(str(dtypes[c])=="object"):
        FIELD_DEFAULTS.append(["NA"])
        FIELD_TYPES[c]="string"
        FIELD_CATEGORIES[c]=list(sorted(set(list(df[c].unique())+["NA"])))
    else:  
        FIELD_DEFAULTS.append([0.0])
        FIELD_TYPES[c]="number"
FIELD_CATEGORIES[LABEL_FIELD]=[x for x in FIELD_CATEGORIES[LABEL_FIELD] if x != "NA"]

### Save dataset informations

In [8]:
#if(not os.path.isdir(MODEL_DIR)):
#    os.makedirs(MODEL_DIR)

with open_file(os.path.join(MODEL_DIR,"data","dataset_fields.json"), "w") as f:
    json.dump(
        {
            "fields":{
                "columns" : [x for x in COLUMNS if x != LABEL_FIELD],
                "types" : {x:FIELD_TYPES[x] for x in FIELD_TYPES if x != LABEL_FIELD},
                "categories" : {x:FIELD_CATEGORIES[x] for x in FIELD_CATEGORIES if x != LABEL_FIELD}
            },
            "label":{
                "column" : LABEL_FIELD,
                "type" : FIELD_TYPES[LABEL_FIELD],
                "categories" : FIELD_CATEGORIES[LABEL_FIELD],
            }
        },
        f
    )

### Create a dataset object

In [9]:
with open_file(os.path.join(MODEL_DIR,"data","actual_trainset.csv"), "w") as f:
    df[[INDEX]+COLUMNS].to_csv(f, index=False)
with open_file(os.path.join(MODEL_DIR,"data","actual_evalset.csv"), "w") as f:
    df_eval[[INDEX]+COLUMNS].to_csv(f, index=False)

In [10]:
#tf.reset_default_graph()
with open_file(os.path.join(MODEL_DIR,"data","tf_trainset.csv"), "w") as f:
    df[COLUMNS].to_csv(f, index=False)

def create_trainset():
    ds = tf.data.TextLineDataset(os.path.join(MODEL_DIR,"data","tf_trainset.csv")).skip(1)
    def _parse_line(line):
        # Decode the line into its fields
        fields = tf.decode_csv(line, FIELD_DEFAULTS)

        # Pack the result into a dictionary
        features = dict(zip(COLUMNS,fields))

        # Separate the label from the features
        label = features.pop(LABEL_FIELD)

        return features, label

    parsed_ds = ds.map(_parse_line)
    
    return parsed_ds.shuffle(TRAIN_STEPS).repeat().batch(BATCH_SIZE)




with open_file(os.path.join(MODEL_DIR,"data","tf_evalset.csv"), "w") as f:
    df_eval[COLUMNS].to_csv(f, index=False)

def create_evalset():
    ds = tf.data.TextLineDataset(os.path.join(MODEL_DIR,"data","tf_evalset.csv")).skip(1)
    def _parse_line(line):
        # Decode the line into its fields
        fields = tf.decode_csv(line, FIELD_DEFAULTS)

        # Pack the result into a dictionary
        features = dict(zip(COLUMNS,fields))

        # Separate the label from the features
        label = features.pop(LABEL_FIELD)

        return features, label

    parsed_ds = ds.map(_parse_line)
    
    return parsed_ds.shuffle(TRAIN_STEPS).repeat().batch(BATCH_SIZE)

### build and train the estimator

In [11]:
print(list(FIELD_CATEGORIES[LABEL_FIELD]))
feature_columns=[]
for c in COLUMNS:
    if c == LABEL_FIELD:
        continue
        
    if FIELD_TYPES[c]=="string":
        feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(
                key=c,
                vocabulary_list=list(FIELD_CATEGORIES[c])
            ))
    if FIELD_TYPES[c]=="number":
        feature_columns.append(tf.feature_column.numeric_column(key=c))
        
est = tf.estimator.LinearClassifier(
    feature_columns,
    n_classes=len(FIELD_CATEGORIES[LABEL_FIELD]),
    label_vocabulary=list(FIELD_CATEGORIES[LABEL_FIELD]),
    model_dir=MODEL_DIR
)
# Train the estimator
#est.train(
#    steps=TRAIN_STEPS,
#    input_fn=create_dataset)

# Train and evaluate the estimator
tf.estimator.train_and_evaluate(
    est,
    tf.estimator.TrainSpec(input_fn=create_trainset, max_steps=TRAIN_STEPS),
    tf.estimator.EvalSpec(input_fn=create_evalset)
)

['bad', 'good', 'neutral']
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fee7e0c8750>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'gs://ml-research-injenia/estimators/trainings/linear-classifier/test08', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorf

INFO:tensorflow:loss = 26.730362, step = 51587 (4.925 sec)
INFO:tensorflow:global_step/sec: 20.2063
INFO:tensorflow:loss = 26.153997, step = 51687 (4.949 sec)
INFO:tensorflow:global_step/sec: 20.0325
INFO:tensorflow:loss = 25.36364, step = 51787 (4.992 sec)
INFO:tensorflow:global_step/sec: 19.8935
INFO:tensorflow:loss = 17.165678, step = 51887 (5.027 sec)
INFO:tensorflow:global_step/sec: 19.9187
INFO:tensorflow:loss = 26.89124, step = 51987 (5.020 sec)
INFO:tensorflow:global_step/sec: 19.6705
INFO:tensorflow:loss = 29.629288, step = 52087 (5.084 sec)
INFO:tensorflow:global_step/sec: 19.9644
INFO:tensorflow:loss = 34.456448, step = 52187 (5.009 sec)
INFO:tensorflow:global_step/sec: 19.9652
INFO:tensorflow:loss = 22.087118, step = 52287 (5.009 sec)
INFO:tensorflow:global_step/sec: 19.5554
INFO:tensorflow:loss = 21.32839, step = 52387 (5.114 sec)
INFO:tensorflow:global_step/sec: 19.862
INFO:tensorflow:loss = 19.127657, step = 52487 (5.034 sec)
INFO:tensorflow:global_step/sec: 19.961
INFO:

INFO:tensorflow:global_step/sec: 14.7233
INFO:tensorflow:loss = 21.401382, step = 58254 (5.156 sec)
INFO:tensorflow:global_step/sec: 19.6274
INFO:tensorflow:loss = 24.374105, step = 58354 (5.095 sec)
INFO:tensorflow:global_step/sec: 19.813
INFO:tensorflow:loss = 29.532589, step = 58454 (5.048 sec)
INFO:tensorflow:global_step/sec: 20.0077
INFO:tensorflow:loss = 21.527369, step = 58554 (4.998 sec)
INFO:tensorflow:global_step/sec: 19.265
INFO:tensorflow:loss = 29.974852, step = 58654 (5.190 sec)
INFO:tensorflow:global_step/sec: 19.5222
INFO:tensorflow:loss = 24.920753, step = 58754 (5.123 sec)
INFO:tensorflow:global_step/sec: 20.0294
INFO:tensorflow:loss = 32.241375, step = 58854 (4.993 sec)
INFO:tensorflow:global_step/sec: 19.9495
INFO:tensorflow:loss = 27.32621, step = 58954 (5.013 sec)
INFO:tensorflow:global_step/sec: 19.7882
INFO:tensorflow:loss = 22.394547, step = 59054 (5.054 sec)
INFO:tensorflow:global_step/sec: 19.897
INFO:tensorflow:loss = 22.836061, step = 59154 (5.026 sec)
INFO

INFO:tensorflow:Loss for final step: 25.662817.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-07-11:16:21
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model.ckpt-66283
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-06-07-11:16:34
INFO:tensorflow:Saving dict for global step 66283: accuracy = 0.7034, average_loss = 1.5417795, global_step = 66283, loss = 154.17795
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 20.213
INFO:tensorflow:loss = 33.0566, step = 73084 (4.947 sec)
INFO:tensorflow:global_step/sec: 20.0743
INFO:tensorflow:loss = 21.669909, step = 73184 (4.982 sec)
INFO:tensorflow:global_step/sec: 19.9549
INFO:tensorflow:loss = 31.63424, step = 73284 (5.011 sec)
INFO:tensorflow:global_step/sec: 20.0747
INFO:tensorflow:loss = 22.208181, step = 73384 (4.982 sec)
INFO:tensorflow:global_step/sec: 20.2881
INFO:tensorflow:loss = 25.376202, step = 73484 (4.929 sec)
INFO:tensorflow:global_step/sec: 20.3386
INFO:tensorflow:loss = 35.85001, step = 73584 (4.916 sec)
INFO:tensorflow:global_step/sec: 20.0934
INFO:tensorflow:loss = 36.08696, step = 73684 (4.977 sec)
INFO:tensorflow:global_step/sec: 19.9959
INFO:tensorflow:loss = 29.727703, step = 73784 (5.001 sec)
INFO:tensorflow:global_step/sec: 20.2164
INFO:tensorflow:loss = 35.1582, step = 73884 (4.946 sec)
INFO:tensorflow:global_step/sec: 19.9411
INFO:tensorflow:loss = 20.79812, step = 73984 (5.015 sec)
INFO:tens

INFO:tensorflow:global_step/sec: 20.6508
INFO:tensorflow:loss = 32.696926, step = 79690 (4.843 sec)
INFO:tensorflow:global_step/sec: 20.0887
INFO:tensorflow:loss = 28.914324, step = 79790 (4.977 sec)
INFO:tensorflow:global_step/sec: 20.157
INFO:tensorflow:loss = 27.27371, step = 79890 (4.961 sec)
INFO:tensorflow:global_step/sec: 19.9935
INFO:tensorflow:loss = 22.532568, step = 79990 (5.002 sec)
INFO:tensorflow:global_step/sec: 20.1305
INFO:tensorflow:loss = 21.749043, step = 80090 (4.967 sec)
INFO:tensorflow:global_step/sec: 19.9552
INFO:tensorflow:loss = 30.410427, step = 80190 (5.011 sec)
INFO:tensorflow:global_step/sec: 20.0254
INFO:tensorflow:loss = 34.448685, step = 80290 (4.994 sec)
INFO:tensorflow:global_step/sec: 19.8559
INFO:tensorflow:loss = 23.87996, step = 80390 (5.037 sec)
INFO:tensorflow:global_step/sec: 19.694
INFO:tensorflow:loss = 28.993765, step = 80490 (5.078 sec)
INFO:tensorflow:global_step/sec: 19.5459
INFO:tensorflow:loss = 25.678026, step = 80590 (5.116 sec)
INFO

INFO:tensorflow:Loss for final step: 28.658098.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-07-11:38:35
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model.ckpt-87810
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensorflow:Evaluation [100/100]
INFO:tensorflow:Finished evaluation at 2018-06-07-11:38:48
INFO:tensorflow:Saving dict for global step 87810: accuracy = 0.7031, average_loss = 1.6286674, global_step = 87810, loss = 162.86674
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 20.3856
INFO:tensorflow:loss = 16.907185, step = 94611 (4.905 sec)
INFO:tensorflow:global_step/sec: 20.3942
INFO:tensorflow:loss = 20.395046, step = 94711 (4.903 sec)
INFO:tensorflow:global_step/sec: 20.0802
INFO:tensorflow:loss = 24.302214, step = 94811 (4.980 sec)
INFO:tensorflow:global_step/sec: 20.0565
INFO:tensorflow:loss = 24.438704, step = 94911 (4.986 sec)
INFO:tensorflow:global_step/sec: 20.2129
INFO:tensorflow:loss = 27.646374, step = 95011 (4.947 sec)
INFO:tensorflow:global_step/sec: 20.1579
INFO:tensorflow:loss = 16.429848, step = 95111 (4.961 sec)
INFO:tensorflow:global_step/sec: 20.3382
INFO:tensorflow:loss = 22.700542, step = 95211 (4.917 sec)
INFO:tensorflow:global_step/sec: 20.3186
INFO:tensorflow:loss = 28.143822, step = 95311 (4.921 sec)
INFO:tensorflow:global_step/sec: 20.0887
INFO:tensorflow:loss = 25.989758, step = 95411 (4.978 sec)
INFO:tensorflow:global_step/sec: 20.1907
INFO:tensorflow:loss = 25.271893, step = 95511 (4.953 sec)


In [12]:
df[COLUMNS]

Unnamed: 0,Gender,Eye_color,Hair_color,Alignment,Agility,Accelerated_Healing,Lantern_Power_Ring,Dimensional_Awareness,Cold_Resistance,Durability,...,Web_Creation,Reality_Warping,Odin_Force,Symbiote_Costume,Speed_Force,Phoenix_Force,Molecular_Dissipation,Vision_Cryo,Omnipresent,Omniscient
0,Male,gold,Gold,bad,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
1,Female,blue,Blond,bad,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
2,Female,indigo,Black,neutral,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
3,Male,-,Black,bad,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
4,Male,-,-,bad,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
5,Male,-,-,bad,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
6,Male,blue,Black,good,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
7,Male,green,Blond,bad,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
8,Male,blue,Blond,neutral,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE
9,Male,-,-,bad,TRUE,TRUE,FALSE,FALSE,FALSE,TRUE,...,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE,FALSE


In [13]:
print()
tf.estimator.train_and_evaluate(
    est,
    tf.estimator.TrainSpec(input_fn=create_trainset, max_steps=TRAIN_STEPS),
    tf.estimator.EvalSpec(input_fn=create_evalset, steps=df_eval.shape[0])
)

()
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Skipping training since max_steps has already saved.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-07-11:54:28
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model.ckpt-100000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [6/64]
INFO:tensorflow:Evaluation [12/64]
INFO:tensorflow:Evaluation [18/64]
INFO:tensorflow:Evaluation [24/64]
INFO:tensorflow:Evaluation [30/64]
INFO:tensorflow:Evaluation [36/64]
INFO:tensorflow:Evaluation [42/64]
INFO:tensorflow:Evaluation [48/64]
INFO:tensorflow:Evaluation [54/64]
INFO:tensorflow:Ev

In [14]:
tf.estimator.train_and_evaluate(
    est,
    tf.estimator.TrainSpec(input_fn=create_trainset, max_steps=TRAIN_STEPS),
    tf.estimator.EvalSpec(input_fn=create_trainset)
)

INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 600 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Skipping training since max_steps has already saved.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-07-11:54:55
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model.ckpt-100000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [10/100]
INFO:tensorflow:Evaluation [20/100]
INFO:tensorflow:Evaluation [30/100]
INFO:tensorflow:Evaluation [40/100]
INFO:tensorflow:Evaluation [50/100]
INFO:tensorflow:Evaluation [60/100]
INFO:tensorflow:Evaluation [70/100]
INFO:tensorflow:Evaluation [80/100]
INFO:tensorflow:Evaluation [90/100]
INFO:tensor

In [15]:
COLUMNS

['Gender',
 'Eye_color',
 'Hair_color',
 'Alignment',
 'Agility',
 'Accelerated_Healing',
 'Lantern_Power_Ring',
 'Dimensional_Awareness',
 'Cold_Resistance',
 'Durability',
 'Stealth',
 'Energy_Absorption',
 'Flight',
 'Danger_Sense',
 'Underwater_breathing',
 'Marksmanship',
 'Weapons_Master',
 'Power_Augmentation',
 'Animal_Attributes',
 'Longevity',
 'Intelligence',
 'Super_Strength',
 'Cryokinesis',
 'Telepathy',
 'Energy_Armor',
 'Energy_Blasts',
 'Duplication',
 'Size_Changing',
 'Density_Control',
 'Stamina',
 'Astral_Travel',
 'Audio_Control',
 'Dexterity',
 'Omnitrix',
 'Super_Speed',
 'Possession',
 'Animal_Oriented_Powers',
 'Weapon-based_Powers',
 'Electrokinesis',
 'Darkforce_Manipulation',
 'Death_Touch',
 'Teleportation',
 'Enhanced_Senses',
 'Telekinesis',
 'Energy_Beams',
 'Magic',
 'Hyperkinesis',
 'Jump',
 'Clairvoyance',
 'Dimensional_Travel',
 'Power_Sense',
 'Shapeshifting',
 'Peak_Human_Condition',
 'Immortality',
 'Camouflage',
 'Element_Control',
 'Phasing',
 

### export the model

In [16]:
# attempt 2
feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
export_input_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
#export_input_fn = tf.contrib.learn.build_parsing_serving_input_fn(feature_spec)
servable_model_path=est.export_savedmodel(os.path.join(MODEL_DIR,"model"),export_input_fn)
servable_model_path

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: ['serving_default', 'classification']
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Restoring parameters from gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model.ckpt-100000
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model/temp-1528372522/saved_model.pb


'gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model/1528372522'

In [17]:
with open_file(os.path.join(MODEL_DIR,"data","latest_model.txt"), "w") as f:
    f.write(servable_model_path)

In [18]:
raise

TypeError: exceptions must be old-style classes or derived from BaseException, not NoneType

# INFERENCE: TENSORFLOW MODEL SERVER

In [19]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
import os
import pandas as pd

import numpy as np
import tensorflow as tf
from tensorflow.python.lib.io.file_io import FileIO as open_file

In [20]:
from tensorflow_model_server.tensorflow_model_server import TensorflowModelServer
from tensorflow_model_server.inout.requests import build_estimator_request, EstimatorRequest

In [21]:
MODEL_PATH=servable_model_path
with open_file(os.path.join(MODEL_DIR,"data","dataset_fields.json"), "r") as f:
    inputs_definition=json.load(f)
    
COLUMNS=inputs_definition["fields"]["columns"]
FIELD_TYPES=inputs_definition["fields"]["types"]
FIELD_CATEGORIES=inputs_definition["fields"]["categories"]
LABELS=inputs_definition["label"]["categories"]

In [25]:
with open_file(os.path.join(MODEL_DIR,"data","actual_evalset.csv"), "r") as f:
    df = pd.read_csv(f)
for c in df.columns:
    if df[c].dtypes.name == 'bool':
        df[c]=df[c].map({True: 'TRUE', False: 'FALSE'})

In [26]:
def extract_outcomes(predictions):
    for p in predictions:
        if(not p["net"]["success"]):
            print(p)
            raise
    
    return [p["net"]["result"]["classes"][np.argmax(
        p["net"]["result"]["scores"]
    )] for p in predictions]

In [28]:
BATCH_SIZE=15
heroes=list(df["index"].values)
requests=[]
expected_results=[]
results=[]
for i,h in enumerate(heroes):
    feats=list(df[df["index"]==h].drop([LABEL_FIELD], axis=1).values[0][1:])
    requests.append({
        "COLUMNS":COLUMNS,
        "FIELD_TYPES":FIELD_TYPES,
        "features":feats
    })
    expected_results.append(df[df["index"]==h][LABEL_FIELD].values[0])
chunks = [requests[x:x+BATCH_SIZE] for x in xrange(0, len(requests), BATCH_SIZE)]

with TensorflowModelServer(
    {"net":MODEL_PATH}, 
    request_builder=build_estimator_request
) as tms:
    for i,c in enumerate(chunks):
        print((i,len(chunks)))
        results.extend(extract_outcomes(tms.predict(c)))

downloading models...
downloading net from gs://ml-research-injenia/estimators/trainings/linear-classifier/test08/model/1528372522 to ./tensorflow_model_server_staging/net
model net downloaded to /notebooks/MIRCO/Estimators/tensorflow_model_server_staging/net
launching:
tensorflow_model_server --port=9000 --model_config_file=./tensorflow_model_server_staging/config.yaml
2018-06-07 12:46:55.319729: I tensorflow_serving/model_servers/server_core.cc:444] Adding/updating models.

2018-06-07 12:46:55.319778: I tensorflow_serving/model_servers/server_core.cc:499]  (Re-)adding model: net

2018-06-07 12:46:55.420209: I tensorflow_serving/core/basic_manager.cc:716] Successfully reserved resources to load servable {name: net version: 1}

2018-06-07 12:46:55.420250: I tensorflow_serving/core/loader_harness.cc:66] Approving load for servable version {name: net version: 1}

2018-06-07 12:46:55.420259: I tensorflow_serving/core/loader_harness.cc:74] Loading servable version {name: net version: 1}

2

  File "<ipython-input-28-896caab8137a>", line 22, in <module>
    results.extend(extract_outcomes(tms.predict(c)))
  File "tensorflow_model_server/tensorflow_model_server.py", line 271, in predict
    response_formatter=self.response_formatter)
  File "tensorflow_model_server/parallel_prediction.py", line 137, in parallel_batch_predictions
    results = [q.get() for p in processes]
  File "/usr/lib/python2.7/multiprocessing/queues.py", line 117, in get
    res = self._recv()


In [29]:
correct=0
total=0
for i,result in enumerate(results):
    if result==expected_results[i]:
        correct+=1
    total+=1
print("Accuracy:")
print(float(correct)/float(total))

Accuracy:


ZeroDivisionError: float division by zero

In [None]:
BATCH_SIZE=15
heroes=list(df["index"].values)
requests=[]
expected_results=[]
results=[]
for i,h in enumerate(heroes):
    feats=list(df[df["index"]==h].drop([LABEL_FIELD], axis=1).values[0][1:])
    requests.append(feats)
    expected_results.append(df[df["index"]==h][LABEL_FIELD].values[0])
    
chunks = [requests[x:x+BATCH_SIZE] for x in xrange(0, len(requests), BATCH_SIZE)]

with TensorflowModelServer(
    {"net":MODEL_PATH}, 
    request_builder=EstimatorRequest(COLUMNS,FIELD_TYPES)
) as tms:
    for c in chunks:
        results.extend(extract_outcomes(tms.predict(c)))

In [None]:
correct=0
total=0
for i,result in enumerate(results):
    if result==expected_results[i]:
        correct+=1
    total+=1
print("Accuracy:")
print(float(correct)/float(total))

In [None]:
import itertools
import numpy as np
import matplotlib.pyplot as plt

from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
%matplotlib inline

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def confusion_matrix(y_test, y_pred):
    # Compute confusion matrix
    cnf_matrix = confusion_matrix(y_test, y_pred)
    np.set_printoptions(precision=2)

    # Plot non-normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names,
                          title='Confusion matrix, without normalization')

    # Plot normalized confusion matrix
    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                          title='Normalized confusion matrix')

    plt.show()

In [None]:
# THIS IS TO STOP THE "RESTART AND RUN ALL" COMMAND
raise

# EXPERIMENTS

In [None]:
tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)

In [None]:
est.get_variable_names()

In [None]:
weights_name="linear/linear_model/Accelerated_Healing/weights"
est.get_variable_value(weights_name)

In [None]:
weights_name="linear/linear_model/Accelerated_Healing/weights/part_0/Ftrl"
est.get_variable_value(weights_name)

In [None]:
weights_name="linear/linear_model/Accelerated_Healing/weights/part_0/Ftrl_1"
est.get_variable_value(weights_name)

In [None]:
feature="bias_weights"
weights={x:est.get_variable_value(x) for x in est.get_variable_names() if feature in x}
weights

In [None]:
feature="Omniscient"
weights={x:est.get_variable_value(x) for x in est.get_variable_names() if feature in x}
weights

# INFERENCE: GOOGLE CLOUD ML ENGINE

In [None]:
PROJECT="injenia-ricerca"
MODEL=""
VERSION=None

In [None]:
from oauth2client.client import GoogleCredentials
import googleapiclient
import json
import os
import pandas as pd
import base64
import tensorflow as tf
from tensorflow.python.lib.io.file_io import FileIO as open_file

In [None]:
with open_file(os.path.join(MODEL_DIR,"data","latest_model.txt"), "r") as f:
    MODEL_PATH=f.read()
MODEL_PATH
with open_file(os.path.join(MODEL_DIR,"data","dataset_fields.json"), "r") as f:
    inputs_definition=json.load(f)
    
COLUMNS=inputs_definition["fields"]["columns"]
FIELD_TYPES=inputs_definition["fields"]["types"]
FIELD_CATEGORIES=inputs_definition["fields"]["categories"]
LABELS=inputs_definition["label"]["categories"]

In [None]:
with open_file(os.path.join(MODEL_DIR,"data","actual_dataset.csv"), "r") as f:
    df = pd.read_csv(f)
df.columns=[x.strip().replace(" ", "_").replace("_-_", "_") for x in df.columns]
df = df[df["Alignment"].isin(["good","bad","neutral"]) ]
#df = df.drop(["Publisher"], axis=1)
for c in COLUMNS:
    if df[c].dtypes.name == 'bool':
        df[c]=df[c].map({True: 'TRUE', False: 'FALSE'})

In [None]:
superman_feats = list(df[df["index"]=="Superman"].drop(["Alignment"], axis=1).values[0][1:])
feats=superman_feats

In [None]:
def _float_feature(value):
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def build_cmle_request(COLUMNS, FIELD_TYPES, feats):    
    #COLUMNS=data["COLUMNS"]
    #FIELD_TYPES=data["FIELD_TYPES"]
    #feats=data["features"]
    
    feature_dict={}

    for i,c in enumerate(COLUMNS):
        if FIELD_TYPES[c]=="string":
            feature_dict[c]=_bytes_feature(value=feats[i].encode())
        if FIELD_TYPES[c]=="number":
            feature_dict[c]=_float_feature(value=feats[i])


    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    serialized = example.SerializeToString()

    return {"inputs":serialized}
    #return serialized

instances=[build_cmle_request(COLUMNS, FIELD_TYPES, feats)]
instances

In [None]:
def execute_request(project,model,version=None,instances=[]):
    service = googleapiclient.discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}'.format(project, model)
    if version is not None:
        name += '/versions/{}'.format(version)

    response = service.projects().predict(
        name=name,
        body={'instances': instances}
    ).execute()

    return response

resp=execute_request(PROJECT,MODEL,VERSION,instances)
resp