In [14]:
import shutil
import numpy as np
import pandas as pd
import tensorflow as tf
import urllib.parse as urlparse

from helpers_proc import primaryDomain, extFind, process_date
print(tf.__version__)

1.8.0


### Load data and preprocessing

In [15]:
# Load the train / eval / test splits from CSV files in the working directory.
train_df = pd.read_csv('./train.csv', sep=',')
eval_df = pd.read_csv('./eval.csv', sep=',')
# The test split must be bound to `test_df`: that is the name the feature
# engineering, vocabulary and prediction cells read.
test_df = pd.read_csv('./test.csv', sep=',')

In [17]:
def add_more_features(dataframe):
    """Return a copy of ``dataframe`` with URL-derived feature columns appended.

    The input frame is left untouched. Columns added to the copy, in order:
    url_length, query_length, primary_Length, num_periods, num_exclam,
    num_quest, num_perc, num_numbers and last_year_modified. The helper
    columns domain_temp, query_string and primary are created along the way
    and dropped before returning.

    Args:
        dataframe: pandas DataFrame with a ``url`` column of strings, plus
            whatever ``process_date`` (from helpers_proc) reads.

    Returns:
        The augmented copy of ``dataframe``.
    """
    def _query_or_placeholder(url):
        # An empty query component is stored as the literal string 'None'.
        query = urlparse.urlparse(url).query
        return query if query != '' else 'None'

    def _primary_label(parts):
        # Skip a leading www/ww1/ww2 element and use the element after it.
        # NOTE(review): assumes extFind returns an indexable sequence of
        # domain parts -- confirm in helpers_proc.
        if parts[0] in ['www', 'ww1', 'ww2']:
            return parts[1]
        return parts[0]

    df = dataframe.copy()

    df['domain_temp'] = df.url.apply(lambda url: extFind(url, True))
    df['url_length'] = df.url.apply(len)
    df['query_string'] = df.url.apply(_query_or_placeholder)
    df['query_length'] = df['query_string'].apply(len)
    df['primary'] = df['domain_temp'].apply(_primary_label)
    df['primary_Length'] = df['primary'].apply(len)

    # Per-URL occurrence counts of individual punctuation characters.
    for column, symbol in (('num_periods', "."), ('num_exclam', "!"),
                           ('num_quest', "?"), ('num_perc', "%")):
        df[column] = df.url.apply(lambda url, symbol=symbol: url.count(symbol))

    df['num_numbers'] = df.url.apply(lambda url: sum(ch.isdigit() for ch in url))

    # process_date receives the frame while the helper columns still exist.
    df['last_year_modified'] = process_date(df)
    return df.drop(["query_string", "primary", "domain_temp"], axis=1)

In [18]:
# Add the engineered URL features to every split. add_more_features works on a
# copy, and each name is rebound to that copy, so the frames as loaded from CSV
# are no longer reachable after this cell.
train_df = add_more_features(train_df)
eval_df = add_more_features(eval_df)
test_df = add_more_features(test_df)

In [25]:
test_df.head()

Unnamed: 0,url,compromissionType,isHiddenFraudulent,contentLength,serverType,poweredBy,contentType,lastModified,url_length,query_length,primary_Length,num_periods,num_exclam,num_quest,num_perc,num_numbers,last_year_modified
0,http://www.sinduscongoias.com.br/index.php/fal...,defacement,False,0,Apache/2.2,Unknown,text/html; charset=utf-8,"Mon, 21 Jan 2013 19:31:19 GMT",55,4,14,4,0,0,0,0,2013
1,http://www.pontoprofissional.com.br/portal/ind...,defacement,False,0,Microsoft-IIS/6.0,Unknown,text/html; charset=utf-8,"Tue, 22 Jan 2013 00:28:49 GMT",100,47,17,4,0,1,0,4,2013
2,http://www.pontoprofissional.com.br/portal/ind...,defacement,False,0,Microsoft-IIS/6.0,Unknown,text/html; charset=utf-8,"Tue, 22 Jan 2013 00:28:56 GMT",100,47,17,4,0,1,0,4,2013
3,http://www.coleyglesias.com/index.html,defacement,False,0,Apache,Unknown,text/html; charset=utf-8,"Tue, 22 Jan 2013 02:30:56 GMT",38,4,12,3,0,0,0,0,2013
4,http://www.coleyglesias.com/index.php?option=c...,defacement,False,0,Apache,Unknown,text/html; charset=utf-8,"Tue, 22 Jan 2013 02:30:59 GMT",98,60,12,3,0,1,0,4,2013


In [7]:
def get_vocabulary_list(category):
    """Collect the sorted distinct values of one column across all three splits.

    Args:
        category: name of a column present in the module-level frames
            train_df, eval_df and test_df.

    Returns:
        A sorted list of every distinct value that column takes in any split.
    """
    vocabulary = set()
    for frame in (train_df, eval_df, test_df):
        # unique() already de-duplicates within a split; the set merges splits.
        vocabulary.update(frame[category].unique())
    return sorted(vocabulary)

In [None]:
len(get_vocabulary_list('serverType'))

In [9]:
# Create pandas input function
def make_train_input_fn(df, batch_size, num_epochs):
    """Build a shuffled pandas input function for training.

    Args:
        df: pandas DataFrame passed whole as the feature frame; its
            'isHiddenFraudulent' column is also used as the label.
        batch_size: number of rows per training batch.
        num_epochs: number of passes over ``df``.

    Returns:
        The callable produced by tf.estimator.inputs.pandas_input_fn.
    """
    labels = df['isHiddenFraudulent']
    input_fn = tf.estimator.inputs.pandas_input_fn(
        x=df,
        y=labels,
        batch_size=batch_size,
        num_epochs=num_epochs,
        shuffle=True,
        queue_capacity=1024,
        num_threads=1,
    )
    return input_fn

def make_eval_input_fn(df, batch_size):
    """Build a single-pass, unshuffled pandas input function for evaluation.

    Args:
        df: pandas DataFrame passed whole as the feature frame; its
            'isHiddenFraudulent' column is also used as the label.
        batch_size: number of rows per evaluation batch.

    Returns:
        The callable produced by tf.estimator.inputs.pandas_input_fn.
    """
    return tf.estimator.inputs.pandas_input_fn(
        x = df,
        y = df['isHiddenFraudulent'],
        batch_size = batch_size,  # use the caller's value, not a fixed 256
        num_epochs = 1,           # one pass so every row is evaluated once
        shuffle = False,
        queue_capacity = 1024,
        num_threads = 1
    )

def make_test_input_fn(df, batch_size=16):
    """Build a single-pass, unshuffled, label-free input function for prediction.

    Args:
        df: pandas DataFrame passed whole as the feature frame.
        batch_size: number of rows per prediction batch. Defaults to 16, the
            value that was previously fixed inside the function.

    Returns:
        The callable produced by tf.estimator.inputs.pandas_input_fn.
    """
    return tf.estimator.inputs.pandas_input_fn(
        x = df,
        batch_size = batch_size,
        num_epochs = 1,
        shuffle = False,  # keep predictions aligned with the row order of df
        queue_capacity = 1024,
        num_threads = 1
    )

#### Features to take into account

- query_length / done
- primary_length
- num_periods
- num_quest
- num_perc
- num_numbers

In [10]:
# Integer-valued features: each becomes a tf.feature_column.numeric_column in
# create_feature_cols and an int64 placeholder in serving_input_fn.
num_int_features = ['url_length', 'contentLength', 'query_length']
# Features treated as categorical: their vocabulary is collected by
# get_vocabulary_list, and serving_input_fn feeds them as string placeholders.
category_features = ['serverType', 'poweredBy', 'contentType', 'last_year_modified']

In [11]:
def create_feature_cols(embedding=False):
    """Build the list of TensorFlow feature columns used by the estimators.

    Numeric columns are created for every name in ``num_int_features`` and
    vocabulary-list categorical columns for every name in
    ``category_features``, with vocabularies from ``get_vocabulary_list``.

    Args:
        embedding: when true, each categorical column is wrapped in a
            16-dimensional embedding column.

    Returns:
        A list with the numeric columns first, then the categorical (or
        embedded) columns.
    """
    fc = tf.feature_column

    numeric = []
    for name in num_int_features:
        numeric.append(fc.numeric_column(name))

    categorical = []
    for name in category_features:
        categorical.append(
            fc.categorical_column_with_vocabulary_list(
                key=name, vocabulary_list=get_vocabulary_list(name)))

    if embedding:
        embedded = []
        for column in categorical:
            embedded.append(fc.embedding_column(column, 16))
        categorical = embedded

    # TODO: try bucketizing url_length (5 buckets).
    return numeric + categorical

feature_cols = create_feature_cols(embedding=True)

In [12]:
def serving_input_fn():
    """Describe the inputs a served model accepts.

    One placeholder of shape [None] is created per feature: int64 for every
    name in ``num_int_features``, then string for every name in
    ``category_features``. The same name-to-placeholder dict is used both as
    the model features and as the receiver tensors, so requests supply the
    features directly with no extra preprocessing.

    Returns:
        A tf.estimator.export.ServingInputReceiver.
    """
    receiver_tensors = {}

    # Numeric placeholders are created first, then string ones.
    for name in num_int_features:
        receiver_tensors[name] = tf.placeholder(tf.int64, [None])
    for name in category_features:
        receiver_tensors[name] = tf.placeholder(tf.string, [None])

    return tf.estimator.export.ServingInputReceiver(receiver_tensors, receiver_tensors)

In [87]:
# def serving_input_fn():
    
#     def get_length(single_string):
#         x = tf.expand_dims(single_string,0)
#         splitted = tf.string_split(x, delimiter="").values
#         length = tf.size(splitted)
#         return length

#     def get_lengths(string_batch):
#         return tf.map_fn(get_length, string_batch, dtype=tf.int32)
    
#     json_feature_placeholders = {
#         'url': tf.placeholder(tf.string, None),
#     }
    
#     features = {
#         'url_length': get_lengths(json_feature_placeholders['url'])
#     }    

#     return tf.estimator.export.ServingInputReceiver(features, json_feature_placeholders)

### Model training and evaluation function

In [21]:
# Create estimator train and evaluate function
NUM_EPOCH = 20  # passes over train_df requested from the training input function
BATCH_SIZE = 256  # rows per batch, passed to both the train and eval input functions
SAVE_CKPT_STEPS = 200 # steps between checkpoints (RunConfig save_checkpoints_steps)
EVAL_INTERVAL_SEC =  5 # sec; passed to EvalSpec as throttle_secs
HIDDEN_UNITS = [32,8,2]  # layer widths, used only when the DNN classifier is selected
# Step budget: full batches per epoch (floor division) times the number of epochs.
MAX_STEPS = train_df.shape[0]//BATCH_SIZE * NUM_EPOCH

def train_and_evaluate(output_dir, model='linear'):
    """Train a classifier with interleaved evaluation and SavedModel export.

    Reads the module-level settings SAVE_CKPT_STEPS, HIDDEN_UNITS, BATCH_SIZE,
    NUM_EPOCH, MAX_STEPS and EVAL_INTERVAL_SEC, the frames train_df and
    eval_df, and the feature_cols list.

    Args:
        output_dir: directory for checkpoints, summaries and exports.
        model: 'linear' selects tf.estimator.LinearClassifier; any other
            value selects tf.estimator.DNNClassifier with HIDDEN_UNITS.

    Returns:
        The estimator after tf.estimator.train_and_evaluate has run.
    """
    config = tf.estimator.RunConfig(
        model_dir=output_dir,
        save_summary_steps=50,
        keep_checkpoint_max=10,
        save_checkpoints_steps=SAVE_CKPT_STEPS,
    )

    shared_kwargs = {'feature_columns': feature_cols, 'config': config}
    if model == 'linear':
        classifier = tf.estimator.LinearClassifier(**shared_kwargs)
    else:
        classifier = tf.estimator.DNNClassifier(hidden_units=HIDDEN_UNITS, **shared_kwargs)

    training_input = make_train_input_fn(train_df, BATCH_SIZE, NUM_EPOCH)
    training_spec = tf.estimator.TrainSpec(input_fn=training_input, max_steps=MAX_STEPS)

    # exports_to_keep=None keeps every export rather than pruning old ones.
    exporter = tf.estimator.LatestExporter(
        name='exporter',
        serving_input_receiver_fn=serving_input_fn,
        exports_to_keep=None,
    )

    evaluation_input = make_eval_input_fn(eval_df, BATCH_SIZE)
    evaluation_spec = tf.estimator.EvalSpec(
        input_fn=evaluation_input,
        steps=None,
        start_delay_secs=1,               # start evaluating after N seconds
        throttle_secs=EVAL_INTERVAL_SEC,  # evaluate every N seconds
        exporters=exporter,
    )

    tf.estimator.train_and_evaluate(classifier, training_spec, evaluation_spec)
    return classifier

In [None]:
# parameters_name = ['NUM_EPOCH','BATCH_SIZE','SAVE_CKPT_STEPS','EVAL_INTERVAL_SEC','HIDDEN_UNITS','MAX_STEPS']
# parameters_value = [20, 256, 200, 5, [32,8,2], train_df.shape[0]//BATCH_SIZE * NUM_EPOCH]
# params = dict(zip(parameters_name,parameters_value))

In [22]:
outdir = '../trained_model/a_ql_linear_emb/'

In [23]:
shutil.rmtree(outdir, ignore_errors = True)
estimator = train_and_evaluate(outdir, model='linear')

INFO:tensorflow:Using config: {'_model_dir': '../trained_model/a_ql_linear_emb/', '_tf_random_seed': None, '_save_summary_steps': 50, '_save_checkpoints_steps': 200, '_save_checkpoints_secs': None, '_session_config': None, '_keep_checkpoint_max': 10, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12ce64908>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 5 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.

INFO:tensorflow:'classification' : Classification input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dtype=string>, 'contentType': <tf.Tensor 'Placeholder_5:0' shape=(?,) dtype=string>, 'last_year_modified': <tf.Tensor 'Placeholder_6:0' shape=(?,) dtype=string>}
INFO:tensorflow:'regression' : Regression input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dt

INFO:tensorflow:Finished evaluation at 2019-03-14-00:56:50
INFO:tensorflow:Saving dict for global step 2258: accuracy = 0.83870655, accuracy_baseline = 0.93881804, auc = 0.9129386, auc_precision_recall = 0.58460724, average_loss = 1.6492857, global_step = 2258, label/mean = 0.061181948, loss = 419.99774, precision = 0.26313373, prediction/mean = 0.21275525, recall = 0.9088748
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures EXCLUDED from export because they cannot be be served via TensorFlow Serving APIs:
INFO:tensorflow:'serving_default' : Classification input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_

INFO:tensorflow:loss = 229.17577, step = 2960 (1.030 sec)
INFO:tensorflow:Saving checkpoints for 3060 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:global_step/sec: 153.106
INFO:tensorflow:loss = 1503.3303, step = 3060 (0.653 sec)
INFO:tensorflow:global_step/sec: 190.864
INFO:tensorflow:loss = 255.75136, step = 3160 (0.524 sec)
INFO:tensorflow:Saving checkpoints for 3260 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:global_step/sec: 165.642
INFO:tensorflow:loss = 116.65764, step = 3260 (0.603 sec)
INFO:tensorflow:global_step/sec: 192.768
INFO:tensorflow:loss = 337.55856, step = 3360 (0.519 sec)
INFO:tensorflow:Saving checkpoints for 3460 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:global_step/sec: 160.621
INFO:tensorflow:loss = 453.3019, step = 3460 (0.622 sec)
INFO:tensorflow:Loss for final step: 453.3019.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03

INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-3968
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"../trained_model/a_ql_linear_emb/export/exporter/temp-b'1552525035'/saved_model.pb"
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-3968
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 3969 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:loss = 830.647, step = 3969
INFO:tensorflow:global_step/sec: 98.6573
INFO:tensorflow:loss = 581.26117, step = 4069 (1.015 sec)
INFO:tensorflow:Saving checkpoints for 4169 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:global_step/sec: 148.917
INFO:tens

INFO:tensorflow:'classification' : Classification input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dtype=string>, 'contentType': <tf.Tensor 'Placeholder_5:0' shape=(?,) dtype=string>, 'last_year_modified': <tf.Tensor 'Placeholder_6:0' shape=(?,) dtype=string>}
INFO:tensorflow:'regression' : Regression input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dt

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures EXCLUDED from export because they cannot be be served via TensorFlow Serving APIs:
INFO:tensorflow:'serving_default' : Classification input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dtype=string>, 'contentType': <tf.Tensor 'Placeholder_5:0' shape=(?,) dtype=string>, 'last_year_modified': <tf.Tensor 'Placeholder_6:0' shape=(?,) dtype=string>}
INFO:tensorflow:'classification'

INFO:tensorflow:loss = 585.8371, step = 7257 (0.531 sec)
INFO:tensorflow:Saving checkpoints for 7346 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:Loss for final step: 320.7989.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-14-00:58:05
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-7346
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-14-00:58:06
INFO:tensorflow:Saving dict for global step 7346: accuracy = 0.83793086, accuracy_baseline = 0.93881804, auc = 0.9043923, auc_precision_recall = 0.48563433, average_loss = 3.4676785, global_step = 7346, label/mean = 0.061181948, loss = 883.0593, precision = 0.26151732, prediction/mean = 0.21997961, recall = 0.90412045
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:te

INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"../trained_model/a_ql_linear_emb/export/exporter/temp-b'1552525095'/saved_model.pb"
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-7945
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 7946 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:loss = 1914.096, step = 7946
INFO:tensorflow:global_step/sec: 92.685
INFO:tensorflow:loss = 517.0714, step = 8046 (1.080 sec)
INFO:tensorflow:Saving checkpoints for 8146 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:global_step/sec: 149.215
INFO:tensorflow:loss = 61.056107, step = 8146 (0.670 sec)
INFO:tensorflow:global_step/sec: 188.8
INFO:tensorflow:loss = 123.21312, step = 82

INFO:tensorflow:'classification' : Classification input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dtype=string>, 'contentType': <tf.Tensor 'Placeholder_5:0' shape=(?,) dtype=string>, 'last_year_modified': <tf.Tensor 'Placeholder_6:0' shape=(?,) dtype=string>}
INFO:tensorflow:'regression' : Regression input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int64>, 'contentLength': <tf.Tensor 'Placeholder_1:0' shape=(?,) dtype=int64>, 'query_length': <tf.Tensor 'Placeholder_2:0' shape=(?,) dtype=int64>, 'serverType': <tf.Tensor 'Placeholder_3:0' shape=(?,) dtype=string>, 'poweredBy': <tf.Tensor 'Placeholder_4:0' shape=(?,) dt

INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-14-00:58:48
INFO:tensorflow:Saving dict for global step 10287: accuracy = 0.8550928, accuracy_baseline = 0.93881804, auc = 0.9397255, auc_precision_recall = 0.6405482, average_loss = 0.641042, global_step = 10287, label/mean = 0.061181948, loss = 163.24411, precision = 0.28407103, prediction/mean = 0.19834705, recall = 0.90015846
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predict']
INFO:tensorflow:Signatures EXCLUDED from export because they cannot be be served via TensorFlow Serving APIs:
INFO:tensorflow:'serving_default' : Classification input must be a single string Tensor; got {'url_length': <tf.Tensor 'Placeholder:0' shape=(?,) dtype=int

INFO:tensorflow:global_step/sec: 152.001
INFO:tensorflow:loss = 262.6518, step = 10949 (0.658 sec)
INFO:tensorflow:global_step/sec: 166.786
INFO:tensorflow:loss = 683.2887, step = 11049 (0.600 sec)
INFO:tensorflow:Saving checkpoints for 11149 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:global_step/sec: 117.285
INFO:tensorflow:loss = 152.83087, step = 11149 (0.854 sec)
INFO:tensorflow:Saving checkpoints for 11207 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:Loss for final step: 95.60734.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-03-14-00:59:04
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-11207
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2019-03-14-00:59:05
INFO:tensorflow:Saving dict for global step 11207: accuracy = 

INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-11793
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"../trained_model/a_ql_linear_emb/export/exporter/temp-b'1552525153'/saved_model.pb"
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../trained_model/a_ql_linear_emb/model.ckpt-11793
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 11794 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:loss = 449.7841, step = 11794
INFO:tensorflow:global_step/sec: 101.475
INFO:tensorflow:loss = 701.5953, step = 11894 (0.986 sec)
INFO:tensorflow:Saving checkpoints for 11920 into ../trained_model/a_ql_linear_emb/model.ckpt.
INFO:tensorflow:Loss for final step: 1806.12

In [None]:
# NOTE(review): `i` is not defined in any visible cell and this cell shows no
# execution count; it looks like the leftover body of a loop over runs.
# Define `i` (or restore the loop) before running this cell.
outdir = '../learning_rate_study/{}'.format(i)

### Serving 

In [99]:
url_ex = "https://github.com/warmspringwinds/tf-image-segmentation/issues/10"
len(url_ex)

66

In [100]:
%%writefile url.json
{"url_lenght": 66}

Overwriting url.json


In [None]:
%%bash

# Run a local prediction against an exported SavedModel using the instances in url.json.
# NOTE(review): `location` is an absolute, machine-specific path to a url_len_dnn export;
# the training cell in this notebook writes to ../trained_model/a_ql_linear_emb/ instead.
# Point it at the export you actually want to test.
location=/Users/paul/Desktop/Google_ASL/trained_model/url_len_dnn/export/exporter/1552430670
gcloud ml-engine local predict --model-dir=$location --json-instances=url.json --verbosity debug 

#### Load models

In [41]:
import tensorflow as tf

# Directory that the estimator below uses as its model_dir.
output_dir = '../trained_model/url_len_linear/'

# NOTE(review): this cell redefines create_feature_cols (without the `embedding`
# parameter) and rebinds feature_cols and estimator, all of which earlier cells
# also define. Which version is active depends on cell execution order.
def create_feature_cols():
    """Return the feature columns of the URL-length-only model: one numeric column."""
    cols = [tf.feature_column.numeric_column('url_length')]
    #try bucketize url_length 5 
    return cols 

feature_cols = create_feature_cols()

# Checkpoint/summary settings for the estimator built on output_dir.
run_config = tf.estimator.RunConfig(model_dir=output_dir, 
                                  keep_checkpoint_max=10,
                                  save_summary_steps=100, 
                                  save_checkpoints_steps=200)

# Only constructs the estimator; nothing is trained in this cell.
estimator = tf.estimator.LinearClassifier(feature_columns=feature_cols, 
                                       config=run_config)

INFO:tensorflow:Using config: {'_model_dir': '../trained_model/url_len_linear/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 200, '_save_checkpoints_secs': None, '_session_config': None, '_keep_checkpoint_max': 10, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12e068860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [42]:
predictions = estimator.predict(input_fn=make_test_input_fn(test_df))

In [43]:
list(predictions)[:20]

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ../trained_model/url_len_linear/model.ckpt-7000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


[{'logits': array([0.9270397], dtype=float32),
  'logistic': array([0.7164743], dtype=float32),
  'probabilities': array([0.28352568, 0.7164743 ], dtype=float32),
  'class_ids': array([1]),
  'classes': array([b'1'], dtype=object)},
 {'logits': array([-0.37803602], dtype=float32),
  'logistic': array([0.40660068], dtype=float32),
  'probabilities': array([0.59339935, 0.40660068], dtype=float32),
  'class_ids': array([0]),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([-0.09155598], dtype=float32),
  'logistic': array([0.477127], dtype=float32),
  'probabilities': array([0.52287304, 0.477127  ], dtype=float32),
  'class_ids': array([0]),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([-0.20296489], dtype=float32),
  'logistic': array([0.44943225], dtype=float32),
  'probabilities': array([0.55056775, 0.44943225], dtype=float32),
  'class_ids': array([0]),
  'classes': array([b'0'], dtype=object)},
 {'logits': array([0.9270397], dtype=float32),
  'logistic': 