In [13]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.contrib.learn.python.learn.estimators import model_fn as model_fn_lib
from sklearn.utils import shuffle

In [14]:
tf.logging.set_verbosity(tf.logging.INFO)

In [15]:
# Import data
COLUMNS = ["media_type", "mode_name", "print_height",
           "print_width","print_id", "R", "G", "B","width","height","w/h","size","print_time_sec"]
FEATURES = ["media_type", "media_waste", "mode_name", "print_height",
           "print_width","print_id", "R", "G", "B","width","height","w/h","size"]
LABEL = "print_time_sec"

data_set = pd.read_csv("data_sheet.csv", skipinitialspace=True,
                           skiprows=1, names=COLUMNS).astype(np.float32)

In [16]:
dummy_fields = ['media_type', 'mode_name','print_id']
for each in dummy_fields:
    dummies = pd.get_dummies(data_set[each], prefix=each, drop_first=False)
    data_set = pd.concat([data_set, dummies], axis=1)
    
fields_to_drop = ['media_type', 'mode_name','print_id']
data = data_set.drop(fields_to_drop, axis=1)
data.to_csv('out.csv')

In [17]:
time = data_set['print_time_sec']
data = data.drop('print_time_sec', axis=1)
data = pd.concat([data, time], axis=1)
data.to_csv('out.csv')
data.shape

(21672, 24)

In [18]:
FEATURES =['print_height', 'print_width', 'R', 'G', 'B', 'width', 'height',
       'w/h', 'size', 'media_type_1.0', 'media_type_2.0',
       'media_type_3.0', 'media_type_4.0', 'media_type_5.0',
       'mode_name_1.0', 'mode_name_2.0', 'mode_name_3.0', 'mode_name_4.0',
       'print_id_1.0', 'print_id_2.0', 'print_id_3.0', 'print_id_4.0',
       'print_id_5.0']

In [19]:
#cut data to three sets
def train_split(data):
    data = shuffle(data)
    total=data.shape[0]
    # split data  to test data 5%
    test = data[-int(0.05*total):]
    # Now remove the test data from the data set 
    data = data[:-int(0.05*total)]
    # split data  to valid data 2%
    valid = data[-int(0.01*total):]
    # Now remove the test data from the data set 
    data = data[:-int(0.01*total)]
    
    return data,test,valid

train,test,valid=train_split(data)

In [20]:
print (valid.shape,test.shape,train.shape)
train.to_csv('train.csv')
test.to_csv('test.csv')
valid.to_csv('valid.csv')

(216, 24) (1083, 24) (20373, 24)


In [21]:
def model_fn(features, targets, mode, params):
  """Model function for Estimator."""

  """input_layer = tf.contrib.layers.fully_connected(features, 
                num_outputs=10,
                weights_initializer = tf.contrib.layers.xavier_initializer(uniform=True),
                weights_regularizer = tf.contrib.layers.l2_regularizer(scale=params["reg"]),
                activation_fn = tf.nn.relu)
    
  #dropout = tf.layers.dropout(inputs=input_layer, rate=0.3, training=mode == learn.ModeKeys.TRAIN)

  output_layer = tf.contrib.layers.fully_connected(input_layer, 
                num_outputs=1,
                weights_initializer = tf.contrib.layers.xavier_initializer(uniform=True),
                weights_regularizer = tf.contrib.layers.l1_regularizer(scale=params["reg"],),
                activation_fn = tf.nn.relu)"""

  # Connect the first hidden layer to input layer
  # (features) with relu activation
    
  first_hidden_layer = tf.contrib.layers.fully_connected(features,
                                                 num_outputs=10,
                                                 weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                                 activation_fn=tf.sigmoid)
  # Connect the output layer to second hidden layer (no activation fn)
  output_layer = tf.contrib.layers.linear(first_hidden_layer, 1)


  # Reshape output layer to 1-dim Tensor to return predictions
  predictions = tf.reshape(output_layer, [-1])
  predictions_dict = {"time": predictions}

  # Calculate loss using mean squared error
  loss = tf.losses.mean_squared_error(targets, predictions)

  # Calculate root mean squared error as additional eval metric
  eval_metric_ops = {
      "rmse":
          tf.metrics.root_mean_squared_error(
              tf.cast(targets, tf.float64), predictions)
  }

  train_op = tf.contrib.layers.optimize_loss(
      loss=loss,
      global_step=tf.contrib.framework.get_global_step(),
      learning_rate=params["learning_rate"],
      optimizer="SGD")

  return model_fn_lib.ModelFnOps(
      mode=mode,
      predictions=predictions_dict,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=eval_metric_ops)

In [22]:
LEARNING_RATE=0.2
REGULIZATION = 2.0
# Set model params
model_params = {"learning_rate": LEARNING_RATE,"reg": REGULIZATION }

# Instantiate Estimator
nn = tf.contrib.learn.Estimator(model_fn=model_fn, params=model_params,model_dir='estimator')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': None, '_save_checkpoints_secs': 600, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_task_type': None, '_environment': 'local', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdb172d1310>, '_tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_num_worker_replicas': 0, '_task_id': 0, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_evaluation_master': '', '_keep_checkpoint_every_n_hours': 10000, '_master': ''}


In [23]:
def train_fn(data_set):
    #data_set = (data_set - data_set.mean()) / (data_set.max() - data_set.min())
    feature_cols = data_set[FEATURES]
    labels = data_set[LABEL]
    return feature_cols, labels
def test_fn(data_set):
    feature_cols = data_set[FEATURES]
    labels = data_set[LABEL]
    return feature_cols, labels
def valid_fn(data_set):
    feature_cols = data_set[FEATURES]
    labels = data_set[LABEL]
    return feature_cols, labels

train_x, train_y = train_fn(train)
test_x, test_y = test_fn(test)
valid_x, valid_y = valid_fn(valid)

In [24]:
# Score accuracy
nn.fit(train_x,train_y, steps=300)
#print("Loss: %s" % ev["loss"])
#print("Root Mean Squared Error: %s" % ev["rmse"])

Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from estimator/model.ckpt-6655
INFO:tensorflow:Saving checkpoints for 6656 into estimator/model.ckpt.
INFO:tensorflow:loss = 2234.58, step = 6656
INFO:tensorflow:global_step/sec: 4.28736
INFO:tensorflow:loss = 2234.51, step = 6756 (23.325 sec)
INFO:tensorflow:global_step/sec: 4.30767
INFO:tensorflow:loss = 223

Estimator(params={'learning_rate': 0.2, 'reg': 2.0})

In [25]:
train_x.head

<bound method DataFrame.head of        print_height  print_width            R            G            B  \
8320     639.995361  1550.035034   44577252.0   43942596.0   42873080.0   
504      455.601990  1629.973511   40172720.0   24940432.0   22035256.0   
9033     711.115906  1449.986816   15230464.0   10914380.0   13827288.0   
15836    355.600006  1499.987793   26207620.0   17506776.0   17896340.0   
15003   1111.123779  1449.986816    4531374.0    4410229.0    4733680.0   
4378    1000.082642  1499.996948   35740800.0   32265118.0   33711436.0   
1039     779.271973  1469.982666   17886904.0   19526310.0   20848636.0   
19962   1099.989380  1499.996948    4480406.0    5000298.0    4250862.0   
7149    1070.038086  1449.986816   25842580.0   25561780.0   24792696.0   
85       376.332123  1580.014526   55755200.0   51938436.0   49114996.0   
8575     659.892029  1469.982666   52473752.0   50485028.0   49423288.0   
14718    820.081360  1549.992676   22016884.0   21223216.0   2029182

In [26]:
train_x.iloc[0:3]

Unnamed: 0,print_height,print_width,R,G,B,width,height,w/h,size,media_type_1.0,...,media_type_5.0,mode_name_1.0,mode_name_2.0,mode_name_3.0,mode_name_4.0,print_id_1.0,print_id_2.0,print_id_3.0,print_id_4.0,print_id_5.0
8320,639.995361,1550.035034,44577252.0,43942596.0,42873080.0,124.0,300.0,0.413333,15.0,1,...,0,1,0,0,0,1,0,0,0,0
504,455.60199,1629.973511,40172720.0,24940432.0,22035256.0,300.0,205.0,1.463415,22.0,0,...,0,0,1,0,0,0,1,0,0,0
9033,711.115906,1449.986816,15230464.0,10914380.0,13827288.0,187.0,300.0,0.623333,27.0,0,...,0,0,0,1,0,0,0,1,0,0


In [27]:
# Score accuracy
ev = nn.evaluate(test_x,test_y, steps=1)
print("Loss: %s" % ev["loss"])
print("Root Mean Squared Error: %s" % ev["rmse"])

Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Starting evaluation at 2017-07-27-09:59:30
INFO:tensorflow:Restoring parameters from estimator/model.ckpt-6955
INFO:tensorflow:Evaluation [1/1]
INFO:tensorflow:Finished evaluation at 2017-07-27-09:59:30
INFO:tensorflow:Saving dict for global step 6955: global_step = 6955, loss = 2282.87, rmse = 47.7794
Loss: 2282.87
Root Mean Squared Error: 47.7794


In [28]:
# Print out predictions
predictions = nn.predict(valid_x, as_iterable=True)
for i, p in enumerate(predictions):
  print("Prediction %s: %s" % (i + 1, p))

Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Restoring parameters from estimator/model.ckpt-6955
Prediction 1: {'time': 65.944727536756744}
Prediction 2: {'time': 65.944727536756744}
Prediction 3: {'time': 65.944727536756744}
Prediction 4: {'time': 65.944727536756744}
Prediction 5: {'time': 65.944727536756744}
Prediction 6: {'time': 65.944727536756744}
Prediction 7: {'time': 65.944727536756744}
Prediction 8: {'time': 65.944727536756744}
Prediction 9: {'time': 65.944727536756744}
Prediction 10: {'time': 65.944727536756744}
Prediction 11: {'time': 65.944727536756744}
Prediction 12: {'time': 65.944727536756744}
Prediction 13: {'time': 65.944727536756744}
Prediction 14: {'time': 65.944727536756744}
Prediction 15: {'time

In [29]:
for i in range(valid_y.shape[0]):
    print(i+1,valid_y.iloc[i])

1 112.0
2 121.0
3 122.0
4 62.0
5 22.0
6 28.0
7 62.0
8 50.0
9 211.0
10 44.0
11 16.0
12 100.0
13 49.0
14 100.0
15 27.0
16 112.0
17 29.0
18 62.0
19 59.0
20 101.0
21 62.0
22 15.0
23 27.0
24 37.0
25 16.0
26 198.0
27 44.0
28 171.0
29 30.0
30 46.0
31 43.0
32 165.0
33 91.0
34 63.0
35 37.0
36 35.0
37 27.0
38 24.0
39 35.0
40 85.0
41 18.0
42 70.0
43 132.0
44 33.0
45 76.0
46 131.0
47 80.0
48 22.0
49 24.0
50 10.0
51 50.0
52 34.0
53 26.0
54 47.0
55 86.0
56 66.0
57 19.0
58 40.0
59 44.0
60 43.0
61 21.0
62 162.0
63 124.0
64 55.0
65 40.0
66 29.0
67 39.0
68 64.0
69 129.0
70 25.0
71 196.0
72 55.0
73 107.0
74 97.0
75 22.0
76 51.0
77 31.0
78 88.0
79 249.0
80 21.0
81 30.0
82 51.0
83 30.0
84 55.0
85 136.0
86 70.0
87 41.0
88 22.0
89 60.0
90 60.0
91 170.0
92 109.0
93 26.0
94 147.0
95 60.0
96 45.0
97 33.0
98 182.0
99 24.0
100 112.0
101 44.0
102 24.0
103 25.0
104 133.0
105 151.0
106 106.0
107 39.0
108 117.0
109 30.0
110 223.0
111 53.0
112 55.0
113 40.0
114 57.0
115 56.0
116 17.0
117 39.0
118 57.0
119 63.0
120 37.