In [79]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
import pandas as pd
from scipy import stats
import numpy as np

In [80]:
# Column names of the wine-quality CSV files; the last entry ('quality') is the label.
CSV_COLUMN_NAMES = [
    'fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar',
    'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density',
    'pH', 'sulphates', 'alcohol', 'quality',
]
# Display label for each class id, indexed by class id.
# The raw integer quality scores are passed to the classifier as labels, so
# class id k stands for quality score k. The list therefore has to start at
# '0'; starting at '1' would report every prediction one point too high.
QUALITY_GROUPS = [str(score) for score in range(10)]

In [81]:
# Load the training and evaluation splits; both files are semicolon-separated
# and are resolved relative to the current working directory.
df_train, df_eval = (
    pd.read_csv(csv_path, sep=";")
    for csv_path in ('winequality_train.csv', 'winequality_eval.csv')
)

In [82]:
def zscore_inlier_mask(frame, threshold=3):
    """Return a boolean row mask selecting rows without extreme values.

    A row is kept only if the absolute z-score of every one of its columns
    is strictly below ``threshold``. The z-scores are computed from
    ``frame`` itself.

    Args:
        frame: numeric table (one row per sample) to score.
        threshold: exclusive upper bound on the absolute z-score.

    Returns:
        One boolean per row of ``frame``; True means "keep this row".
    """
    abs_z_scores = np.abs(stats.zscore(frame))
    return (abs_z_scores < threshold).all(axis=1)


# Drop outlier rows from both splits.
# NOTE(review): the frames still contain the 'quality' column here, so rows
# with an extreme label value are removed too, and each split is scored
# against its own statistics rather than the training statistics -- confirm
# that this is intended.
filtered_entries_train = zscore_inlier_mask(df_train)
df_train = df_train[filtered_entries_train]

filtered_entries_eval = zscore_inlier_mask(df_eval)
df_eval = df_eval[filtered_entries_eval]


In [83]:
# Preview the first rows of both splits after outlier removal.
# NOTE: a notebook cell renders only its last expression, so the training
# preview on the next line is computed but never displayed.
df_train.head()
df_eval.head()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,6.5,0.36,0.31,13.55,0.053,20,113,0.99544,3.2,0.56,11.0,6
1,6.5,0.16,0.33,1.0,0.027,23,75,0.9908,3.3,0.39,11.4,7
3,6.9,0.23,0.35,6.9,0.03,45,116,0.99244,2.8,0.54,11.0,6
4,6.4,0.17,0.34,13.4,0.044,45,139,0.99752,3.06,0.43,9.1,6
5,5.0,0.33,0.18,4.6,0.032,40,124,0.99114,3.18,0.4,11.0,6


In [84]:
# Separate the label from the features: `pop` removes the 'quality' column
# from the frame in place and returns it.
# NOTE: because the frames are mutated, running this cell a second time
# without reloading the data fails (the column is already gone).
y_train=df_train.pop('quality')
y_eval=df_eval.pop('quality')
# Preview the remaining feature columns of the training split.
df_train.head()


Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,8.8
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,9.5
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,10.1
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,9.9


In [85]:
# (rows, columns) of the training features after filtering and label removal.
df_train.shape

(4216, 11)

In [86]:
def input_fn(features, labels, training=True, batch_size=32):
    """Build the batched dataset fed to the estimator.

    Args:
        features: mapping-like table of feature columns; converted with
            ``dict(features)`` before slicing.
        labels: label values aligned with the rows of ``features``.
        training: when true, the data is shuffled (buffer of 1000 elements)
            and repeated indefinitely; when false it is left as a single
            ordered pass.
        batch_size: number of examples per batch.

    Returns:
        A ``tf.data.Dataset`` of ``(features_dict, labels)`` batches.
    """
    slices = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Evaluation / inference: one ordered pass, just batched.
    if not training:
        return slices.batch(batch_size)

    # Training: shuffle first, then repeat without end, then batch.
    shuffled = slices.shuffle(1000)
    return shuffled.repeat().batch(batch_size)

In [87]:
# Declare one numeric feature column per column of the training frame.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in df_train.keys()
]
print(my_feature_columns)

[NumericColumn(key='fixed_acidity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='volatile_acidity', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='citric_acid', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='residual_sugar', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='chlorides', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='free_sulfur_dioxide', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='total_sulfur_dioxide', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='density', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='pH', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='sulphates', shape=(1,), default_

In [88]:
# Fully connected classifier over the numeric feature columns: two hidden
# layers (60 units, then 4 units) and 10 output classes.
# NOTE(review): no label vocabulary is given, so the labels are presumably
# expected to be integer class ids in [0, n_classes) -- confirm against the
# estimator documentation.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns, hidden_units=[60, 4], n_classes=10)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\Paulina\\AppData\\Local\\Temp\\tmpg3kfn5tu', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [89]:
# Fit the classifier. The lambda defers building the dataset until the
# estimator calls it. With training=True, input_fn shuffles and repeats the
# data without end, so `steps` is what bounds the training run.
classifier.train(
    input_fn=lambda: input_fn(df_train, y_train, training=True),
    steps=20000)

p/sec: 365.939
INFO:tensorflow:loss = 17.478565, step = 100 (0.282 sec)
INFO:tensorflow:global_step/sec: 468.539
INFO:tensorflow:loss = 5.566843, step = 200 (0.210 sec)
INFO:tensorflow:global_step/sec: 477.464
INFO:tensorflow:loss = 2.4967225, step = 300 (0.207 sec)
INFO:tensorflow:global_step/sec: 586.36
INFO:tensorflow:loss = 2.3639963, step = 400 (0.169 sec)
INFO:tensorflow:global_step/sec: 701.164
INFO:tensorflow:loss = 2.2649336, step = 500 (0.144 sec)
INFO:tensorflow:global_step/sec: 716.193
INFO:tensorflow:loss = 2.257111, step = 600 (0.139 sec)
INFO:tensorflow:global_step/sec: 711.124
INFO:tensorflow:loss = 2.2557302, step = 700 (0.144 sec)
INFO:tensorflow:global_step/sec: 706.108
INFO:tensorflow:loss = 2.2481675, step = 800 (0.139 sec)
INFO:tensorflow:global_step/sec: 682.09
INFO:tensorflow:loss = 2.2501392, step = 900 (0.147 sec)
INFO:tensorflow:global_step/sec: 716.198
INFO:tensorflow:loss = 2.2436683, step = 1000 (0.139 sec)
INFO:tensorflow:global_step/sec: 721.351
INFO:ten

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x2182ec461f0>

In [90]:
# Score the trained model on the evaluation split; training=False makes
# input_fn return a single, unshuffled pass over the data.
eval_result = classifier.evaluate(
    input_fn=lambda: input_fn(df_eval, y_eval, training=False))

# eval_result is unpacked as a mapping; its 'accuracy' entry is printed with
# three decimals.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2021-04-24T22:57:18Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from C:\Users\Paulina\AppData\Local\Temp\tmpg3kfn5tu\model.ckpt-20000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.39398s
INFO:tensorflow:Finished evaluation at 2021-04-24-22:57:18
INFO:tensorflow:Saving dict for global step 20000: accuracy = 0.5494506, average_loss = 2.0113869, global_step = 20000, loss = 2.0100348
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 20000: C:\Users\Paulina\AppData\Loca

In [78]:
def predict_input_fn(features, batch_size=32):
    """Build a label-free, batched dataset for prediction.

    Args:
        features: mapping of feature name to a sequence of values; converted
            with ``dict(features)`` before slicing.
        batch_size: number of examples per batch.

    Returns:
        A ``tf.data.Dataset`` yielding batches of feature dictionaries.
    """
    feature_slices = tf.data.Dataset.from_tensor_slices(dict(features))
    return feature_slices.batch(batch_size)

def _read_float(prompt):
    """Prompt repeatedly until the user enters a finite number.

    ``str.isdigit`` must not be used for this check: it rejects any input
    containing a decimal point (e.g. "0.27"), and most wine features are
    fractional values.

    Args:
        prompt: text shown to the user before each attempt.

    Returns:
        The entered value as a float.
    """
    import math

    while True:
        raw_value = input(prompt)
        try:
            number = float(raw_value)
        except ValueError:
            print("Not a number, please try again.")
            continue
        # float() also accepts "nan" and "inf"; those are not usable inputs.
        if math.isfinite(number):
            return number
        print("Not a finite number, please try again.")


features = ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol']
predict = {}

# Collect one value per feature; each is wrapped in a one-element list so the
# dictionary describes a single example.
print("Please enter appropriate values.")
for feature in features:
    predict[feature] = [_read_float(feature + ": ")]

# `predict` returns a lazy iterable; results are produced while iterating.
predictions = classifier.predict(input_fn=lambda: predict_input_fn(predict))

for pred_dict in predictions:
    # Most likely class and the probability assigned to it.
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print('Prediction is "{}" ({:.1f}%)'.format(
        QUALITY_GROUPS[class_id], 100 * probability))


Please enter appropriate values.


KeyboardInterrupt: Interrupted by user