# Premade Estimator

## Train, Valid, Test Split

In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import pandas as pd

from sklearn.model_selection import train_test_split

import json

In [2]:
# Print the version of the Python interpreter running this notebook.
from platform import python_version
print(python_version())

3.7.6


In [3]:
# Load the wine-quality table; the path is relative to the notebook's working directory.
wines_df = pd.read_csv("../data/winequality.csv")

In [None]:
# Preview the first two rows of the raw table.
wines_df.head(2)

In [4]:
# Per the original author's note, TensorFlow does not accept spaces in column
# names, so every space in a header is swapped for an underscore.
new_col_list = [header.replace(" ", "_") for header in wines_df.columns]
print(new_col_list)
wines_df.columns = new_col_list


['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality']


In [5]:
# Constants used when parsing the dataset.

# Column headers after spaces were replaced with underscores; the last entry
# ('quality') is the target label, the other eleven are the features.
CSV_COLUMN_NAMES = ['fixed_acidity', 'volatile_acidity', 'citric_acid', 'residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'quality']
# Quality scores treated as the target classes.
QUALITIES = [3, 4, 5, 6, 7, 8]

In [6]:
# Number of target classes; later passed to the classifier as n_classes.
count_classes = len(QUALITIES)

In [None]:
# NOTE(review): train_test_split is already imported in the first cell; this
# repeated import is harmless.
from sklearn.model_selection import train_test_split
# Shuffle all rows by sampling 100% of the frame without replacement.
# NOTE(review): no seed is passed here or to train_test_split below, so the
# splits differ from run to run.
wines_df = wines_df.sample(frac=1)
# Hold out 15% of the rows as the test set ...
df_dev, test = train_test_split(wines_df, test_size=0.15)
# ... then hold out 15% of the remaining rows as the validation set.
train, valid = train_test_split(df_dev, test_size=0.15)

In [None]:
# Separate the target from the features. `pop` removes the 'quality' column
# from the frame in place and returns it.
#
# DNNClassifier expects integer labels in {0, ..., n_classes - 1} when no
# label_vocabulary is supplied, whereas the quality scores listed in QUALITIES
# start at 3. Subtracting the smallest quality turns the scores into 0-based
# class indices, so index i corresponds to QUALITIES[i] (QUALITIES is a
# contiguous ascending range).
label_offset = min(QUALITIES)
train_y = train.pop('quality') - label_offset
test_y = test.pop('quality') - label_offset
# NOTE(review): `valid` still contains its 'quality' column; it is not used by
# the cells visible in this notebook.

# The target label column has now been removed from the features.
train.head()

In [None]:
# First three rows of the column at position 10, as a NumPy array.
# presumably this is 'alcohol' once 'quality' has been popped — TODO confirm column order
train.iloc[0:3,10].to_numpy()

In [None]:
# Preview the first three rows of the (shuffled) full table.
wines_df.head(3)

In [None]:
# Serialise one example to JSON.

# First row, every column except the last one.
data_srs = wines_df.iloc[0,0:-1]

# Series.to_json() returns a JSON string keyed by the Series index (the column names).
my_json = data_srs.to_json()

# Display the resulting string.
my_json

## Create input functions

You must create input functions to supply data for training, evaluating, and prediction.

An input function is a function that returns a tf.data.Dataset object which outputs the following two-element tuple:

* features - A Python dictionary in which:
    * Each key is the name of a feature.
    * Each value is an array containing all of the feature's values.
* label - An array containing the values of the label for every example.

Just to demonstrate the format of the input function, here's a simple implementation:

In [None]:
def input_evaluation_set():
    """Return a tiny hand-written (features, labels) pair in input-function format.

    Returns
    -------
    features : dict
        Maps each of the eleven feature names to a NumPy array holding three
        example values.
    labels : numpy.ndarray
        The three quality labels, one per example.
    """
    # numpy is not imported by the notebook's import cell, so bring it into
    # scope here to keep this demonstration self-contained.
    import numpy as np

    features = {
        'fixed_acidity': np.array([6.9, 6.2, 7.1]),
        'volatile_acidity': np.array([0.685, 0.58, 0.43]),
        'citric_acid': np.array([0., 0., 0.42]),
        'residual_sugar': np.array([2.5, 1.6, 5.5]),
        'chlorides': np.array([0.105, 0.065, 0.07]),
        'free_sulfur_dioxide': np.array([22., 8., 29.]),
        'total_sulfur_dioxide': np.array([37., 18., 129.]),
        'density': np.array([0.9966, 0.9966, 0.9973]),
        'pH': np.array([3.46, 3.56, 3.42]),
        'sulphates': np.array([0.57, 0.84, 0.72]),
        'alcohol': np.array([10.6, 9.4, 10.5]),
    }
    labels = np.array([6, 5, 6])
    return features, labels

Your input function may generate the features dictionary and label list any way you like. However, we recommend using TensorFlow's Dataset API, which can parse all sorts of data.

The Dataset API can handle a lot of common cases for you. For example, using the Dataset API, you can easily read in records from a large collection of files in parallel and join them into a single stream.

To keep things simple in this example you are going to load the data with pandas, and build an input pipeline from this in-memory data:

**features (dict)** keys: `CSV_COLUMN_NAMES[0:-1]` (i.e. every column except the target label); values: NumPy arrays of the feature values

**labels (np.array)** the values of the target variable

In [None]:
# Feature names only: every entry except the last one ('quality').
CSV_COLUMN_NAMES[0:-1]

In [None]:
def input_fn(features=CSV_COLUMN_NAMES[0:-1], labels=QUALITIES, training=True, batch_size=41):
    """Build the batched ``tf.data.Dataset`` used for training and evaluation.

    Parameters
    ----------
    features : mapping of feature name -> values
        Anything ``dict()`` accepts (the notebook passes a DataFrame).
    labels : array-like
        Target values, sliced in step with ``features``.
    training : bool
        When true, the dataset is shuffled and repeated.
    batch_size : int
        Number of examples per batch.

    NOTE(review): the defaults are evaluated once at definition time and do
    not look usable as-is; ``dict()`` of a list of strings raises ValueError.
    Callers in this notebook always pass both arguments explicitly.
    """
    feature_map = dict(features)
    pipeline = tf.data.Dataset.from_tensor_slices((feature_map, labels))
    if not training:
        # Evaluation: keep the original order, nothing is repeated.
        return pipeline.batch(batch_size)
    # Training: shuffle with a 1000-element buffer and repeat.
    return pipeline.shuffle(1000).repeat().batch(batch_size)
        
                                                 
                                                 

## Define the feature columns

In [None]:
# One numeric feature column per column of the training frame; these tell the
# estimator how to read each entry of the features dict.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train.keys()
]

## Instantiate an estimator

In [None]:
# Build a premade deep neural network classifier with two hidden layers.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    # Two hidden layers of 30 and 10 nodes respectively.
    hidden_units=[30,10],
    # One output class per entry of QUALITIES (6 classes, scores 3-8).
    # NOTE(review): without a label_vocabulary the estimator expects integer
    # labels in {0, ..., n_classes - 1} — confirm the labels fed to
    # train/evaluate are encoded that way.
    n_classes=count_classes)

## Train, Evaluate and Predict

In [None]:
# Train the model.
# The lambda defers building the dataset until the estimator calls it.

classifier.train(
    input_fn=lambda: input_fn(train, train_y, training=True),
    # Stop after 41 training steps (the input pipeline itself repeats when training=True).
    steps=41)

In [None]:
# Evaluate on the held-out test set; training=False skips shuffle/repeat in input_fn.
eval_result = classifier.evaluate(input_fn=lambda: input_fn(test, test_y, training=False))
# eval_result is unpacked as keyword arguments, so it must contain an 'accuracy' entry.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

Unlike the call to the train method, *you did not pass the steps argument to evaluate*. The input_fn for eval only yields a **single epoch** of data.

The eval_result dictionary also contains *the average_loss* (mean loss per sample), *the loss* (mean loss per mini-batch) and the value of the *estimator's global_step* (the number of training iterations it underwent).

In [None]:
# Show every metric returned by evaluate, not just the accuracy.
print(eval_result)

## Making predictions (inferring) from the trained model

In [None]:
# Generate predictions from the model
# NOTE(review): `expected` holds 11 placeholder values while predict_x holds 3
# examples; the zip in the printing cell stops after the first 3 — replace with
# the real expected labels.
expected = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Three unlabeled examples. Every feature must list exactly one value per
# example, so all lists have the same length.
predict_x = {
    'fixed_acidity': [7.1, 5.6, 0.7],
    # Was `0,352` (comma instead of decimal point), which produced a
    # four-element list and a ragged feature dict.
    'volatile_acidity': [0.150, 0.760, 0.352],
    'citric_acid': [0.0, 0.25, 0.13],
    'residual_sugar': [0.3, 1.5, 2.4],
    'chlorides': [0.034, 0.012, 0.056],
    'free_sulfur_dioxide': [14.0, 12.0, 15.0],
    'total_sulfur_dioxide': [45.0, 12.0, 56.0],
    'density': [0.98334, 0.96423, 0.9731],
    'pH': [3.12, 3.56, 3.78],
    'sulphates': [0.56, 0.75, 0.67],
    'alcohol': [12.5, 11.2, 10.3],
}

def input_fn(features, batch_size=256):
    """Build a batched, label-free ``tf.data.Dataset`` for prediction.

    NOTE(review): this reuses the name ``input_fn`` and therefore replaces the
    training/evaluation input function defined earlier in the notebook;
    re-running the train or evaluate cells after this one will call this
    version instead.
    """
    feature_map = dict(features)
    unlabeled = tf.data.Dataset.from_tensor_slices(feature_map)
    return unlabeled.batch(batch_size)

# Ask the trained classifier for predictions on predict_x; the lambda defers
# dataset construction until the estimator invokes it.
predictions = classifier.predict(
    input_fn=lambda: input_fn(predict_x))

In [None]:
# The original line ended in a dangling '.', which is a syntax error.
# This prints the object returned by predict(); it does not iterate over it.
print(predictions)

The predict method returns a Python iterable, yielding a dictionary of prediction results for each example. The following code prints a few predictions and their probabilities:

In [None]:
# Walk the predictions in step with the expected labels (zip stops at the
# shorter of the two).
for pred_dict, expec in zip(predictions, expected):
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    # `SPECIES` was never defined in this notebook (left over from the iris
    # tutorial); the class names here are the quality scores in QUALITIES.
    # assumes class id i corresponds to QUALITIES[i], i.e. labels were encoded
    # as 0-based indices into QUALITIES — TODO confirm
    print('Prediction is "{}" ({:.1f}%), expected "{}"'.format(
        QUALITIES[class_id], 100 * probability, expec))

# Custom Estimator 

1. input_func : transforms raw data to Dataset objects.

2. feature_func : function that defines the feature cols of the datasets

3. model_func : heart of the estimator. This func specifies the type of model used to make predictions and its characteristics e.g DNN with k layers so on and so forth

4. train_func, eval_func, test_func : functions relevant to implement the training, evaluation and testing procedures.


## 

## 

## 

## 

TODO: check whether the estimator should be implemented as a function or as a class.

In [None]:
class my_estimator(input_func, feat_func, model_func, train_func, valid_func, test_func):
    """Placeholder sketch for the custom estimator; it has no behaviour yet.

    NOTE(review): the names in the parentheses are parsed by Python as base
    classes, not as constructor parameters, and several of them (feat_func,
    valid_func) are not defined anywhere in the visible notebook. Presumably
    they were meant to be the building blocks passed to the estimator —
    confirm and move them into ``__init__`` if so.
    """
    pass

## Input_func

Importance (why we need an input func in our workflow?)


Functionality (what does an input func do?)


Implementation (how does the input func do what it is supposed to do?)

In [None]:
# input_func(csv) ----> [train_set, valid_set, test_set]

In [None]:
def input_func():
    """Placeholder for the custom input function; not runnable yet.

    Intended to return a ``(feature_dict, label)`` pair.
    NOTE(review): neither ``feature_dict`` nor ``label`` is assigned in the
    body, so calling this raises NameError until the elided step is written.
    """
    ...  # manipulate dataset, extracting the feature dict and the label
    return feature_dict, label


## Feature_func

feature_func(csv_header) ------> (features, target)



* We need to define the data type for every attribute column.

* We need to normalize each attribute according to its type and value.

In [None]:
# Define the feature columns including their names and type of data they contain.

def feature_func(csv_header):
    """Build and return the numeric feature columns for the model.

    Parameters
    ----------
    csv_header
        Currently unused.
        NOTE(review): presumably the column names the feature columns should
        eventually be derived from — the three columns below are hard-coded
        example names, not wine attributes.

    Returns
    -------
    list
        The three feature columns, in definition order. The original body
        created them and then discarded them (implicitly returning None).
    """
    population = tf.feature_column.numeric_column('population')
    crime_rate = tf.feature_column.numeric_column('crime_rate')
    # NOTE(review): `global_education_mean` is not defined in the visible
    # notebook; the lambda only looks it up when the normalizer is applied.
    median_education = tf.feature_column.numeric_column(
        'median_education', normalizer_fn=lambda x: x - global_education_mean)
    return [population, crime_rate, median_education]

## Model_func or Model_class? probably the 

In [None]:
# model_func(feature_columns, hidden_units = [ some_layer_1_nodes , ... , some_layer_n_nodes], n_classes = 8 ) -----> wine.classifier

In [None]:
# Instantiate an estimator, by passing in the feature columns.


def model_func(feature_columns, hidden_units=None, n_classes=8):
    """Create the wine classifier from its feature columns.

    Parameters
    ----------
    feature_columns : list
        Feature columns describing the model inputs.
    hidden_units : list of int, optional
        Number of nodes in each hidden layer. The original default was a list
        of undefined placeholder names; when omitted, ``[30, 10]`` is used to
        match the premade classifier built earlier in this notebook.
    n_classes : int
        Number of target classes.
        NOTE(review): the default of 8 differs from ``count_classes`` (6) used
        for the premade classifier — confirm which is intended.

    Returns
    -------
    tf.estimator.DNNClassifier
        The untrained classifier.
    """
    if hidden_units is None:
        # A fresh list per call avoids sharing a mutable default.
        hidden_units = [30, 10]
    # using premade at first then extend it to custom
    wine_classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=hidden_units,
        n_classes=n_classes)
    return wine_classifier

    
    

class BPSomeClass(object):
    """Brief class description

    Some more extensive description

    Attributes
    ----------
    attr1 : string
        Purpose of attr1.
    attr2 : float
        Purpose of attr2. Also reachable through the validated
        ``attribute2`` property.

    """

    def __init__(self, param1, param2, param3=0):
        """Example of docstring on the __init__ method.

        Parameters
        ----------
        param1 : str
            Description of `param1`.
        param2 : float
            Description of `param2`.
        param3 : int, optional
            Description of `param3`, defaults to 0.

        """
        self.attr1 = param1
        # Stored directly; the float check of the `attribute2` setter is not
        # applied at construction time.
        self.attr2 = param2
        # Side effect: prints the floor division of param3 by 4.
        print(param3 // 4)

    @property
    def attribute2(self):
        """Return the current value of ``attr2``."""
        return self.attr2

    @attribute2.setter
    def attribute2(self, new_attr2):
        """Replace ``attr2`` with a new float value.

        Raises
        ------
        ValueError
            If `new_attr2` is not a float.
        """
        # isinstance takes (object, type); the arguments were reversed, which
        # made every assignment fail with a TypeError.
        if not isinstance(new_attr2, float):
            raise ValueError("attribute2 must be a float, not {0}".format(new_attr2))
        self.attr2 = new_attr2


# Exercise the class: construct an instance (param3 keeps its default of 0),
# read the property, then assign a new float through the property setter.
bp_obj = BPSomeClass("a", 1.618)
print(bp_obj.attribute2)
bp_obj.attribute2 = 3.236

















# Wine.Classifier Methods

## train_method

In [None]:
# `input_fn` is the function created in Step 1

def train_func(arg):
    """Train the estimator for 2000 steps on the training input.

    Parameters
    ----------
    arg
        Currently unused.

    NOTE(review): `estimator` and `train_set` are read from the enclosing
    scope and are not defined in the visible notebook; `train_set` must be a
    callable input function.
    """
    # Estimator.train takes the input function as `input_fn`; the original
    # keyword `input_func` would be rejected as an unexpected argument.
    estimator.train(input_fn=train_set, steps=2000)


## val_method

In [None]:
def eval_func(arg):
    """Evaluate the estimator on the validation input and return its metrics.

    Parameters
    ----------
    arg
        Currently unused.

    Returns
    -------
    The value returned by ``estimator.evaluate``.

    NOTE(review): `estimator` and `eval_set` are read from the enclosing scope
    and are not defined in the visible notebook; `eval_set` must be a callable
    input function.
    """
    # The Estimator method is `evaluate` (there is no `eval`), and it takes the
    # input function as `input_fn`. The original `.....` placeholder for
    # further arguments was not valid Python and has been dropped.
    return estimator.evaluate(input_fn=eval_set)


## test_method


In [None]:
def test_func(arg):
    """Evaluate the estimator on the held-out test input and return its metrics.

    Parameters
    ----------
    arg
        Currently unused.

    Returns
    -------
    The value returned by ``estimator.evaluate``.

    NOTE(review): `estimator` and `test_set` are read from the enclosing scope
    and are not defined in the visible notebook; `test_set` must be a callable
    input function.
    """
    # tf.estimator.Estimator has no `test` method; scoring a held-out set is
    # done with `evaluate`, which takes the input function as `input_fn`. The
    # original `.....` placeholder was not valid Python and has been dropped.
    # NOTE(review): if raw predictions were intended instead, use
    # estimator.predict(input_fn=test_set) — confirm.
    return estimator.evaluate(input_fn=test_set)