In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import feature_column
from tensorflow.keras import layers

In [3]:
# Toy dataset: ten students, each with a numeric score ('marks'), a coarse
# band ('grade') and a letter 'point'. The lists are index-aligned.
data = {
    'marks': [55, 21, 63, 88, 74, 54, 95, 41, 84, 52],
    'grade': [
        'average', 'poor', 'average', 'good', 'good',
        'average', 'good', 'average', 'good', 'average',
    ],
    'point': ['c', 'f', 'c+', 'b+', 'b', 'c', 'a', 'd+', 'b+', 'c'],
}

In [4]:
# Tabular view of the sample records; the bare name on the last line
# makes the notebook render the frame.
df = pd.DataFrame(data=data)
df

Unnamed: 0,marks,grade,point
0,55,average,c
1,21,poor,f
2,63,average,c+
3,88,good,b+
4,74,good,b
5,54,average,c
6,95,good,a
7,41,average,d+
8,84,good,b+
9,52,average,c


A utility function to show the transformation applied by a feature column

In [5]:
def demo(feature_column, features=None):
    """Print the dense array a feature column produces for a batch of features.

    Args:
        feature_column: The feature column to wrap in a ``layers.DenseFeatures``
            layer. Inside this function the parameter name shadows the
            ``feature_column`` module imported at the top of the notebook.
        features: Optional mapping of feature name to values to transform.
            When omitted, the notebook-level ``data`` dict is used, which is
            what every existing one-argument call relies on.

    Returns:
        None. The transformed batch is printed as a NumPy array.
    """
    if features is None:
        # Fall back to the notebook's sample records so demo(col) keeps working.
        features = data
    feature_layer = layers.DenseFeatures(feature_column)
    print(feature_layer(features).numpy())

In [6]:
# Numeric column: the raw 'marks' values come through as one float per row.
marks = feature_column.numeric_column(key="marks")
demo(marks)

[[55.]
 [21.]
 [63.]
 [88.]
 [74.]
 [54.]
 [95.]
 [41.]
 [84.]
 [52.]]


In [9]:
# Bucketized column: the 7 boundaries split the score range into 8 buckets,
# and each row becomes a one-hot vector over those buckets
# (e.g. 55 falls in the 50-60 bucket, index 3; 95 falls in the last bucket).
mark_buckets = feature_column.bucketized_column(
    source_column=marks,
    boundaries=[30, 40, 50, 60, 70, 80, 90],
)
demo(mark_buckets)

[[0. 0. 0. 1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]]


In [7]:
# Categorical column with a fixed vocabulary. The list order sets the one-hot
# position: 'poor' -> 0, 'average' -> 1, 'good' -> 2.
grade = feature_column.categorical_column_with_vocabulary_list(
    key='grade',
    vocabulary_list=['poor', 'average', 'good'],
)
# indicator_column turns the categorical column into a dense one-hot encoding.
grade_one_hot = feature_column.indicator_column(categorical_column=grade)
demo(grade_one_hot)

[[0. 1. 0.]
 [1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]
 [0. 0. 1.]
 [0. 1. 0.]]


In [10]:
# Vocabulary is taken from the data itself: the distinct 'point' values in
# order of first appearance ('c', 'f', 'c+', 'b+', 'b', 'a', 'd+'), giving a
# 7-wide one-hot encoding.
point = feature_column.categorical_column_with_vocabulary_list(
    key='point',
    vocabulary_list=df['point'].unique(),
)
point_one_hot = feature_column.indicator_column(categorical_column=point)
demo(point_one_hot)


[[1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0.]]


An embedding column takes a categorical column as its input (here, `point`) and maps each category to a dense vector.

In [16]:
# Embedding column: maps each 'point' category to a dense 4-dimensional vector
# (rows with the same category get the same vector).
# The embedding weights are randomly initialised and untrained here, so fix
# TensorFlow's global seed to print the same numbers on every run of this cell.
tf.random.set_seed(42)
point_embedding = feature_column.embedding_column(point, dimension=4)
demo(point_embedding)

[[ 0.0384841  -0.23278144 -0.41532314  0.8692275 ]
 [-0.5046904  -0.6346681   0.54887253  0.78975034]
 [ 0.3510972  -0.30516034  0.13907273 -0.01665784]
 [-0.8513122  -0.14177276 -0.06480829 -0.9019053 ]
 [-0.38479728  0.26229852 -0.3750753   0.10349771]
 [ 0.0384841  -0.23278144 -0.41532314  0.8692275 ]
 [-0.68880725  0.4615699   0.5323758  -0.28481877]
 [-0.35976374  0.50586504  0.02167026 -0.22984824]
 [-0.8513122  -0.14177276 -0.06480829 -0.9019053 ]
 [ 0.0384841  -0.23278144 -0.41532314  0.8692275 ]]
