In [16]:
import tensorflow as tf
from tensorflow.python.estimator.inputs import numpy_io
import numpy as np
import collections
from tensorflow.python.framework import errors
from tensorflow.python.platform import test
from tensorflow.python.training import coordinator
from tensorflow import feature_column

from tensorflow.python.feature_column.feature_column import _LazyBuilder
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"]='3'

In [33]:
def test_bucketized_column():
    """Demonstrate ``bucketized_column`` on a numeric feature.

    Builds a four-row feature dict with ``price`` and ``age``, buckets
    ``price`` on the boundaries ``[0, 10, 20, 30, 40]`` and prints two dense
    tensors produced by ``feature_column.input_layer``:

    1. the bucketized price on its own, and
    2. the raw ``price``, the raw ``age`` and the bucketized price together.

    All ops are created in a private ``tf.Graph`` so that calling this
    function again (for example by re-running the notebook cell) does not keep
    appending nodes to the process-wide default graph.

    Returns:
        None. The evaluated tensors are printed.
    """
    # 4 sample rows. The dict holds every feature, not only the price.
    features = {
        'price': [[5.], [15.], [25.], [35.]],
        'age': [[10], [20], [30], [40]],
    }

    with tf.Graph().as_default():
        price_column = feature_column.numeric_column('price')
        age_column = feature_column.numeric_column('age')
        # Five boundaries split the number line into six buckets: one below 0,
        # four between consecutive boundaries and one from 40 upwards.
        bucket_price = feature_column.bucketized_column(price_column, [0, 10, 20, 30, 40])

        price_bucket_tensor = feature_column.input_layer(features, [bucket_price])
        # NOTE(review): the columns in the result are not laid out in the order
        # of this list; input_layer is documented to order them by column name.
        price_tensor = feature_column.input_layer(features, [price_column, age_column, bucket_price])

        # Numeric and bucketized columns own no variables or lookup tables, so
        # no initializer has to run before evaluating the tensors.
        with tf.Session() as session:
            print(session.run([price_bucket_tensor]))
            print(session.run([price_tensor]))


test_bucketized_column()


[array([[0., 1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 1., 0.]], dtype=float32)]
[array([[10.,  5.,  0.,  1.,  0.,  0.,  0.,  0.],
       [20., 15.,  0.,  0.,  1.,  0.,  0.,  0.],
       [30., 25.,  0.,  0.,  0.,  1.,  0.,  0.],
       [40., 35.,  0.,  0.,  0.,  0.,  1.,  0.]], dtype=float32)]


In [54]:
def test_categorical_column_with_vocabulary_list():
    """Demonstrate a vocabulary-list categorical column rendered as multi-hot.

    Each of the four sample rows carries four colour tokens. The tokens are
    mapped to ids through the vocabulary ``['R', 'G', 'B', 'A']`` and wrapped
    in an ``indicator_column``; the dense tensor returned by
    ``feature_column.input_layer`` is then printed.

    All ops are created in a private ``tf.Graph`` so that repeated calls do
    not accumulate nodes and lookup tables in the process-wide default graph,
    and so that ``tf.tables_initializer()`` only touches the table built here.

    Returns:
        None. The evaluated tensor is printed.
    """
    # 4 sample rows with several tokens each, so a token can repeat in a row.
    color_data = {
        'color': [
            ['R', 'R', 'R', 'G'],
            ['G', 'R', 'R', 'G'],
            ['B', 'G', 'R', 'G'],
            ['A', 'A', 'R', 'G'],
        ]
    }

    with tf.Graph().as_default():
        color_column = feature_column.categorical_column_with_vocabulary_list(
            'color', ['R', 'G', 'B', 'A'], dtype=tf.string, default_value=-1
        )

        # Convert the sparse ids into a dense representation. With several
        # tokens per row this is a multi-hot vector rather than a one-hot one.
        color_indicator = feature_column.indicator_column(color_column)
        color_dense_tensor = feature_column.input_layer(color_data, [color_indicator])

        with tf.Session() as session:
            # Only the vocabulary lookup table needs initialising; this column
            # combination creates no trainable variables.
            session.run(tf.tables_initializer())

            print(session.run([color_dense_tensor]))

test_categorical_column_with_vocabulary_list()

[array([[3., 1., 0., 0.],
       [2., 2., 0., 0.],
       [1., 2., 1., 0.],
       [1., 1., 0., 2.]], dtype=float32)]


In [6]:
def test_categorical_column_with_hash_bucket():
    """Demonstrate a hash-bucket categorical column rendered as multi-hot.

    Each of the four sample rows carries two colour tokens. The tokens are
    hashed into 10 buckets, wrapped in an ``indicator_column`` and the dense
    tensor returned by ``feature_column.input_layer`` is printed.

    All ops are created in a private ``tf.Graph`` so that repeated calls do
    not keep appending nodes to the process-wide default graph.

    Returns:
        None. The evaluated tensor is printed.
    """
    # 4 sample rows, two tokens per row.
    color_data = {'color': [['R', 'G'], ['G', 'B'], ['B', 'A'], ['A', 'G']]}

    with tf.Graph().as_default():
        color_column = feature_column.categorical_column_with_hash_bucket('color', 10)

        # Convert the sparse bucket ids into a dense representation. With two
        # tokens per row this is a multi-hot vector rather than a one-hot one.
        color_indicator = feature_column.indicator_column(color_column)
        color_dense_tensor = feature_column.input_layer(color_data, [color_indicator])

        # Hashing is stateless: there is no vocabulary table and no variable,
        # so nothing has to be initialised before the tensor is evaluated.
        with tf.Session() as session:
            print(session.run([color_dense_tensor]))

test_categorical_column_with_hash_bucket()


[array([[0., 1., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 1., 0.],
       [0., 1., 0., 0., 1., 0., 0., 0., 0., 0.]], dtype=float32)]


In [10]:
def test_embedding():
    """Demonstrate ``embedding_column`` on a vocabulary-list categorical column.

    Four single-token rows are mapped through the vocabulary
    ``['R', 'G', 'B']`` (``'A'`` is deliberately absent and therefore takes
    ``default_value=-1``) and embedded into 5 dimensions. The dense tensor
    returned by ``feature_column.input_layer`` is printed.

    All ops are created in a private ``tf.Graph`` so that repeated calls do
    not pile up extra embedding variables and lookup tables in the
    process-wide default graph, and so that the initializers below only act on
    the state created by this call.

    Returns:
        None. The evaluated tensor is printed. No random seed is set here, so
        the embedding values are expected to differ from run to run.
    """
    # 4 sample rows; the last token is not part of the vocabulary.
    color_data = {'color': [['R'], ['G'], ['B'], ['A']]}

    with tf.Graph().as_default():
        color_column = feature_column.categorical_column_with_vocabulary_list(
            'color', ['R', 'G', 'B'], dtype=tf.string, default_value=-1
        )

        color_embedding = feature_column.embedding_column(color_column, 5)
        color_embedding_dense_tensor = feature_column.input_layer(color_data, [color_embedding])

        with tf.Session() as session:
            # The embedding weights are variables and the vocabulary is a
            # lookup table; both must be initialised before evaluation.
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())

            print(session.run([color_embedding_dense_tensor]))

test_embedding()


[array([[-0.04973869,  0.20958067,  0.17388345, -0.13264756, -0.11004642],
       [-0.82928395, -0.78619385,  0.70044094, -0.4077269 ,  0.3656607 ],
       [ 0.05769355,  0.55874443, -0.37802103,  0.2991749 ,  0.10224926],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]],
      dtype=float32)]


In [18]:
def test_crossed_column():
    """Demonstrate ``crossed_column`` next to its two source columns.

    Three sample rows carry two ``price`` tokens and two ``color`` tokens
    each. Both features are vocabulary-list categorical columns; their cross
    is hashed into 16 buckets. Three dense tensors are printed, each obtained
    through ``indicator_column`` and ``feature_column.input_layer``: the
    crossed column, the ``price`` column and the ``color`` column.

    All ops are created in a private ``tf.Graph`` so that repeated calls do
    not accumulate nodes and lookup tables in the process-wide default graph,
    and so that ``tf.tables_initializer()`` only touches the tables built
    here.

    Returns:
        None. The evaluated tensors are printed under a header line each.
    """
    # 3 sample rows, two tokens per feature per row.
    features = {
        'price': [['A', 'A'], ['B', 'D'], ['C', 'A']],
        'color': [['R', 'R'], ['G', 'G'], ['B', 'B']]
    }

    with tf.Graph().as_default():
        price = feature_column.categorical_column_with_vocabulary_list('price', ['A', 'B', 'C', 'D'])
        color = feature_column.categorical_column_with_vocabulary_list('color', ['R', 'G', 'B'])
        price_x_color = feature_column.crossed_column([price, color], 16)

        # Indicator columns turn the sparse ids into dense multi-hot vectors.
        crossed_indicator = feature_column.indicator_column(price_x_color)
        crossed_dense_tensor = feature_column.input_layer(features, [crossed_indicator])

        price_indicator = feature_column.indicator_column(price)
        price_dense_tensor = feature_column.input_layer(features, [price_indicator])

        color_indicator = feature_column.indicator_column(color)
        color_dense_tensor = feature_column.input_layer(features, [color_indicator])

        with tf.Session() as session:
            # Only the vocabulary lookup tables need initialising; none of
            # these columns creates a trainable variable.
            session.run(tf.tables_initializer())

            print('use input_layer' + '_' * 40)
            print(session.run([crossed_dense_tensor]))

            print('price' + '_' * 40)
            print(session.run([price_dense_tensor]))

            print('color' + '_' * 40)
            print(session.run([color_dense_tensor]))

test_crossed_column()

use input_layer________________________________________
[array([[0., 0., 0., 0., 0., 0., 0., 0., 4., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 2., 0., 0., 0., 0., 2., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0., 2., 0.]],
      dtype=float32)]
price________________________________________
[array([[2., 0., 0., 0.],
       [0., 1., 0., 1.],
       [1., 0., 1., 0.]], dtype=float32)]
color________________________________________
[array([[2., 0., 0.],
       [0., 2., 0.],
       [0., 0., 2.]], dtype=float32)]
