In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

In [21]:
def load_data(path):
    '''Load the dataset and one-hot encode its feature columns.

    Parameters
    ----------
    path : str
        Path of a CSV file without a header row. The last column is used as
        the label; every other column is treated as a categorical feature.

    Returns
    -------
    (X, y) : tuple
        X is a DataFrame holding the one-hot encoded features, with string
        column names "c0", "c1", ...; y is the last column of the file,
        returned unmodified.
    '''
    # The data file has no header row.
    data = pd.read_csv(path, header=None)
    # Split into the feature matrix (all but the last column) and the labels.
    X, y = data.iloc[:, :-1], data.iloc[:, -1]
    # One-hot encode every feature so the magnitude of a category value
    # carries no meaning: each column expands into one indicator column per
    # distinct value (a column with four distinct values becomes four columns).
    # categories="auto" derives the categories from the distinct values seen
    # in each column, so no separate LabelEncoder pass is needed beforehand
    # (this is what the scikit-learn warning about LabelEncoder recommends).
    ohe = OneHotEncoder(categories="auto")
    X = pd.DataFrame(ohe.fit_transform(X).toarray())
    # The column labels are later used as TensorFlow feature-column keys,
    # which must be strings, so prefix the integer positions with "c".
    X.columns = ["c{}".format(col) for col in X.columns]

    return X, y

def train_input_fn(features, labels, batch_size=50, epochs=10,
                   shuffle_buffer=10000, seed=0):
    '''Build the input pipeline used for training.

    Parameters
    ----------
    features : mapping or DataFrame
        Feature matrix; it is converted with ``dict(features)`` so that each
        column name becomes a feature key.
    labels : array-like
        Labels aligned with the rows of ``features``.
    batch_size : int, optional
        Number of examples per batch (default 50).
    epochs : int, optional
        Number of passes over the data (default 10).
    shuffle_buffer : int, optional
        Size of the shuffle buffer (default 10000).
    seed : int, optional
        Seed passed to the shuffle step (default 0).

    Returns
    -------
    dataset : tf.data.Dataset
        Shuffled, repeated and batched dataset of (features, labels).
    '''
    # Slice the (features, labels) pair row by row into a dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # Shuffle, then repeat for the requested number of epochs, then batch.
    dataset = dataset.shuffle(shuffle_buffer, seed=seed)
    dataset = dataset.repeat(epochs)
    dataset = dataset.batch(batch_size)
    return dataset

def eval_input_fn(features, labels=None, batch_size=100):
    '''Build the input pipeline used for evaluation or prediction.

    Parameters
    ----------
    features : mapping or DataFrame
        Feature matrix; it is converted with ``dict(features)`` so that each
        column name becomes a feature key.
    labels : array-like, optional
        Labels aligned with the rows of ``features``. Omit (None) when
        predicting; supply them when evaluating.
    batch_size : int, optional
        Number of examples per batch (default 100).

    Returns
    -------
    dataset : tf.data.Dataset
        Batched dataset of features, or of (features, labels) when labels
        are given. The data is neither shuffled nor repeated.
    '''
    # Convert to a plain dict keyed by column name.
    features = dict(features)
    # Prediction has no labels; evaluation pairs the features with them.
    inputs = features if labels is None else (features, labels)
    # Slice row by row and group into fixed-size batches.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    return dataset.batch(batch_size)

# Load the encoded features and labels, then hold out 25% of the rows as the
# test set (fixed random_state so the split is repeatable).
X, y = load_data(r"facebook.csv")
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# One numeric TensorFlow feature column per column of the training frame.
my_feature_columns = [
    tf.feature_column.numeric_column(key=name) for name in train_X.keys()
]

# Fully connected classifier: two hidden layers of 512 units, ten output
# classes, optimised with SGD (stochastic gradient descent).
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[512, 512],
    n_classes=10,
    optimizer="SGD",
)
classifier.train(input_fn=lambda: train_input_fn(train_X, train_y))
# Kept as the final bare expression so the notebook displays the metrics.
classifier.evaluate(input_fn=lambda: eval_input_fn(test_X, test_y))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_summary_steps': 100, '_task_type': 'worker', '_master': '', '_task_id': 0, '_num_ps_replicas': 0, '_is_chief': True, '_save_checkpoints_steps': None, '_service': None, '_global_id_in_cluster': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f98b25f25c0>, '_model_dir': '/tmp/tmps7apx7cm', '_tf_random_seed': None, '_log_step_count_steps': 100, '_keep_checkpoint_max': 5, '_num_worker_replicas': 1, '_keep_checkpoint_every_n_hours': 10000, '_evaluation_master': '', '_session_config': None, '_save_checkpoints_secs': 600}
INFO:tensorflow:Calling model_fn.


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmps7apx7cm/model.ckpt.
INFO:tensorflow:step = 1, loss = 116.49904
INFO:tensorflow:global_step/sec: 87.0945
INFO:tensorflow:step = 101, loss = 46.601715 (1.149 sec)
INFO:tensorflow:global_step/sec: 119.118
INFO:tensorflow:step = 201, loss = 50.133774 (0.839 sec)
INFO:tensorflow:global_step/sec: 129.546
INFO:tensorflow:step = 301, loss = 46.32362 (0.773 sec)
INFO:tensorflow:global_step/sec: 95.7944
INFO:tensorflow:step = 401, loss = 51.374577 (1.044 sec)
INFO:tensorflow:global_step/sec: 106.926
INFO:tensorflow:step = 501, loss = 49.173717 (0.936 sec)
INFO:tensorflow:global_step/sec: 119.935
INFO:tensorflow:step = 601, loss = 52.120735 (0.832 sec)
INFO:tensorflow:global_step/sec: 126.483
INFO:tensorflow:step = 701, loss = 46.24914 (0.792 

{'accuracy': 0.9912042,
 'average_loss': 0.05025649,
 'global_step': 3752,
 'loss': 4.9881563}