In [17]:
#https://github.com/backstopmedia/tensorflowbook/blob/master/chapters/04_machine_learning_basics/logistic_regression.py
import os
import time
import tensorflow as tf
import numpy as np


In [18]:
W = tf.Variable(tf.zeros([5,1]),name='weight')
b = tf.Variable(0.,name="bias")

In [19]:
def combine_inputs(X):
    return tf.matmul(X,W) + b

def read_csv(batch_size,file_name,record_defaults):
    filename_queue = tf.train.string_input_producer([os.path.join(os.getcwd(), file_name)])

    reader = tf.TextLineReader(skip_header_lines=1)
    key, value = reader.read(filename_queue)

    # decode_csv will convert a Tensor from type string (the text line) in
    # a tuple of tensor columns with the specified defaults, which also
    # sets the data type for each column
    decoded = tf.decode_csv(value, record_defaults=record_defaults)

    # batch actually reads the file and loads "batch_size" rows in a single tensor
    return tf.train.shuffle_batch(decoded,
                                  batch_size=batch_size,
                                  capacity=batch_size * 50,
                                  min_after_dequeue=batch_size) 

In [28]:
def inference(X):
    """推断模型"""
    return tf.sigmoid(combine_inputs(X)) 

def loss(X,Y):
    """交叉熵(cross entropy) 损失函数
    sum( y[i]*log(y_predicted[i]) + (1-y[i]*log(1-y_predicted[i])) = )"""
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=combine_inputs(X), labels=Y))

    # ValueError: Only call `sigmoid_cross_entropy_with_logits` with named arguments (labels=..., logits=..., ...) 
    # return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( combine_inputs(X),Y ))

def inputs():
    """读取训练数据"""
    passenger_id, survived, pclass, name, sex, age, sibsp, parch, ticket, fare, cabin, embarked = \
        read_csv(100, "data/titanic_train.csv", [[0.0], [0.0], [0], [""], [""], [0.0], [0.0], [0.0], [""], [0.0], [""], [""]])

    # convert categorical data
    is_first_class = tf.to_float(tf.equal(pclass, [1]))
    is_second_class = tf.to_float(tf.equal(pclass, [2]))
    is_third_class = tf.to_float(tf.equal(pclass, [3]))

    gender = tf.to_float(tf.equal(sex, ["female"]))

    # Finally we pack all the features in a single matrix;
    # We then transpose to have a matrix with one example per row and one feature per column.
    features = tf.transpose(tf.stack([is_first_class, is_second_class, is_third_class, gender, age]))
    survived = tf.reshape(survived, [100, 1])
    
    #AttributeError: module 'tensorflow' has no attribute 'pack'
    #pack -> stack
    
    return features, survived

def train(total_loss):
    """依据 计算的总损失 训练或调整 模型参数"""
    learning_rate = 0.01
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

def evaluate(sess,X,Y):
    """对训练得到的模型进行评估"""
    predicted = tf.cast(inference(X)>0.5,tf.float32)
    print( sess.run( tf.reduce_mean( tf.cast( tf.equal(predicted,Y),tf.float32)) ) )
    

In [29]:
# saver = tf.train.Saver() #设置检查点

with tf.Session() as sess:
    tf.global_variables_initializer().run()

    X, Y = inputs()

    total_loss = loss(X, Y)
    train_op = train(total_loss)

    coord = tf.train.Coordinator() #??
    threads = tf.train.start_queue_runners(sess=sess, coord=coord) #?

    # actual training loop
    training_steps = 1000
    for step in range(training_steps):
        sess.run([train_op])
        # for debugging and learning purposes, see how the loss gets decremented thru training steps
        if step % 10 == 0:
            print("loss: ", sess.run([total_loss]))
#             saver.save(sess,'my-model',global_step=step)
            

    evaluate(sess, X, Y)
    
    time.sleep(5)

    coord.request_stop()
    coord.join(threads)
    
#     saver.save(sess,"my-model",global_step=training_steps)
    
    sess.close() 
    

loss:  [0.74112856]
loss:  [0.69459045]
loss:  [0.7059314]
loss:  [0.7709655]
loss:  [0.6284484]
loss:  [0.65015]
loss:  [0.8081596]
loss:  [0.706176]
loss:  [0.6128902]
loss:  [0.6996337]
loss:  [0.6913719]
loss:  [0.6345721]
loss:  [0.6593137]
loss:  [0.6463507]
loss:  [0.75401735]
loss:  [0.67709243]
loss:  [0.630373]
loss:  [0.69942063]
loss:  [0.64196306]
loss:  [0.62336695]
loss:  [0.8259187]
loss:  [0.6907437]
loss:  [0.64609003]
loss:  [0.63631785]
loss:  [0.6974695]
loss:  [0.62128085]
loss:  [0.6258336]
loss:  [0.63927764]
loss:  [0.5952814]
loss:  [0.8654955]
loss:  [0.60920805]
loss:  [0.68913984]
loss:  [0.80222887]
loss:  [0.6925276]
loss:  [0.59289837]
loss:  [0.6356472]
loss:  [0.8292947]
loss:  [0.6576491]
loss:  [0.7511002]
loss:  [0.63935095]
loss:  [0.5987729]
loss:  [0.59163004]
loss:  [0.5724953]
loss:  [0.59519696]
loss:  [0.6857043]
loss:  [0.71730745]
loss:  [0.5937946]
loss:  [0.64771914]
loss:  [0.6466329]
loss:  [0.5734243]
loss:  [1.0825099]
loss:  [0.54768