In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def splitDataToTrainAndTest(data_df,train_frac):
    from sklearn.cross_validation import train_test_split
    train = data_df.sample(frac=train_frac,random_state=1)
    test = data_df.loc[~data_df.index.isin(train.index)]
    return train,test
def convertToBelegung(normalized_value,max_value):
    return ((normalized_value - 0.01)/.99)*max_value

class FullDataSet:
    def __init__(self):
        self.test = DataSet()
        self.training = DataSet()
    def setTrain_input(self,data):
        self.training.inputData = data
    def setTrain_output(self,data):
        self.training.outputData = data
    def setTest_input(self,data):
        self.test.inputData = data
    def setTest_output(self,data):
        self.test.outputData = data
    def getNumberInputs(self):
        return self.test.inputData.shape[1]
class DataSet:
    def __init__(self):
        self.inputData = []
        self.outputData = []
    def next_batch(self,batch_size):
        b_in = self.inputData[np.random.choice(self.inputData.shape[0],batch_size,replace=False),:]
        b_out = self.outputData[np.random.choice(self.outputData.shape[0],batch_size, replace=False),:]
        return b_in,b_out
    def num_examples(self):
        return self.inputData.shape[0]
def makeDataSet():
    all_data = pd.read_csv('26_8_16_PZS_Belgugung_All_Wide_NanOmited.csv',sep=",")
    data_df = all_data.iloc[:,2:all_data.shape[1]]
    max_value = np.amax(data_df.values)
    data_df = ((data_df/max_value)*.99) + 0.01
    train_df, test_df = splitDataToTrainAndTest(data_df,0.8)

    index_of_Output = 0
    train_df_toUse = train_df.drop(train_df.columns[[index_of_Output]],axis=1)
    test_df_toUse = test_df.drop(test_df.columns[[index_of_Output]],axis=1)
    test_output_df = test_df.iloc[:,index_of_Output]
    train_output_df = train_df.iloc[:,index_of_Output]
    
    theData = FullDataSet()
    theData.setTrain_input(train_df_toUse.values)
    theData.setTest_input(test_df_toUse.values)
    theData.setTrain_output(train_output_df.values.reshape(-1,1))
    theData.setTest_output(test_output_df.values.reshape(-1,1))
    return theData

In [3]:
def inference(x,number_input_nodes,number_hidden_nodes,number_output_nodes):
    with tf.name_scope('input-hidden'):
        weights = weight_variable([number_input_nodes,number_hidden_nodes])
        bias = bias_variable([number_hidden_nodes])
        input_hidden = tf.nn.relu(tf.matmul(x,weights)+bias)
    
    with tf.name_scope('hidden-output'):
        weights = weight_variable([number_hidden_nodes,number_output_nodes])
        bias = bias_variable([number_output_nodes])
        hidden_output = tf.nn.relu(tf.matmul(input_hidden,weights)+bias)
    return hidden_output
def loss(output,real):
    final_error = tf.square(tf.sub(real,output),name="myError")
    tf.histogram_summary("final_error",final_error)
    return final_error
def training(loss,learning_rate):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    global_step = tf.Variable(0,name='global_step',trainable=False)
    train_op = optimizer.minimize(loss,global_step = global_step)
    return train_op
def evaluation(modelOutput,correctOutput):
    return tf.reduce_mean(tf.square(tf.sub(modelOutput,correctOutput)))

def fill_feed_dict(data_set,input_pl, output_pl):
    inputData,correctOutputData = data_set.next_batch(1)
    feed_dict = {
        input_pl : inputData,
        output_pl : correctOutputData,
    }
    return feed_dict
def weight_variable(shape):
    initial = tf.truncated_normal(shape,stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    initial = tf.constant(0.1,shape=shape)
    return tf.Variable(initial)
def do_eval(sess,
           eval_op,
           input_pl,
           modelOutput_pl,
           dataset,
           batch_size):
    average = 0
    steps_per_epoch = dataset.num_examples()//batch_size
    num_examples = steps_per_epoch * batch_size
    for step in range(steps_per_epoch):
        feed_dict = fill_feed_dict(dataset, input_pl, modelOutput_pl)
        average += sess.run(eval_op,feed_dict = feed_dict)
    precision = average/num_examples
    tf.histogram_summary("evaluation",precision)
    print('Num examples %d  Precision @ 1 %.04f'%(num_examples,precision))
    return precision

def placeholder_inputs(batch_size, number_input_nodes, number_output_nodes):
    inputs_pl = tf.placeholder(tf.float32,shape=[batch_size,number_input_nodes])
    outputs_pl = tf.placeholder(tf.float32,shape=[batch_size,number_output_nodes])
    return inputs_pl, outputs_pl

def run_training():
    dataSet = makeDataSet()
    print(dataSet.getNumberInputs())
    #define neural network parameters
    number_input_nodes = dataSet.getNumberInputs()
    print(number_input_nodes)


    number_output_nodes = 1
    learning_rate = 0.3
    number_hidden_nodes = 20
    output_dir = '/Users/ahartens/Desktop/tf'
    max_steps = 10000
    batch_size = 1
    
    graph = tf.Graph()
    with graph.as_default(),tf.device('/cpu:0'):
        input_pl,correctOutput_pl = placeholder_inputs(batch_size, number_input_nodes, number_output_nodes)
        
        modelOutput_op = inference(input_pl,
                                  number_input_nodes,
                                  number_hidden_nodes,
                                  number_output_nodes)
        loss_op = loss(modelOutput_op, correctOutput_pl)
        
        train_op = training(loss_op, learning_rate)
        eval_op = evaluation(modelOutput_op, correctOutput_pl)
        
        summary_op = tf.merge_all_summaries()
        
    with tf.Session(graph = graph) as sess:
        summary_writer = tf.train.SummaryWriter(output_dir, sess.graph)
        sess.run(tf.initialize_all_variables())
        
        for step in range(max_steps):
            myFeedDict = fill_feed_dict(dataSet.training,
                                       input_pl,
                                       correctOutput_pl)
            _,loss_value,summary_str = sess.run([train_op,loss_op,summary_op],feed_dict = myFeedDict)
            if(step%100 == 0):
                summary_writer.add_summary(summary_str)
                myFeedDict = fill_feed_dict(dataSet.test,
                                           input_pl,
                                           correctOutput_pl)
                summary_str,eval_str = sess.run([summary_op,eval_op],feed_dict = myFeedDict)
                summary_writer.add_summary(summary_str)
                summary_writer.flush()
                print("Loss val ist : %f"%loss_value)

In [4]:
run_training()

126
126
Loss val ist : 0.000003
Loss val ist : 0.084236
Loss val ist : 0.025115
Loss val ist : 0.000163
Loss val ist : 0.015570
Loss val ist : 0.001472
Loss val ist : 0.023772
Loss val ist : 0.077682
Loss val ist : 0.008969
Loss val ist : 0.028815
Loss val ist : 0.000349
Loss val ist : 0.017021
Loss val ist : 0.005024
Loss val ist : 0.024975
Loss val ist : 0.004576
Loss val ist : 0.021186
Loss val ist : 0.000255
Loss val ist : 0.007840
Loss val ist : 0.008767
Loss val ist : 0.007937
Loss val ist : 0.000023
Loss val ist : 0.003697
Loss val ist : 0.048999
Loss val ist : 0.001234
Loss val ist : 0.011455
Loss val ist : 0.002794
Loss val ist : 0.000104
Loss val ist : 0.000171
Loss val ist : 0.000001
Loss val ist : 0.001800
Loss val ist : 0.002434
Loss val ist : 0.007048
Loss val ist : 0.015392
Loss val ist : 0.039006
Loss val ist : 0.000611
Loss val ist : 0.002011
Loss val ist : 0.010678
Loss val ist : 0.000184
Loss val ist : 0.004961
Loss val ist : 0.006789
Loss val ist : 0.002756
Loss val