# Logistic regression

## Set the stage

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

Load the "weed" data in CSV format. The inputs are floating point values, the output is a class label encoded as an integer. The first entries in a row are the inputs, the final entry is the target.

In [None]:
# Load data
def _load_weed_csv(filename):
    """Load a header-less CSV with float32 inputs and an integer label in the last column."""
    return tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=filename,
        # `np.int` was only an alias for the builtin `int`; the alias was deprecated in
        # NumPy 1.20 and removed in 1.24, so the builtin is used directly (same dtype).
        target_dtype=int,
        features_dtype=np.float32,
        target_column=-1)


dataTrain = _load_weed_csv("../data/LSDA2017WeedCropTrain.csv")
dataTest = _load_weed_csv("../data/LSDA2017WeedCropTest.csv")

# Input dimension: number of columns of the training input matrix
inDim = dataTrain.data.shape[1]

## Define the graph

The `None` keyword in the `shape` definition corresponds to a variable-sized dimension. So the code works for any number of training or test data points.

In [None]:
# Placeholders for a batch of inputs (one row per data point) and a column of targets
x_data = tf.placeholder(dtype=tf.float32, shape=[None, inDim])
y_target = tf.placeholder(dtype=tf.float32, shape=[None, 1])

# Trainable parameters of the logistic regression, randomly initialised:
# a weight column A and a single bias b
A = tf.Variable(tf.random_normal(shape=[inDim, 1]))
b = tf.Variable(tf.random_normal(shape=[1, 1]))

# Model: affine function of the inputs; these are the logits, no sigmoid is applied here
weighted_inputs = tf.matmul(x_data, A)
model_output = tf.add(weighted_inputs, b)

# Loss: cross entropy computed from the logits, averaged over all fed data points
per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y_target, logits=model_output)
loss = tf.reduce_mean(per_example_loss)

# Optimizer: plain gradient descent with a fixed step size
learning_rate = 1e-5
my_opt = tf.train.GradientDescentOptimizer(learning_rate)
train_step = my_opt.minimize(loss)

The output of the model is the discriminant function, $\delta$ on the slides. Applying a sigmoid maps $\delta$ to the probability $P(y=1|x)$. Then we round the probability to the nearest integer, which corresponds to thresholding the probability at 0.5. This gives binary predictions. These are then compared to the desired targets. `tf.reduce_mean` computes the average of these comparisons, i.e., the accuracy.

In [None]:
# Turn the logits into probabilities, then into binary predictions.
# NOTE: tf.round is documented to round half to even, so a probability of exactly 0.5 maps to 0.
probability = tf.sigmoid(model_output)
prediction = tf.round(probability)

# 1.0 where the prediction equals the target, 0.0 otherwise
is_correct = tf.equal(prediction, y_target)
predictions_correct = tf.cast(is_correct, tf.float32)

# Fraction of correctly classified data points
accuracy = tf.reduce_mean(predictions_correct)

## Start session and run graph

In [None]:
# Op that assigns every variable in the graph its initial value
init = tf.global_variables_initializer()

# Open the session; it is kept open so that the following cells can reuse it
sess = tf.Session()

# Run the initializer once before any training step
sess.run(init)

In [None]:
# Training loop
# The feeds never change between iterations, so build them once instead of four times
# per iteration. np.transpose([...]) turns the target vector into a column, matching
# the [None, 1] shape of the y_target placeholder.
train_feed = {x_data: dataTrain.data, y_target: np.transpose([dataTrain.target])}
test_feed = {x_data: dataTest.data, y_target: np.transpose([dataTest.target])}

loss_vec = []
train_acc = []
test_acc = []
for i in range(10000):
    # One gradient descent update on the complete training set
    sess.run(train_step, feed_dict=train_feed)

    # Loss and training accuracy are fetched together, after the update, so the
    # forward pass over the training data is evaluated once rather than twice
    temp_loss, temp_acc_train = sess.run([loss, accuracy], feed_dict=train_feed)
    loss_vec.append(temp_loss)
    train_acc.append(temp_acc_train)

    # Accuracy on the test set with the current parameters
    temp_acc_test = sess.run(accuracy, feed_dict=test_feed)
    test_acc.append(temp_acc_test)

    # Report progress every 100 iterations
    if (i + 1) % 100 == 0:
        print(str(i+1) + ': Loss = ' + str(temp_loss) + ', Training acc. = ' + str(temp_acc_train) +
              ', Test acc. = ' + str(temp_acc_test))
        


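It took me some time to pass the labels correctly in the binary case: the `y_target` placeholder expects a column of shape `[None, 1]`, so the target vector is wrapped in a list and then transposed: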

In [None]:
# Show step by step how the loaded targets are turned into the column fed to y_target
# (the row/column reading assumes the targets load as a 1-D vector - TODO confirm)
raw_targets = dataTest.target
print(raw_targets)                      # targets as loaded

wrapped_targets = [raw_targets]
print(wrapped_targets)                  # wrapped in a list: a single row

column_targets = np.transpose(wrapped_targets)
print(column_targets)                   # transposed: one target per row

## Plot results

In [None]:
# Figure 1: training loss recorded after every iteration
loss_ax = plt.figure(1).gca()
loss_ax.plot(loss_vec, 'k-')
loss_ax.set_title('Cross Entropy')
loss_ax.set_xlabel('Iteration')
loss_ax.set_ylabel('Cross Entropy')

# Figure 2: training and test accuracy in one plot
acc_ax = plt.figure(2).gca()
curves = (
    (train_acc, 'k-', 'Training'),
    (test_acc, 'r--', 'Test'),
)
for values, line_style, curve_label in curves:
    acc_ax.plot(values, line_style, label=curve_label)
acc_ax.set_title('Accuracy')
acc_ax.set_xlabel('Generation')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc='lower right')

# Display both figures
plt.show()