In [142]:
import tensorflow as tf
import numpy as np
import csv
import sklearn.model_selection as sk
from datetime import datetime

In [143]:
def read_metadata(filename):
    with open(filename, mode='r') as infile:
        reader = csv.reader(infile)
        mydict = {rows[0]:rows[1] for rows in reader}
    return mydict

my_meta_dict = read_metadata('metadata.csv')
num_labels = int(my_meta_dict['nb_labels'])

In [144]:
def read_csv(my_csv):
    data = []
    labels = []
    with open(my_csv, newline='') as csvfile:
         reader = csv.DictReader(csvfile)
         for row in reader:
             data.append([row['x'],row['y'],row['intent']])
             labels.append(row['cluster'])
             #print(row['x'], row['y'],row['cluster'],row['intent'])
         X = np.array(data)
         print(X.shape)
         y = np.array(labels)
         X_train, X_test, y_train, y_test = sk.train_test_split(X, y, test_size=0.2)
         X_train, X_val, y_train, y_val = sk.train_test_split(
             X_train, y_train, test_size=0.2, random_state=1)
         print(X_train.shape)

         return X_train, X_val, X_test, y_train, y_val, y_test



def string_to_index(my_list):
    indexed = []
    unique_vals = np.unique(my_list).tolist()
    for i in range(len(my_list)):
        indexed.append(float(unique_vals.index(my_list[i])))
    indexed = np.array(indexed)
    return indexed

def change_data(my_array):
    new_intent = string_to_index(my_array[:,2])
    new_x = my_array[:,0].astype(np.float)
    new_y = my_array[:,1].astype(np.float)

    new_array = np.array((new_x,new_y,new_intent))
    return new_array.transpose()

def string_to_float(my_array):
    return my_array.astype(np.float)

##transform labels into arrays for training
def to_one_hot(y, n_classes=num_labels):
    y = y.astype(np.int)
    _y = np.zeros((len(y), n_classes))
    _y[np.arange(len(y)), y] = 1
    return _y

X_train, X_val, X_test, y_train, y_val, y_test = read_csv('clusters.csv')
X_train = change_data(X_train)
X_val = change_data(X_val)
X_test = change_data(X_test)
y_train = string_to_float(y_train)
y_train = to_one_hot(y_train)
y_val = string_to_float(y_val)
y_val = to_one_hot(y_val)
y_test = string_to_float(y_test)
y_test = to_one_hot(y_test)
print('xtrain')
print(X_train.shape)
print('xtest')
print(X_test.shape)
print('xval')
print(X_val.shape)
print('ytrain')
print(y_train.shape)
print('yvalid')
print(y_val.shape)
print('ytest')
print(y_test.shape)

(100, 3)
(64, 3)
xtrain
(64, 3)
xtest
(20, 3)
xval
(16, 3)
ytrain
(64, 17)
yvalid
(16, 17)
ytest
(20, 17)


In [145]:
batch_size = 16
patch_size = 5
depth = 16
num_hidden = 64
nb_points = 64
num_channels = 3
now = datetime.now()
logdir = "./Classifier/" + now.strftime("%Y%m%d-%H%M%S") + "/"

graph = tf.Graph()

with graph.as_default():
    #x = tf.placeholder(tf.float32, [nb_points, 3], name='X')  # inputs
    #y = tf.placeholder(tf.float32, [None, 1], name='Y')  # outputs
    # Input data.
    tf_train_dataset = tf.placeholder(
    tf.float32, shape=[batch_size,num_channels], name='train_dataset')
    tf_train_labels = tf.placeholder(tf.float64, shape=[16,num_labels],name = 'train_labels')
    tf_valid_dataset = tf.constant(X_val,dtype=tf.float32)
    tf_test_dataset = tf.constant(X_test,dtype=tf.float32)
    
    # Variables.
    with tf.name_scope('hidden') as scope:
        W1 = tf.Variable(tf.truncated_normal(
            [num_channels, depth], stddev=0.1),dtype=tf.float32)
        b1 = tf.Variable(tf.zeros([depth]),dtype=tf.float32)
        W2 = tf.Variable(tf.truncated_normal(
              [depth, depth], stddev=0.1),dtype=tf.float32)
        b2 = tf.Variable(tf.constant(1.0, shape=[depth]),dtype=tf.float32)
        W3 = tf.Variable(tf.truncated_normal(
              [depth, num_hidden], stddev=0.1),dtype=tf.float32)
        b3 = tf.Variable(tf.constant(1.0, shape=[num_hidden]),dtype=tf.float32)
        W4 = tf.Variable(tf.truncated_normal(
              [num_hidden, num_labels], stddev=0.1),dtype=tf.float32)
        b4 = tf.Variable(tf.constant(1.0, shape=[num_labels]),dtype=tf.float32)
    
    # Model.
    def model(data):
        with tf.name_scope('layers'):
            # compute layer 1 
            with tf.name_scope('A1'):
                A1 = tf.nn.sigmoid(tf.matmul(data, W1) + b1)
            #compute layer 2
            with tf.name_scope('A2'):
                A2 = tf.nn.sigmoid(tf.matmul(A1, W2) + b2)
            #compute layer 3
            with tf.name_scope('A3'):
                A3 = tf.nn.sigmoid(tf.matmul(A2, W3) + b3)
            #Prevent overfitting with drouput regularization
            dropout= tf.nn.dropout(A3, keep_prob=0.5)
            return (tf.matmul(dropout, W4) + b4)

    #summaries for tensorboard
    def variable_summaries(var):
        """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
        with tf.name_scope('summaries'):
            mean = tf.reduce_mean(var)
            tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
            tf.summary.scalar('stddev', stddev)
            tf.summary.scalar('max', tf.reduce_max(var))
            #tf.summary.scalar('min', tf.reduce_min(var))
            tf.summary.histogram('histogram', var)
    
    #calculate accuracy
    def accuracy(predictions, labels):
        return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
                / predictions.shape[0])

    # Training computation.
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels))
    variable_summaries(loss)


    # Optimizer.
    optimizer = tf.train.GradientDescentOptimizer(0.05).minimize(loss)
    
    # Include some Tensorboard visualization
    writer_train = tf.summary.FileWriter(logdir)
    loss_summary = tf.summary.scalar('loss_summary',loss)

    # Predictions for the training, validation, and test data.
    with tf.name_scope('predictions'):
        with tf.name_scope('train'):
            train_prediction = tf.nn.softmax(logits)
        with tf.name_scope('valid'):
            valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
        with tf.name_scope('test'):
            test_prediction = tf.nn.softmax(model(tf_test_dataset))
    
    # Merge all the summaries
    #merged = tf.summary.merge_all()
    #variables_names =[v.name for v in tf.trainable_variables()]

In [146]:
num_steps = 1000



with tf.Session(graph=graph) as session:
  session.run(tf.global_variables_initializer())
  writer_train.add_graph(session.graph)
  print('Initialized')

  for step in range(num_steps):
    offset = (step * batch_size) % (y_train.shape[0] - batch_size)
    batch_data = X_train[offset:(offset + batch_size),:]
    batch_labels = y_train[offset:(offset + batch_size)]
    feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
    _, l, predictions,ls = session.run(
      [optimizer, loss, train_prediction,loss_summary], feed_dict=feed_dict)
    if (step % 50 == 0):
      print('Minibatch loss at step %d: %f' % (step, l))
      print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
      print('Validation accuracy: %.1f%%' % accuracy(
        valid_prediction.eval(), y_val))
    
    writer_train.add_summary(ls,step)
  print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), y_test))

Initialized
Minibatch loss at step 0: 3.615470
Minibatch accuracy: 0.0%
Validation accuracy: 0.0%
Minibatch loss at step 50: 1.403585
Minibatch accuracy: 68.8%
Validation accuracy: 50.0%
Minibatch loss at step 100: 2.252873
Minibatch accuracy: 43.8%
Validation accuracy: 50.0%
Minibatch loss at step 150: 0.927659
Minibatch accuracy: 81.2%
Validation accuracy: 50.0%
Minibatch loss at step 200: 1.440455
Minibatch accuracy: 68.8%
Validation accuracy: 50.0%
Minibatch loss at step 250: 2.507010
Minibatch accuracy: 43.8%
Validation accuracy: 50.0%
Minibatch loss at step 300: 0.925275
Minibatch accuracy: 81.2%
Validation accuracy: 50.0%
Minibatch loss at step 350: 1.492309
Minibatch accuracy: 68.8%
Validation accuracy: 50.0%
Minibatch loss at step 400: 2.217010
Minibatch accuracy: 43.8%
Validation accuracy: 50.0%
Minibatch loss at step 450: 0.906179
Minibatch accuracy: 81.2%
Validation accuracy: 50.0%
Minibatch loss at step 500: 1.428463
Minibatch accuracy: 68.8%
Validation accuracy: 50.0%
Min