In [1]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf

import os
import time
import numpy as np
import glob
import matplotlib.pyplot as plt
import PIL
import imageio

from IPython import display

np.set_printoptions(precision=3, suppress=True)

In [6]:
COLUMN_NAMES = ["phi0", "phi1", "phi2", "phi3", 
                "phi4", "phi5", "psi0", "psi1", 
                "psi2", "psi3", "psi4", "psi5", "cluster_id"]
LABEL_NAME = "cluster_id"
INPUT_NAMES = list(COLUMN_NAMES)
INPUT_NAMES.remove(LABEL_NAME)
CSV_PATH = "asp7/asp7.csv"
BATCH_SIZE = 10
SHUFFLE_BUFFER_SIZE = 10000
CORES_USED = 3
with open(CSV_PATH) as f:
    ROW_COUNT = sum(1 for line in f) - 1
f.close()
# Sets a split size for train and test data set
TRAIN_SIZE = int(ROW_COUNT * 0.7)

original_ds = tf.data.experimental.make_csv_dataset(
    file_pattern = CSV_PATH,
    batch_size = BATCH_SIZE,
    column_names=COLUMN_NAMES,
    column_defaults=None,
    label_name=LABEL_NAME,
    select_columns=None,
    field_delim=',',
    use_quote_delim=True,
    na_value='',
    header=True,
    num_epochs=None,
    shuffle=True,
    shuffle_buffer_size=SHUFFLE_BUFFER_SIZE,
    shuffle_seed=None,
    prefetch_buffer_size=None,
    num_parallel_reads=CORES_USED,
    sloppy=False,
    num_rows_for_inference=100,
    compression_type=None,
    ignore_errors=False).shuffle(SHUFFLE_BUFFER_SIZE)

train_ds = original_ds.take(TRAIN_SIZE)
test_ds = original_ds.skip(TRAIN_SIZE)

In [7]:
def show_batch_wo_label(dataset):
    for batch in dataset.take(1):
        for key, value in batch.items():
            print("{:20s}: {}".format(key,value.numpy()))
def show_batch_w_label(dataset):
    for batch, label in dataset.take(1):
        for key, value in batch.items():
            print("{:20s}: {}".format(key,value.numpy()))

In [8]:
class PackNumericFeatures(object):
    def __init__(self, names):
        self.names = names

    def __call__(self, features, labels):
        numeric_features = [features.pop(name) for name in self.names]
        numeric_features = [tf.cast(feat, tf.float32) for feat in numeric_features]
        numeric_features = tf.stack(numeric_features, axis=-1)
        features['numeric'] = numeric_features
        #returns the features is two forms acked together which can be used as input and controll output later on
        return features, features['numeric']

packed_train_ds = train_ds.map(PackNumericFeatures(INPUT_NAMES))
packed_test_ds = test_ds.map(PackNumericFeatures(INPUT_NAMES))

numeric_column = tf.feature_column.numeric_column('numeric', shape=[len(INPUT_NAMES)])
numeric_columns = [numeric_column]

example_batch, labels_batch = next(iter(packed_train_ds))    
#print(example_batch)
#print(labels_batch)

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
numeric_layer = tf.keras.layers.DenseFeatures(numeric_columns)
model = tf.keras.Sequential([
    numeric_layer,
    Dense(len(INPUT_NAMES)),
    Dense(1, activation='tanh'),
    Dense(len(INPUT_NAMES))
])
model.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss=tf.keras.losses.MeanSquaredError(),
                    metrics=[tf.keras.metrics.MeanSquaredError()])

model.fit(packed_train_ds, epochs=1)



Train for 7000 steps

Test accuracy: 1.6098665


In [None]:
test_loss, test_acc = model.evaluate(packed_test_ds, verbose=1, steps = 10)
print('\nTest accuracy:', test_acc)