In [1]:
import tensorflow as tf
import numpy as np
import random

In [2]:
# Synthetic "passthrough" dataset: every row carries the same integer as both
# its label and its input, so a perfect model is the identity function.
N_SAMPLES = 1_000_000  # one million rows
SEED = 42  # fixed so the shuffle (and therefore the train/test split) is reproducible

# Fill both fields of the structured array with vectorized assignments rather
# than building a million Python tuples one at a time.
values = np.arange(N_SAMPLES, dtype='int32')
data = np.empty(N_SAMPLES, dtype=[('label','int32'),('input', 'int32')])
data['label'] = values
data['input'] = values

# Shuffle whole rows by indexing with a seeded permutation; each row keeps its
# label/input pairing.
data = data[np.random.default_rng(SEED).permutation(N_SAMPLES)]

In [3]:
# Fraction of the (already shuffled) rows used for training; the remainder is the test set.
training_fraction = 0.85
train_end = int(len(data) * training_fraction)

# Read each column directly from the structured array by field name. This
# avoids iterating over every record in Python just to rebuild the two columns.
labels, inputs = data['label'], data['input']

# Field access returns strided views into `data`; copy so each split is an
# independent, contiguous int32 array.
train_labels, train_inputs = labels[:train_end].copy(), inputs[:train_end].copy()
test_labels, test_inputs = labels[train_end:].copy(), inputs[train_end:].copy()

# Sanity check: the two splits partition the dataset exactly.
assert len(train_labels) + len(test_labels) == len(data)
assert len(train_inputs) == len(train_labels) and len(test_inputs) == len(test_labels)

In [4]:
# Wrap each split in a tf.data.Dataset whose elements are (input, label) pairs.
train_dataset, test_dataset = (
    tf.data.Dataset.from_tensor_slices(split)
    for split in ((train_inputs, train_labels), (test_inputs, test_labels))
)

In [5]:
# Inspect the still-unbatched dataset: the displayed repr reports the shapes and
# dtypes of one (input, label) element.
train_dataset.take(1)

<TakeDataset shapes: ((), ()), types: (tf.int32, tf.int32)>

In [6]:
# Number of elements held in the shuffle buffer that training examples are sampled from.
BUFFER_SIZE = 100000
# Number of examples per batch (gradients are averaged over a batch).
BATCH_SIZE = 64

# prefetch() overlaps data loading with model execution. It is applied after
# batch(), so its buffer is counted in *batches*, not examples; a value such as
# BATCH_SIZE*2 would buffer 128 whole batches. AUTOTUNE lets tf.data pick the
# depth at runtime. (`tf.data.experimental.AUTOTUNE` is used because it is
# available across TF 2.x releases.)
#
# NOTE: this cell rebinds `train_dataset` / `test_dataset`. Re-run the cell that
# builds them from the arrays before re-running this one, otherwise the data is
# batched twice.
train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)
test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)

In [7]:
# Pull exactly one batch out of the training pipeline and print its inputs and
# labels as a quick sanity check.
batch_iter = iter(train_dataset.take(1))
sample, label = next(batch_iter)
print(sample, label)

tf.Tensor(
[148169   2043 706398 893290 501723 142653 138937  59461 439160 415494
 686416 493837 622210  57827 485866  72789 841402 105311 873995 669419
  32997 790671 795046 941943 943020 659629 493527 213510 394333  11241
 872020 989414 123002 898989 386961 475653 379802 436198 332336 871356
 899361 832365  35764 521489 313912 195112 421665  96668 724660 609607
 950451 485574 581799 643635 413537 373706 425740 766703 429613 854867
 713091 389457 495364 742086], shape=(64,), dtype=int32) tf.Tensor(
[148169   2043 706398 893290 501723 142653 138937  59461 439160 415494
 686416 493837 622210  57827 485866  72789 841402 105311 873995 669419
  32997 790671 795046 941943 943020 659629 493527 213510 394333  11241
 872020 989414 123002 898989 386961 475653 379802 436198 332336 871356
 899361 832365  35764 521489 313912 195112 421665  96668 724660 609607
 950451 485574 581799 643635 413537 373706 425740 766703 429613 854867
 713091 389457 495364 742086], shape=(64,), dtype=int32)


In [8]:
# Small fully connected regressor: two 64-unit ReLU hidden layers followed by a
# single linear output unit.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(64, activation="relu"))
model.add(tf.keras.layers.Dense(1))

# Regression objective: mean squared error, optimized with Adam at its default settings.
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MeanSquaredError(),
)

In [None]:
# Number of passes over the training set.
EPOCHS = 100

# `validation_steps` is intentionally not passed: the validation dataset is
# finite, so Keras runs it to exhaustion every epoch. Capping it at
# len(test_labels) // BATCH_SIZE rounds down and skips the final partial batch
# whenever the test set size is not a multiple of BATCH_SIZE.
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=test_dataset,
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100

In [None]:
# Evaluate on the held-out test batches. The model is compiled with an MSE loss
# and no extra metrics, so `res` presumably holds just that loss value (confirm on run).
res = model.evaluate(test_dataset)

In [None]:
import matplotlib.pyplot as plt

def plot_graphs(history, metric, ax=None):
    """Plot a per-epoch training metric and, when recorded, its validation counterpart.

    Args:
        history: Object exposing a ``history`` dict that maps metric names to
            per-epoch sequences of values (e.g. what ``model.fit`` returns).
        metric: Key of the training series to plot, such as ``"loss"``. The
            validation series is looked up under ``"val_" + metric``.
        ax: Optional axes-like object to draw on. When omitted, the current
            pyplot axes (``plt.gca()``) are used.

    Raises:
        KeyError: If ``metric`` itself is missing from ``history.history``.
    """
    if ax is None:
        ax = plt.gca()

    records = history.history
    val_metric = "val_" + metric

    ax.plot(records[metric], label=metric)
    # A run without validation data records no "val_*" series; skip that curve
    # instead of failing with KeyError.
    if val_metric in records:
        ax.plot(records[val_metric], label=val_metric)

    ax.set_xlabel("Epochs")
    ax.set_ylabel(metric)
    # Legend entries come from the `label=` given to each curve above.
    ax.legend()

In [None]:
# Plot the recorded training loss per epoch alongside its "val_loss" counterpart.
plot_graphs(history,'loss')

In [None]:
# Predict from the raw arrays rather than the batched datasets, so predictions
# stay aligned index-for-index with `train_labels` / `test_labels` (the training
# dataset is reshuffled on iteration).
test_predictions = model.predict(test_inputs)
train_predictions = model.predict(train_inputs)

# Draw on a dedicated figure so the scatter cannot end up on whatever figure is
# current (for instance the loss curves plotted earlier).
fig, ax = plt.subplots()

# The model ends in Dense(1), so predictions should carry a trailing unit axis;
# ravel them so x and y are both 1-D.
ax.scatter(train_labels, np.ravel(train_predictions), label='train', s=2)
ax.scatter(test_labels, np.ravel(test_predictions), label='test', s=2)
ax.legend()
ax.set_xlabel('Truth')
ax.set_ylabel('Prediction')
ax.set_title('Passthrough model: prediction vs. truth')

# Save before show(): some backends clear the figure once it has been shown.
fig.savefig('passthrough.png')
plt.show()