In [1]:
# AutoEncoders
import tensorflow as tf
import numpy as np


class AutoEncoder:
    def __init__(self, input_dim, hidden_dim, learning_rate=0.01, epochs=250):
        self.epochs = epochs
        self.learning_rate = learning_rate
        
        x = tf.placeholder(tf.float32, shape=[None, input_dim])
        
        with tf.name_scope('encode'):
            weights = tf.random.normal([input_dim, hidden_dim], dtype=tf.float32, name="weights")
            biases = tf.Variable(tf.zeros([hidden_dim], dtype=tf.float32, name="biases"))
            encoded = tf.nn.tanh(tf.add(tf.matmul(x, weights), biases))
            
        with tf.name_scope('decode'):
            weights = tf.random.normal([hidden_dim, input_dim], dtype=tf.float32, name="weights")
            biases = tf.Variable(tf.zeros([input_dim], dtype=tf.float32, name="biases"))
            decoded = tf.matmul(encoded, weights) + biases
            
        self.x = x
        self.encoded = encoded
        self.decoded = decoded
        
        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.x, decoded))))
        self.train_op = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
        self.saver = tf.train.Saver()
        
    def train(self, data):
        num_samples = len(data)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for ep in range(self.epochs):
                for i in range(num_samples):
                    l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: [data[i]]})
                if ep % 10 == 0:
                    print("epoch : {0}, loss: {1}".format(ep, l))
            self.saver.save(sess, '/tmp/autoencoder.ckpt')
    
    def test(self, data):
        with tf.Session() as sess:
            self.saver.restore(sess, '/tmp/autoencoder.ckpt')
            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
        print('input', data)
        print('compresses', hidden)
        print('output', reconstructed)
        return reconstructed

    @staticmethod
    def get_batch(x, size):
        i = np.random.choice(len(x), size, replace=False)
        return x[i]

    def batch_train(self, data, batch_size=10):
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.epochs):
                for j in range(500):
                    batch_data = self.get_batch(data, batch_size)
                    l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data})
                if i % 10 == 0:
                    print("epoch: {0}, loss: {1}".format(i, l))
            self.saver.save(sess, '/tmp/autoencoder.ckpt')


In [2]:
from sklearn import datasets

hidden_dim = 1
data = datasets.load_iris().data
input_dim = len(data[0])

ae = AutoEncoder(input_dim, hidden_dim)
ae.train(data)

ae.test([[2, 4, 6, 8]])


epoch: 0, loss: 1.0393141508102417


epoch: 10, loss: 1.44948148727417


epoch: 20, loss: 1.756010890007019


epoch: 30, loss: 1.4919579029083252


epoch: 40, loss: 1.1964397430419922


epoch: 50, loss: 1.13486909866333


epoch: 60, loss: 1.3961279392242432


epoch: 70, loss: 1.1834945678710938


epoch: 80, loss: 1.712483286857605


epoch: 90, loss: 2.0333938598632812


epoch: 100, loss: 1.4539283514022827


epoch: 110, loss: 1.506359577178955


epoch: 120, loss: 1.5568138360977173


epoch: 130, loss: 1.242563009262085


epoch: 140, loss: 1.7463123798370361


epoch: 150, loss: 1.2418783903121948


epoch: 160, loss: 1.9502378702163696


epoch: 170, loss: 1.247479796409607


epoch: 180, loss: 1.183406949043274


epoch: 190, loss: 1.492090106010437


epoch: 200, loss: 1.050405740737915


epoch: 210, loss: 1.5531542301177979


epoch: 220, loss: 1.3760852813720703


epoch: 230, loss: 1.8792392015457153


epoch: 240, loss: 1.2520413398742676


INFO:tensorflow:Restoring parameters from /tmp/autoencoder.ckpt


input [[2, 4, 6, 8]]
compresses [[1.]]
output [[4.2766156  3.8181195  3.004415   0.89089084]]


array([[4.2766156 , 3.8181195 , 3.004415  , 0.89089084]], dtype=float32)

In [3]:
ae.batch_train(data)
ae.test([[2, 4, 6, 8]])



epoch: 0, loss: 1.0393141508102417


epoch: 10, loss: 1.44948148727417


epoch: 20, loss: 1.756010890007019


epoch: 30, loss: 1.4919579029083252


epoch: 40, loss: 1.1964397430419922


epoch: 50, loss: 1.13486909866333


epoch: 60, loss: 1.3961279392242432


epoch: 70, loss: 1.1834945678710938


epoch: 80, loss: 1.712483286857605


epoch: 90, loss: 2.0333938598632812


epoch: 100, loss: 1.4539283514022827


epoch: 110, loss: 1.506359577178955


epoch: 120, loss: 1.5568138360977173


epoch: 130, loss: 1.242563009262085


epoch: 140, loss: 1.7463123798370361


epoch: 150, loss: 1.2418783903121948


epoch: 160, loss: 1.9502378702163696


epoch: 170, loss: 1.247479796409607


epoch: 180, loss: 1.183406949043274


epoch: 190, loss: 1.492090106010437


epoch: 200, loss: 1.050405740737915


epoch: 210, loss: 1.5531542301177979


epoch: 220, loss: 1.3760852813720703


epoch: 230, loss: 1.8792392015457153


epoch: 240, loss: 1.2520413398742676


INFO:tensorflow:Restoring parameters from /tmp/autoencoder.ckpt


input [[2, 4, 6, 8]]
compresses [[1.]]
output [[4.2766156  3.8181195  3.004415   0.89089084]]


array([[4.2766156 , 3.8181195 , 3.004415  , 0.89089084]], dtype=float32)