In [1]:
import tensorflow as tf
import numpy as np

In [9]:
def get_batch(X, size):
    """Draw a random mini-batch of distinct rows from ``X``.

    Args:
        X: Array-like indexed along its first axis (a NumPy array or a
            nested Python sequence).
        size: Number of rows to draw. Must not exceed ``len(X)`` because
            sampling is done without replacement.

    Returns:
        numpy.ndarray containing ``size`` rows of ``X`` in random order.

    Raises:
        ValueError: propagated from ``np.random.choice`` when ``size`` is
            larger than ``len(X)``.
    """
    # Indexing with an integer array only works on ndarrays; coercing here
    # lets callers pass plain lists as well (ndarrays pass through as-is).
    X = np.asarray(X)
    a = np.random.choice(len(X), size, replace=False)
    return X[a]

In [20]:
class Autoencoder:
    """Single-hidden-layer autoencoder built with the TensorFlow 1.x graph API.

    The encoder is ``tanh(x @ W_enc + b_enc)`` and the decoder is the linear
    map ``encoded @ W_dec + b_dec``. The loss is the square root of the mean
    squared difference between the input and its reconstruction, minimised
    with RMSProp.

    Each instance builds its ops in a private ``tf.Graph`` (``self.graph``),
    so creating several autoencoders (or re-running the defining cell) does
    not make one instance's saver/initializer touch another's variables.

    Attributes:
        epoch: Number of training epochs.
        learning_rate: Learning rate passed to the RMSProp optimizer.
        checkpoint_path: Where ``train``/``train_batch`` save and ``test``
            restores the model.
        graph: The ``tf.Graph`` holding this instance's ops.
        x: Input placeholder of shape ``[None, input_dim]``.
        encoded: Hidden-layer activation tensor.
        decoded: Reconstruction tensor.
        loss: Scalar reconstruction loss tensor.
        train_op: Op performing one optimizer step on ``loss``.
        init_op: Op initialising every variable in ``graph``.
        saver: ``tf.train.Saver`` for the variables in ``graph``.
    """

    def __init__(self, input_dim, hidden_dim, epoch=250, learning_rate=0.001,
                 checkpoint_path='./model.ckpt'):
        """Build the computation graph.

        Args:
            input_dim: Number of features per input sample.
            hidden_dim: Size of the encoded representation.
            epoch: Number of passes made by ``train``/``train_batch``.
            learning_rate: Learning rate for RMSProp.
            checkpoint_path: Checkpoint file prefix used for save/restore.
        """
        self.epoch = epoch
        self.learning_rate = learning_rate
        self.checkpoint_path = checkpoint_path

        # A private graph keeps this model's variables isolated from any
        # other ops living in the process-wide default graph.
        self.graph = tf.Graph()
        with self.graph.as_default():
            x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])

            with tf.name_scope('encode'):
                weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
                biases = tf.Variable(tf.zeros([hidden_dim]), name='biases')
                encoded = tf.nn.tanh(tf.matmul(x, weights) + biases)
            with tf.name_scope('decode'):
                weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
                biases = tf.Variable(tf.zeros([input_dim]), name='biases')
                decoded = tf.matmul(encoded, weights) + biases

            self.x = x
            self.encoded = encoded
            self.decoded = decoded

            # Root-mean-square reconstruction error over the fed batch.
            self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.x, self.decoded))))

            self.train_op = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
            # Created after the optimizer so its slot variables are covered too.
            self.init_op = tf.global_variables_initializer()
            self.saver = tf.train.Saver()

    def _run_training(self, feed_for_step, steps_per_epoch, log_every, log_template):
        """Run the epoch/step loop shared by ``train`` and ``train_batch``.

        ``feed_for_step(j)`` must return the value to feed into ``self.x`` for
        step ``j``. Every ``log_every`` epochs the last step's loss is printed
        using ``log_template`` and a checkpoint is written; a final checkpoint
        is written once all epochs are done.
        """
        with tf.Session(graph=self.graph) as sess:
            sess.run(self.init_op)
            for i in range(self.epoch):
                for j in range(steps_per_epoch):
                    l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: feed_for_step(j)})
                if i % log_every == 0:
                    print(log_template.format(i, l))
                    self.saver.save(sess, self.checkpoint_path)
            self.saver.save(sess, self.checkpoint_path)

    def train(self, data):
        """Train one sample at a time, visiting every sample once per epoch.

        Args:
            data: Indexable collection of samples, each of length ``input_dim``.

        Raises:
            ValueError: if ``data`` is empty.
        """
        num_samples = len(data)
        if num_samples == 0:
            # Without this the loop would try to report a loss that was never computed.
            raise ValueError('data must contain at least one sample')
        self._run_training(lambda j: [data[j]], num_samples, 10, 'epoch {0}: loss = {1}')

    def train_batch(self, data, batch_size=10, steps_per_epoch=500):
        """Train on random mini-batches drawn with ``get_batch``.

        Args:
            data: Array-like of samples, each of length ``input_dim``.
            batch_size: Number of distinct samples per mini-batch.
            steps_per_epoch: Number of mini-batches processed per epoch.

        Raises:
            ValueError: if ``data`` is empty, ``steps_per_epoch`` is below 1,
                or ``batch_size`` is not between 1 and ``len(data)``.
        """
        # get_batch indexes with an integer array, which needs an ndarray.
        data = np.asarray(data)
        if len(data) == 0:
            raise ValueError('data must contain at least one sample')
        if steps_per_epoch < 1:
            raise ValueError('steps_per_epoch must be at least 1')
        if not 1 <= batch_size <= len(data):
            raise ValueError('batch_size must be between 1 and len(data)')
        self._run_training(lambda _: get_batch(data, batch_size), steps_per_epoch, 50, 'epoch {0}: loss={1}')

    def test(self, data):
        """Restore the saved model, then encode and reconstruct ``data``.

        Prints the input, the encoded representation and the reconstruction.

        Args:
            data: Batch of samples to feed into ``self.x``.

        Returns:
            The reconstruction produced for ``data``.
        """
        with tf.Session(graph=self.graph) as sess:
            self.saver.restore(sess, self.checkpoint_path)
            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
            print('input', data)
            print('compressed', hidden)
            print('reconstructed', reconstructed)
            return reconstructed

In [8]:
from sklearn import datasets

# Compress the four iris measurements down to a single hidden unit,
# training on one sample per optimizer step.
hidden_dim = 1
data = datasets.load_iris().data
input_dim = data.shape[1]  # number of features per sample
ae = Autoencoder(input_dim=input_dim, hidden_dim=hidden_dim)
ae.train(data)
# Encode and reconstruct a hand-made sample; the returned array is the cell output.
ae.test([[8, 4, 6, 2]])

epoch 0: loss = 4.198827266693115
epoch 10: loss = 2.0768535137176514
epoch 20: loss = 0.9981046319007874
epoch 30: loss = 0.5324809551239014
epoch 40: loss = 0.3828141391277313
epoch 50: loss = 0.3655385971069336
epoch 60: loss = 0.35946860909461975
epoch 70: loss = 0.35231539607048035
epoch 80: loss = 0.341936856508255
epoch 90: loss = 0.32704949378967285
epoch 100: loss = 0.3060683012008667
epoch 110: loss = 0.2835567593574524
epoch 120: loss = 0.26376065611839294
epoch 130: loss = 0.2513621151447296
epoch 140: loss = 0.24227002263069153
epoch 150: loss = 0.23086513578891754
epoch 160: loss = 0.21080884337425232
epoch 170: loss = 0.21516326069831848
epoch 180: loss = 0.2185724824666977
epoch 190: loss = 0.2204909324645996
epoch 200: loss = 0.22207975387573242
epoch 210: loss = 0.2235858291387558
epoch 220: loss = 0.22503425180912018
epoch 230: loss = 0.22640833258628845
epoch 240: loss = 0.22769129276275635
input [[8, 4, 6, 2]]
compressed [[-0.74908912]]
reconstructed [[ 6.2937212   2.90197134  4.99299431  1.74079442]]

array([[ 6.2937212 ,  2.90197134,  4.99299431,  1.74079442]], dtype=float32)

In [21]:
# Same experiment as the per-sample run, but trained on random mini-batches.
hidden_dim = 1
batch_size = 10
data = datasets.load_iris().data
input_dim = data.shape[1]  # number of features per sample
ae = Autoencoder(input_dim=input_dim, hidden_dim=hidden_dim)
ae.train_batch(data, batch_size=batch_size)
# Encode and reconstruct a hand-made sample; the returned array is the cell output.
ae.test([[8, 4, 6, 2]])

epoch 0: loss=2.5457303524017334
epoch 50: loss=0.3011484742164612
epoch 100: loss=0.27178338170051575
epoch 150: loss=0.2699034512042999
epoch 200: loss=0.3362778127193451
input [[8, 4, 6, 2]]
compressed [[ 0.38115057]]
reconstructed [[ 6.89253139  2.8066318   6.26520872  2.26308823]]


array([[ 6.89253139,  2.8066318 ,  6.26520872,  2.26308823]], dtype=float32)