In [96]:
import tensorflow as tf
import numpy as np

class Autoencoder:
    def __init__(self, input_dim, hidden_dim, epoch=250, learning_rate=0.001):
        self.epoch = epoch
        self.learning_rate = learning_rate
        
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim])
        
        with tf.name_scope('encode'):
            weights = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
            biases = tf.Variable(tf.zeros([hidden_dim]), name='biases')
            encoded = tf.nn.tanh(tf.matmul(x, weights) + biases)
            
        with tf.name_scope('decode'):
            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
            biases = tf.Variable(tf.zeros([input_dim]), name='biases')
            decoded = tf.matmul(encoded, weights) + biases
            
        self.x = x
        self.encoded = encoded
        self.decoded = decoded
        
        self.loss = tf.sqrt(tf.reduce_mean(tf.square(self.x - self.decoded)))
        self.train_op = tf.train.RMSPropOptimizer(self.learning_rate).minimize(self.loss)
        self.saver = tf.train.Saver() #определяем модуль сохранения
        
    def train(self, data):
        num_samples = len(data)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.epoch):
                for j in range(num_samples):
                    l, _ = sess.run([self.loss, self.train_op], feed_dict = {self.x: [data[j]]})
                if i % 10 == 0:
                    print('epoch {0}: loss = {1}'.format(i, l))
                    self.saver.save(sess, './logs/model.ckpt')
                self.saver.save(sess, './logs/model.ckpt')
                
    def get_batch(self, X, size):
        a = np.random.choice(len(X), size, replace = False)
        return X[a]
                
    def train_batch(self, data, batch_size):
        num_samples = len(data)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for i in range(self.epoch):
                for j in range(num_samples // batch_size):
                    batch_data = self.get_batch(data, batch_size)
                    l, _ = sess.run([self.loss, self.train_op], feed_dict = {self.x: batch_data})
                if i % 10 == 0:
                    print('epoch {0}: loss = {1}'.format(i, l))
                    self.saver.save(sess, './logs/model.ckpt')
                self.saver.save(sess, './logs/model.ckpt')                
                
    def test(self, data):
        with tf.Session() as sess:
            self.saver.restore(sess, './logs/model.ckpt')
            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict = {self.x: data})
            print('input', data)
            print('compressed', hidden)
            print('reconstructed', reconstructed)
            return reconstructed           

In [53]:
from sklearn import datasets

hidden_dim = 1
data = datasets.load_iris().data
input_dim = len(data[0])
ae = Autoencoder(input_dim, hidden_dim)
ae.train(data)
ae.test([[8, 4, 6, 2]])

150
epoch 0: loss = 3.8507254123687744
epoch 10: loss = 2.075183868408203
epoch 20: loss = 0.8333063721656799
epoch 30: loss = 0.47759708762168884
epoch 40: loss = 0.4165869951248169
epoch 50: loss = 0.409258633852005
epoch 60: loss = 0.4082566797733307
epoch 70: loss = 0.4081191420555115
epoch 80: loss = 0.4080994427204132
epoch 90: loss = 0.40809595584869385
epoch 100: loss = 0.4080948829650879
epoch 110: loss = 0.4080953896045685
epoch 120: loss = 0.4081081748008728
epoch 130: loss = 0.40814492106437683
epoch 140: loss = 0.40817853808403015
epoch 150: loss = 0.40821290016174316
epoch 160: loss = 0.40827006101608276
epoch 170: loss = 0.4083750247955322
epoch 180: loss = 0.40859928727149963
epoch 190: loss = 0.4091542363166809
epoch 200: loss = 0.4106757640838623
epoch 210: loss = 0.4146808385848999
epoch 220: loss = 0.4223628640174866
epoch 230: loss = 0.4415728747844696
epoch 240: loss = 0.4225011169910431
INFO:tensorflow:Restoring parameters from ./logs/model.ckpt
input [[8, 4, 6, 

array([[ 6.38743019,  3.19602633,  4.72290134,  1.62135875]], dtype=float32)

In [97]:
import numpy as np
import pickle

def unpickle(file):
    fo = open(file, 'rb')
    dict = pickle.load(fo, encoding='latin1')
    fo.close()
    return dict

def grayscale(a):
    return a.reshape(a.shape[0], 3, 32, 32).mean(1).reshape(a.shape[0], -1)

names = unpickle('./data/cifar-10-batches-py/batches.meta')['label_names']
data, labels = [], []
for i in range(1, 6):
    filename = './data/cifar-10-batches-py/data_batch_' + str(i)
    batch_data = unpickle(filename)
    if len(data) > 0:
        data = np.vstack((data, batch_data['data']))
        labels = np.vstack((labels, batch_data['labels']))
    else:
        data = batch_data['data']
        labels = batch_data['labels']
        
x = np.matrix(data)
y = np.array(labels)

horse_indices = np.where(y == 7)[0]

horse_x = x[horse_indices]
print(np.shape(horse_x))

input_dim = np.shape(horse_x)[1]
hidden_dim = 100
ae = Autoencoder(input_dim, hidden_dim, learning_rate=0.005)
ae.train_batch(horse_x, 500)

(5000, 3072)
epoch 0: loss = 127.0562973022461
epoch 10: loss = 127.3230972290039
epoch 20: loss = 107.41841125488281
epoch 30: loss = 79.99224853515625
epoch 40: loss = 56.97968292236328
epoch 50: loss = 38.03450012207031
epoch 60: loss = 27.676345825195312
epoch 70: loss = 20.53014373779297
epoch 80: loss = 14.233002662658691
epoch 90: loss = 9.395323753356934
epoch 100: loss = 5.641231060028076
epoch 110: loss = 2.8191022872924805
epoch 120: loss = 1.182726263999939
epoch 130: loss = 0.46169692277908325
epoch 140: loss = 0.2340492159128189
epoch 150: loss = 0.1619233340024948
epoch 160: loss = 0.1336836963891983
epoch 170: loss = 0.14876224100589752
epoch 180: loss = 0.14060989022254944
epoch 190: loss = 0.14901798963546753
epoch 200: loss = 0.14996972680091858
epoch 210: loss = 0.1501391977071762
epoch 220: loss = 0.14619112014770508
epoch 230: loss = 0.15240155160427094
epoch 240: loss = 0.15987463295459747
