# Intro to Autoencoders

Выявление аномалии с помощью автокодировщика.
Построить автокодировщик, позволяющий выявить в картинках одежды (база fashion_mnist) аномалии. В качестве аномальных картинок использовать рукописные цифры (база mnist). Автокодировщик должен детектировать подаваемые на него рукописные цифры как аномальные, а подаваемые на него элементы одежды как нормальные.
https://www.tensorflow.org/tutorials/generative/autoencoder


## Import TensorFlow and other libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Model

## Load the dataset
To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels.

In [None]:
# Fetch Fashion MNIST; the labels are dropped because only the images are used.
(x_train, _), (x_test, _) = fashion_mnist.load_data()

# Convert both splits to float32 and divide by 255.
x_train, x_test = (
    images.astype('float32') / 255. for images in (x_train, x_test)
)

print(x_train.shape, x_test.shape, sep="\n")

## First example: Basic autoencoder
![Basic autoencoder results](https://github.com/tensorflow/docs/blob/master/site/en/tutorials/generative/images/intro_autoencoder_result.png?raw=1)

Define an autoencoder with two Dense layers: an `encoder`, which compresses the images into a 64-dimensional latent vector, and a `decoder`, which reconstructs the original image from the latent space.

To define your model, use the [Keras Model Subclassing API](https://www.tensorflow.org/guide/keras/custom_layers_and_models).


In [None]:
class Autoencoder(Model):
    """Dense autoencoder: flatten -> latent vector -> output reshaped to `shape`.

    Args:
        latent_dim: number of units in the encoder's Dense layer.
        shape: shape of a single sample; the decoder reshapes its output to it.
    """

    def __init__(self, latent_dim, shape):
        super().__init__()
        self.latent_dim = latent_dim
        self.shape = shape

        # The decoder's Dense layer gets one unit per element of a sample.
        flat_size = tf.math.reduce_prod(shape).numpy()

        self.encoder = tf.keras.Sequential([
            layers.Flatten(),
            layers.Dense(latent_dim, activation='relu'),
        ])
        self.decoder = tf.keras.Sequential([
            layers.Dense(flat_size, activation='sigmoid'),
            layers.Reshape(shape),
        ])

    def call(self, x):
        """Encode `x` and return the decoder's output for the resulting code."""
        return self.decoder(self.encoder(x))


# Number of units in the encoder output.
latent_dim = 64
# Per-sample shape: everything after the leading axis of x_test.
shape = x_test.shape[1:]
autoencoder = Autoencoder(latent_dim=latent_dim, shape=shape)


In [None]:
# Sanity check: the reduce_prod-based size equals shape[0] * shape[1].
tf.math.reduce_prod(shape).numpy() == (shape[0]*shape[1])

In [None]:
# Configure training: Adam optimizer with a mean-squared-error loss.
autoencoder.compile(optimizer='adam', loss=losses.MeanSquaredError())

Train the model using `x_train` as both the input and the target. The `encoder` will learn to compress the dataset from 784 dimensions to the latent space, and the `decoder` will learn to reconstruct the original images.

In [None]:
# The inputs double as the targets; x_test is used for validation.
autoencoder.fit(
    x=x_train,
    y=x_train,
    epochs=8,
    shuffle=True,
    validation_data=(x_test, x_test),
)

In [None]:
# MNIST digits play the role of anomalies; only the images are kept.
mnist = tf.keras.datasets.mnist
mnist_train, mnist_test = mnist.load_data()
anomalous_x_train, anomalous_x_test = mnist_train[0], mnist_test[0]

In [None]:
# Convert both MNIST splits to float32 and divide by 255, as done for Fashion MNIST.
anomalous_x_train, anomalous_x_test = (
    digits.astype('float32') / 255.
    for digits in (anomalous_x_train, anomalous_x_test)
)

In [None]:
# Preview the first n MNIST digits that are used as anomalous inputs.
n = 10
plt.figure(figsize=(20, 2))
# Selecting the gray colormap does not depend on the loop, so do it once.
plt.gray()
for i in range(n):
    plt.subplot(1, n, i + 1)
    # These are unmodified MNIST digits; no noise is added anywhere,
    # so the former "original + noise" title was misleading.
    plt.title("anomaly")
    plt.imshow(tf.squeeze(anomalous_x_train[i]))
plt.show()

### Detect anomalies

In [None]:
# Reconstruct the normal test images and compute one MAE value per image.
reconstructions = autoencoder.predict(x_test)
ss = x_test.shape[0]
flat_reconstructions = reconstructions.reshape((ss, -1))
flat_inputs = x_test.reshape((ss, -1))
loss = tf.keras.losses.mae(flat_reconstructions, flat_inputs)

# Histogram of the per-image losses.
hist_axes = plt.gca()
hist_axes.hist(loss, bins=50)
hist_axes.set_xlabel("normal test loss")
hist_axes.set_ylabel("No of examples")
plt.show()

In [None]:
# Inspect the shapes of the inputs, their reconstructions and the per-image loss.
x_test.shape, reconstructions.shape, loss.shape

Choose a threshold value that is one standard deviation above the mean.

In [None]:
# Threshold = mean + one standard deviation of the normal-image losses.
loss_mean = np.mean(loss)
loss_std = np.std(loss)
threshold = loss_mean + loss_std
threshold, loss_mean, loss_std

In [None]:
# Inspect the shape of the anomalous test set.
anomalous_x_test.shape

In [None]:
# Reconstruct the anomalous (MNIST) test images and compute one MAE value per image.
ano_reconstructions = autoencoder.predict(anomalous_x_test)
ano_ss = anomalous_x_test.shape[0]
flat_ano_reconstructions = ano_reconstructions.reshape((ano_ss, -1))
flat_ano_inputs = anomalous_x_test.reshape((ano_ss, -1))
ano_loss = tf.keras.losses.mae(flat_ano_reconstructions, flat_ano_inputs)

# Histogram of the per-image losses.
hist_axes = plt.gca()
hist_axes.hist(ano_loss, bins=50)
hist_axes.set_xlabel("anomaly loss")
hist_axes.set_ylabel("No of examples")
plt.show()

In [None]:
# Mean and standard deviation of the anomalous-image losses.
np.mean(ano_loss), np.std(ano_loss)

In [None]:
def make_classification_init(a1, a2):
    """Stack two sample arrays and build matching binary labels.

    Args:
        a1: samples that receive label 0.
        a2: samples that receive label 1.

    Returns:
        A tuple `(samples, labels)`: `a1` and `a2` concatenated along axis 0,
        and a 1-D array of `a1.shape[0]` zeros followed by `a2.shape[0]` ones.
    """
    samples = np.concatenate((a1, a2), axis=0)
    labels = np.concatenate(
        (np.zeros(a1.shape[0], dtype=int), np.ones(a2.shape[0], dtype=int)),
        axis=0,
    )
    return samples, labels


# Quick demonstration on two tiny arrays.
make_classification_init(np.array([[1,2],[2,3]]), np.array([[3,4],[4,5]]))

In [None]:
# Compare the shapes of the normal and anomalous test sets.
x_test.shape, anomalous_x_test.shape

In [None]:
# Show one normal image next to one anomalous image.
# Use an explicit index instead of the variable `i` left over from the earlier
# preview loop; 9 is the value that loop (range(10)) ends on, so the same
# pair of images is shown regardless of cell execution order.
sample_idx = 9
plt.subplot(1, 2, 1)
plt.title("norm")
plt.imshow(tf.squeeze(x_test[sample_idx]))
plt.gray()
plt.subplot(1, 2, 2)
plt.title("anomaly")
plt.imshow(tf.squeeze(anomalous_x_test[sample_idx]))
plt.show()

In [None]:
# Combine normal (label 0) and anomalous (label 1) test images into one labelled set.
X, y = make_classification_init(x_test, anomalous_x_test)

In [None]:
# Use predict(), as the other cells do, so the combined set is processed in
# batches instead of one forward pass over every image at once.
reconstructions = autoencoder.predict(X)

In [None]:
# One mean-absolute-error value per image of the mixed set.
ss = reconstructions.shape[0]
flat_reconstructions = np.asarray(reconstructions).reshape((ss, -1))
flat_inputs = np.asarray(X).reshape((ss, -1))
loss = tf.keras.losses.mae(flat_reconstructions, flat_inputs)

In [None]:
# Inspect the shape of the mixed-set loss.
loss.shape

In [None]:
# Histogram of the per-image losses over the mixed set.
hist_axes = plt.gca()
hist_axes.hist(loss, bins=50)
hist_axes.set_xlabel("mixed test loss")
hist_axes.set_ylabel("No of examples")
plt.show()

In [None]:
# Flag an image as anomalous when its loss exceeds the threshold.
preds = tf.math.greater(loss, threshold)

In [None]:
# Compare the thresholded predictions with the 0/1 labels.
accuracy = accuracy_score(y, preds)
print(f"Accuracy = {accuracy}")

Использован кодировщик с одним полносвязным слоем, выдающий 64 признака. Декодировщик симметричен. Порог аномалии выбран как одна сигма от среднего значения ошибки нормальной картинки.

Получается с точностью 87% отличить на картинке элемент одежды от аномалии, которая в данном случае является рукописной цифрой

В качестве улучшения можно увеличивать количество нейронов, вводить сверточные слои и искать оптимальные количество признаков и порог аномалии