# 降维

## 变分自编码(VAE)
Variational AutoEncoder（VAE）是AutoEncoder的一种，它融合了贝叶斯方法和深度学习的优势，拥有优雅的数学基础、简单易懂的架构以及令人满意的性能，这使得它比一般的生成模型具有更广泛的意义。

`编码器`
* 其目标是要得到p(z|x)的分布，即给定输入数据x的分布，得到潜在变量Z的分布
* 为了求解真实的后验p(z|x)的概率分布，VAE引入了一个识别模型q(z|x)去近似p(z|x)，衡量这两个分布的差异使用相对熵(KL散度)，VAE的目的就是让这个相对熵尽可能小
$$
-KL(q(z|x) \,\|\, p(z)) = \frac{1}{2} \sum_{j=1}^{J} \left(1 + \log(\sigma_j^2) - \mu_j^2 - \sigma_j^2\right)
$$
代码实现

```python
# Mean and log-variance of the latent distribution of z given x, each
# produced by its own Dense layer applied to h (h is defined outside this
# snippet)
z_mean = Dense(latent_dim)(h)
z_log_var = Dense(latent_dim)(h)

 # KL-divergence term, summed over the last axis
kl_loss = - 0.5 * K.sum(
    1 + z_log_var - K.square(z_mean) - K.exp(z_log_var),
    axis=-1)
```

`解码器`，输入z然后输出一个x'，目的是让x'和x的分布尽量一致，当两者完全一样时，中间的潜在变量z可以看作是x的一种压缩状态，包含了x的全部特征
```python
# Decoder: two Dense layers that map z to the reconstruction x_decoded_mean
decoder_h = Dense(intermediate_dim, activation='relu')
decoder_mean = Dense(original_dim, activation='sigmoid')
h_decoded = decoder_h(z)
x_decoded_mean = decoder_mean(h_decoded)

# xent_loss is the reconstruction loss: binary cross-entropy between x and
# its reconstruction, summed over the last axis
xent_loss = K.sum(K.binary_crossentropy(x, x_decoded_mean), axis=-1)
```



In [22]:
import tensorflow as tf
from tensorflow.keras import layers, models, backend as K
from tensorflow.keras.losses import binary_crossentropy
import scanpy as sc
import numpy as np
from sklearn import preprocessing

# 采样函数，应用重参数技巧
def sampling(args):
    """Sample a latent vector with the reparameterization trick.

    Args:
        args: A pair ``(z_mean, z_log_var)`` of tensors.

    Returns:
        ``z_mean + exp(0.5 * z_log_var) * noise``, where ``noise`` is drawn
        from a standard normal with the same shape as ``z_mean``.
    """
    mu, log_variance = args
    # Randomness is confined to `noise`; mu and log_variance enter the
    # result only through deterministic arithmetic.
    noise = tf.random.normal(shape=tf.shape(mu))
    std_dev = tf.exp(0.5 * log_variance)
    return mu + std_dev * noise

# 定义 VAE 模型
def get_vae_model(original_dim=1838, intermediate_dim=256, latent_dim=64,
                  batch_norm=None):
    """Build a dense VAE, its symbolic loss tensor and a mean-encoder model.

    The defaults mirror the parameter values used by the notebook cells, so
    the function can also be called with only ``batch_norm=...``.

    Args:
        original_dim: Number of input (and reconstructed) features.
        intermediate_dim: Width of the hidden Dense layer used by both the
            encoder and the decoder.
        latent_dim: Size of the latent vector z.
        batch_norm: Normalization applied to the two hidden activations.
            ``None`` (default) adds no normalization layer, a truthy value
            adds ``BatchNormalization`` and a falsy non-None value adds
            ``LayerNormalization``.

    Returns:
        A tuple ``(sc_vae, vae_loss, encoder)``:
        ``sc_vae`` maps the input to its reconstruction, ``vae_loss`` is the
        batch mean of reconstruction loss plus KL loss (not yet attached to
        the model; the caller attaches it), and ``encoder`` maps the input
        to ``z_mean``.

    NOTE(review): the loss is built by applying backend functions to the
    model's symbolic tensors. Keras 3 reports a ValueError for TensorFlow
    functions applied to KerasTensors, so this presumably targets Keras 2;
    confirm the intended Keras version.
    """

    def _normalize(tensor):
        # Each call creates a fresh normalization layer, so the encoder and
        # the decoder never share normalization weights.
        if batch_norm is None:
            return tensor
        if batch_norm:
            return layers.BatchNormalization()(tensor)
        return layers.LayerNormalization()(tensor)

    # Encoder
    x = layers.Input(shape=(original_dim,))
    h = _normalize(layers.Dense(intermediate_dim, activation='relu')(x))
    z_mean = layers.Dense(latent_dim)(h)
    z_log_var = layers.Dense(latent_dim)(h)

    # Sampling layer
    z = layers.Lambda(sampling, output_shape=(latent_dim,))([z_mean, z_log_var])

    # Decoder
    decoder_h = layers.Dense(intermediate_dim, activation='relu')
    decoder_mean = layers.Dense(original_dim, activation='sigmoid')
    h_decoded = _normalize(decoder_h(z))
    x_decoded_mean = decoder_mean(h_decoded)

    # Full VAE: input -> reconstruction
    sc_vae = models.Model(x, x_decoded_mean)

    # Loss: xent_loss is the reconstruction term, kl_loss the KL term; both
    # are summed over the last axis and then averaged together.
    xent_loss = K.sum(K.binary_crossentropy(x, x_decoded_mean), axis=-1)
    kl_loss = - 0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
    vae_loss = K.mean(xent_loss + kl_loss)

    # Encoder model: input -> z_mean (the sampling step is not part of it)
    encoder = models.Model(x, z_mean)

    return sc_vae, vae_loss, encoder

# Hyper-parameters
batch_size = 100
epochs = 30
original_dim = 1838
intermediate_dim = 256
latent_dim = 64


# =======================================
# Build the VAE model
# =======================================
sc_vae, vae_loss, encoder = get_vae_model(original_dim, intermediate_dim, latent_dim)
# The loss tensor is attached to the model here, so compile() receives only
# an optimizer.
sc_vae.add_loss(vae_loss)
sc_vae.compile(optimizer='rmsprop')
sc_vae.summary()


# =======================================
# Data loading
# =======================================
pbmc_data = sc.datasets.pbmc3k_processed()

# Expression matrix and the stored 'louvain' annotation of each observation
x_data = pbmc_data.X
y_test_ = pbmc_data.obs['louvain'].values

# Min-max scaling (MinMaxScaler with its default settings)
x_train = preprocessing.MinMaxScaler().fit_transform(x_data)

# Training: no targets are passed, and validation reuses the training matrix
sc_vae.fit(
    x_train,
    validation_data=(x_train, None),
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
)


# Latent representation of every observation, taken from the encoder model
x_test_encoded = encoder.predict(x_train, batch_size=batch_size)


# =======================================
# Visualization
# =======================================
features = np.array(x_test_encoded)
features = features.astype('float32')
adata = sc.AnnData(features)
adata.obs['label'] = y_test_

# Neighborhood graph computed on the latent features stored in X
sc.pp.neighbors(adata, use_rep='X', n_neighbors=10)

# Clustering, UMAP embedding, and a plot colored by the new clusters and by
# the stored labels
sc.tl.louvain(adata)
sc.tl.umap(adata)
sc.pl.umap(adata, save='_pbmc3k_vae_louvain.png', color=['louvain', 'label'])

ValueError: A KerasTensor cannot be used as input to a TensorFlow function. A KerasTensor is a symbolic placeholder for a shape and dtype, used when constructing Keras Functional models or Keras Functions. You can only use it as input to a Keras layer or a Keras operation (from the namespaces `keras.layers` and `keras.operations`). You are likely doing something like:

```
x = Input(...)
...
tf_fn(x)  # Invalid.
```

What you should do instead is wrap `tf_fn` in a layer:

```
class MyLayer(Layer):
    def call(self, x):
        return tf_fn(x)

x = MyLayer()(x)
```


## 变分自编码+归一化
### 批归一化BN
batch normalization
* BN通过规范化与线性变换使得每一层网络的输入数据的均值和方差都在一定范围内，使得后一层网络不必不断去适应底层网络中输入的变化，从而实现了网络中层与层之间的解耦，允许每一层进行独立学习，有利于提高整个神经网络学习效率
$$
\mu_B = \frac{1}{m} \Sigma_{i=1}^m x_i\\
\sigma_B^2 = \frac{1}{m} \Sigma_{i=1}^m (x_i - \mu_B)^2\\
\hat{x_i} = \frac{x_i - \mu_B}{\sqrt{\sigma_B^2 + \epsilon}}\\
y_i = \gamma \hat{x_i} + \beta = BN_{\gamma, \beta}(x_i)
$$

### 层归一化LN
layer normalization
BN是按照样本数计算归一化统计量的，当样本数很少时，样本的均值和方差便不能反映全局的统计分布信息；LN是一个独立于batch size的算法，所以无论样本数多少都不会影响参与LN计算的数据量
$$
\mu^l = \frac{1}{H} \Sigma_{i=1}^H (a_i^l)\\
\sigma^l = \sqrt{\frac{1}{H} \Sigma_{i=1}^H (a_i^l - \mu^l)^2 }
$$



### 组归一化



In [None]:
# Hyper-parameters
batch_size = 100
epochs = 30
original_dim = 1838
intermediate_dim = 256
latent_dim = 64


# =======================================
# Build the VAE model
# =======================================
batch_norm = True
# NOTE(review): this call passes only `batch_norm`, so it needs a
# get_vae_model that accepts that keyword and has default dimensions;
# confirm against the definition in use.
sc_vae, vae_loss, encoder = get_vae_model(batch_norm=batch_norm)
# The loss tensor is attached to the model here, so compile() receives only
# an optimizer.
sc_vae.add_loss(vae_loss)
sc_vae.compile(optimizer='rmsprop')
sc_vae.summary()


# =======================================
# Data loading
# =======================================
pbmc_data = sc.datasets.pbmc3k_processed()

# Expression matrix and the stored 'louvain' annotation of each observation
x_data = pbmc_data.X
y_test_ = pbmc_data.obs['louvain'].values

# Min-max scaling (MinMaxScaler with its default settings)
x_train = preprocessing.MinMaxScaler().fit_transform(x_data)

# Training: no targets are passed, and validation reuses the training matrix
sc_vae.fit(
    x_train,
    validation_data=(x_train, None),
    batch_size=batch_size,
    epochs=epochs,
    shuffle=True,
)

# Latent representation of every observation, taken from the encoder model
x_test_encoded = encoder.predict(x_train, batch_size=batch_size)


# =======================================
# Visualization
# =======================================
features = np.array(x_test_encoded)
features = features.astype('float32')
adata = sc.AnnData(features)
adata.obs['label'] = y_test_

# Neighborhood graph computed on the latent features stored in X
sc.pp.neighbors(adata, use_rep='X', n_neighbors=10)

# Clustering and UMAP embedding
sc.tl.louvain(adata)
sc.tl.umap(adata)

# The output file name records which normalization variant was requested
umap_file = (
    '_pbmc3k_vae_batch_norm_louvain.png'
    if batch_norm is True
    else '_pbmc3k_vae_layer_norm_louvain.png'
)
sc.pl.umap(adata, color=['louvain', 'label'], save=umap_file)


## 变分自编码+HVG

## 自监督学习

## 自监督+批次效应矫正

## Transformer

## Transformer + HVG

## Transformer + Marker分析

## Transformer代码解读


## 比较x还是z