# 非线性的神经网络
* 多输入模型
* 多输出模型
* 层组成的有向无环图

### Keras 的灵活使用方式 - 函数式 API
* 直接操作张量，把层当做函数来使用，接收张量并返回张量

In [13]:
from keras.models import Sequential, Model
from keras import layers
from keras import Input

# Baseline: a plain linear stack of layers built with the Sequential API.
seq_model = Sequential([
    layers.Dense(32, activation='relu', input_shape=(64,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])
seq_model.summary()

# The same architecture written with the functional API: each layer is called
# like a function on a tensor and returns a new tensor.
input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)

# Model turns an input tensor and an output tensor into a model. Behind the
# scenes Keras retrieves every layer on the path from input to output and
# assembles them into a graph-like data structure.
model = Model(inputs=input_tensor, outputs=output_tensor)
model.summary()

# Compiling, training and evaluating a Model instance works exactly as it
# does for a Sequential model.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

import numpy as np

# Random dummy data: 1000 samples with 64 features and 10 target values each.
x_train = np.random.random((1000, 64))
y_train = np.random.random((1000, 10))

model.fit(x_train, y_train, batch_size=128, epochs=10)

# Evaluated on the training data itself; the bare expression below displays
# the resulting loss as the cell output.
score = model.evaluate(x_train, y_train)
score

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_22 (Dense)             (None, 32)                2080      
_________________________________________________________________
dense_23 (Dense)             (None, 32)                1056      
_________________________________________________________________
dense_24 (Dense)             (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________
Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 64)                0         
_________________________________________________________________
dense_25 (Dense)             (None, 32)                2080      
___________________________________

32.61466973876953

### 多输入模型
* 一个问答模型 - 一个用自然语言描述的问题输入到一个LSTM层，一个文本片段输入到另一个LSTM层，
* 然后将两个层的输出连接起来，输入到Dense层

In [16]:
from keras.models import Model
from keras import layers
from keras import Input

# Vocabulary sizes for the reference text, the question and the answer.
text_vocabulayer_size = 10000
question_vocabulayer_size = 10000
answer_vocabulayer_size = 500

# Text branch: a variable-length sequence of integers. Inputs may be named.
text_input = Input(shape=(None,), dtype='int32', name='text')
# Embed every integer of the sequence into a vector of size 64 ...
embedded_text = layers.Embedding(
    input_dim=text_vocabulayer_size, output_dim=64)(text_input)
# ... then let an LSTM encode the vector sequence into one single vector.
encoded_text = layers.LSTM(units=32)(embedded_text)

# Question branch: same processing, with its own (smaller) layers.
question_input = Input(shape=(None,), dtype='int32', name='question')
embedded_question = layers.Embedding(
    input_dim=question_vocabulayer_size, output_dim=32)(question_input)
encoded_question = layers.LSTM(units=16)(embedded_question)

# Join the encoded text and the encoded question along the last axis.
concatenated = layers.concatenate(
    [encoded_text, encoded_question],
    axis=-1,
)

# Softmax classifier over the answer vocabulary.
answer = layers.Dense(
    units=answer_vocabulayer_size, activation='softmax')(concatenated)

# A model with two inputs is created by passing the inputs as a list.
model = Model(inputs=[text_input, question_input], outputs=answer)
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['acc'],
)


#### 将数据输入到多输入模型

In [19]:
import numpy as np
import keras
# Size of the randomly generated dummy dataset.
num_samples = 1000
max_length = 100

# Random token ids in [1, vocabulary size) for both inputs.
text = np.random.randint(
    low=1, high=text_vocabulayer_size, size=(num_samples, max_length))
question = np.random.randint(
    low=1, high=question_vocabulayer_size, size=(num_samples, max_length))

# Random answer ids in [0, answer vocabulary size), then one-hot encoded
# because the model is compiled with categorical_crossentropy.
answers = np.random.randint(0, answer_vocabulayer_size, size=num_samples)
answers = keras.utils.to_categorical(
    answers, num_classes=answer_vocabulayer_size)

# Because the inputs are named, a dict keyed by input name works as well:
# model.fit({'text':text, 'question':question}, answers, epochs=10, batch_size=128)
# Here the inputs are fed as a list, in the order given to Model().
model.fit([text, question], answers, batch_size=128, epochs=10)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f41c4c1beb8>

### 多输出模型
* 将输入数据（一个人的社交媒体发帖）输入到一维卷积神经网络，将结果分别发往3个不同的Dense层输出不同的特征

In [None]:
from keras import layers
from keras import Input
from keras.models import Model

vocabulary_size = 50000
num_income_groups = 10

# One input (a person's social-media posts as a variable-length integer
# sequence) feeding a 1D convnet with three prediction heads.
posts_input = Input(shape=(None,), dtype='int32', name='posts')
# Embedding takes (input_dim, output_dim): the vocabulary size comes first,
# the embedding dimension (256) second. The original call had them swapped,
# which limited valid token ids to < 256 and produced 50000-dim embeddings.
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# Every output layer is given a name so that losses, loss weights and
# targets can be addressed by that name below.
age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

# One loss per head, keyed by output-layer name; loss_weights scales each
# head's contribution to the summed training loss.
model.compile(optimizer='rmsprop', loss={'age':'mse',
                                         'income':'categorical_crossentropy',
                                         'gender':'binary_crossentropy'},
                                   loss_weights={'age': 0.25,
                                                 'income': 1.,
                                                 'gender':10.})
# NOTE(review): posts, age_targets, income_targets and gender_targets are not
# defined in this cell -- presumably arrays prepared elsewhere; confirm
# before running.
model.fit(posts, {'age':age_targets,
                  'income':income_targets,
                  'gender':gender_targets},
         epochs = 10, batch_size=64)

### 层组成的有向无环图
* Inception模块 - 模块的堆叠，每个模块像小型的独立网络，有助于网络分别学习空间特征和逐通道的特征
* 残差连接 - 解决了大规模深度学习模型的梯度消失、表示瓶颈问题，和LSTM原理相似

In [None]:
# Inception
from keras import layers

# NOTE(review): x is not defined in this cell -- assumed to be a 4D feature
# tensor (batch, height, width, channels) supplied elsewhere; confirm.
#
# Every branch downsamples with the same overall stride of 2. padding='same'
# is used on every layer so that all four branches end up with identical
# spatial dimensions; with the default 'valid' padding the 3x3 convolutions
# and the pooling shrink each branch by a different amount and the final
# concatenation fails with a shape mismatch.
branch_a = layers.Conv2D(128, 1, activation='relu', strides=2, padding='same')(x)

# Fixed: the first line used to assign to the misspelt name `barach_b`,
# leaving `branch_b` undefined when it was used on the next line.
branch_b = layers.Conv2D(128, 1, activation='relu', padding='same')(x)
branch_b = layers.Conv2D(128, 3, activation='relu', strides=2, padding='same')(branch_b)

# In this branch the striding happens in the average-pooling layer.
branch_c = layers.AveragePooling2D(3, strides=2, padding='same')(x)
branch_c = layers.Conv2D(128, 3, activation='relu', padding='same')(branch_c)

branch_d = layers.Conv2D(128, 1, activation='relu', padding='same')(x)
branch_d = layers.Conv2D(128, 3, activation='relu', padding='same')(branch_d)
# Fixed: was `layers,Conv2D` (comma instead of dot).
branch_d = layers.Conv2D(128, 3, activation='relu', strides=2, padding='same')(branch_d)

# Concatenate the branch outputs along the channel (last) axis.
output = layers.concatenate([branch_a, branch_b, branch_c, branch_d], axis=-1)


# Residual connections
# 1. Identity residual connection -- used when the feature maps have the
#    same shape.
from keras import layers

# Placeholder: x must be replaced by a real 4D tensor before this code runs.
# NOTE(review): for the identity case x presumably has 128 channels so that
# it can be added to y -- confirm.
x = ...
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
# Add the original x back onto the output features.
y = layers.add([y, x])

# 2. Linear residual connection -- used when the feature-map shapes differ.
y = layers.Conv2D(128, 3, activation='relu', padding='same')(x)
y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
# padding='same' keeps the pooled size equal to that of the strided 1x1
# convolution below even when the spatial dimensions are odd; for even
# dimensions the result is the same as with the default padding.
y = layers.MaxPooling2D(2, strides=2, padding='same')(y)

# A 1x1 convolution with stride 2 linearly downsamples the original x to the
# same shape as y.
residual = layers.Conv2D(128, 1, strides=2, padding='same')(x)
# Add the residual tensor onto the output features.
# Fixed: this line carried a stray leading space (IndentationError).
y = layers.add([y, residual])

### 共享层权重
* 几个分支同时对不同的输入集合学习 - 判断两个句子相似度，实例化一个共享LSTM层，该层的权重同时基于两个输入进行学习

In [None]:
from keras import layers, Input
from keras.models import Model

# Instantiate a single LSTM layer once; calling the same instance on both
# inputs makes the two branches share its weights.
lstm = layers.LSTM(32)

# Left branch of the model: variable-length sequences of 128-dim vectors.
left_input = Input(shape=(None, 128))
left_output = lstm(left_input)

# Right branch: calling the existing layer instance reuses its weights.
right_input = Input(shape=(None, 128))
right_output = lstm(right_input)

# Classifier on top of the two encoded sequences.
merged = layers.concatenate([left_output, right_output], axis=-1)
predictions = layers.Dense(1, activation='sigmoid')(merged)

# Train the model on both inputs; the shared LSTM weights are updated from
# both branches.
model = Model([left_input, right_input], predictions)
# Fixed: a model must be compiled before fit() can be called. The loss is
# binary cross-entropy to match the single sigmoid output; the optimizer
# follows the other models in this notebook.
model.compile(optimizer='rmsprop', loss='binary_crossentropy')
# NOTE(review): left_data, right_data and targets are not defined in this
# cell -- presumably arrays prepared elsewhere; confirm before running.
model.fit([left_data, right_data], targets)

### 将模型作为层

In [None]:
from keras import layers, applications
from keras import Input

# The base image-processing model is the Xception network, instantiated
# without its classification top and without pretrained weights.
xception_base = applications.Xception(weights=None, include_top=False)

# The inputs are 250x250 RGB images.
left_input = Input(shape=(250, 250, 3))
right_input = Input(shape=(250, 250, 3))

# Call the same vision model twice so both inputs share its weights.
left_features = xception_base(left_input)
# Fixed: the result used to be assigned back to `right_input`, overwriting
# the Input tensor that is needed later to build a Model from both inputs.
right_features = xception_base(right_input)

# The merged features contain information from both the left and the right
# visual input.
merged_features = layers.concatenate([left_features, right_features], axis=-1)