# Auto Poster Generation
## 当前任务
### 打分器（实际上可看作二分类问题）
#### 1. 输入
- 正样本：已有的海报图像，label为1
- 负样本：在现有海报图像的基础上随机搭配，label为0

#### 2. 网络结构
- 可以用现有的卷积基模型，可能需要fine-tune
- 也可以自己构造一个简单的模型（尝试）
- 对于卷积基提取的特征，后面接上Flatten和Dense层，最后做一个二分类

#### 3. 输出
- 输出的概率值即可看作打分器的分数

In [1]:
import os
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

# Expose only GPU 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Let TensorFlow grow its GPU allocation on demand, capped at half of the
# device memory, and install that session as the Keras backend session.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=True,
                              per_process_gpu_memory_fraction=0.5))
set_session(tf.Session(config=config))

  return f(*args, **kwds)
Using TensorFlow backend.


In [2]:
# Target poster size in pixels. The dataset arrays and every model input
# below are shaped (standard_height, standard_width, 3).
standard_width = 200
standard_height = 280

### 读取数据集并进行预处理

In [3]:
import glob
import numpy as np
from PIL import Image

# Positive class: existing posters. Paths are shuffled in place so that the
# slices taken later for train/validation/test are random.
poster_positive = glob.glob('poster_positive/*.png')
# Negative class: randomly recombined posters.
poster_negative = glob.glob('poster_negative/*.png')

# Shuffle order (positive first, then negative) is kept so the NumPy RNG is
# consumed in the same sequence as before.
for _paths in (poster_positive, poster_negative):
    np.random.shuffle(_paths)

poster_positive_num, poster_negative_num = len(poster_positive), len(poster_negative)

print("poster positive num: " + str(poster_positive_num))
print("poster negative num: " + str(poster_negative_num))

poster positive num: 369
poster negative num: 1690


In [4]:
import random

# Per-class sample counts for each split. The negative counts are set equal
# to the positive counts; the commented-out values are the alternative,
# larger negative counts.
num_train_positive = 297
# num_train_negative = 1352
num_train_negative = 297
num_validation_positive = 36
# num_validation_negative = 169
num_validation_negative = 36
num_test_positive = 36
# num_test_negative = 169
num_test_negative = 36


def _load_rgb(path):
    """Return the image at *path* as a float64 RGB array divided by 255.

    The file is opened in a ``with`` block so its handle is released as soon
    as the pixel data has been copied out. No resizing is performed: the
    image must already be standard_width x standard_height.
    """
    with Image.open(path) as im:
        return np.asarray(im.convert('RGB'), dtype='float64') / 255.0


def _build_split(positive_paths, negative_paths):
    """Load one dataset split.

    Positive samples are stored first (label 1), followed by the negative
    samples (label 0).

    Returns:
        (X, Y) where X has shape (n, standard_height, standard_width, 3) and
        Y has shape (n, 1), with n = len(positive_paths) + len(negative_paths).
    """
    paths = list(positive_paths) + list(negative_paths)
    X = np.empty((len(paths), standard_height, standard_width, 3))
    Y = np.empty((len(paths), 1))
    Y[:len(positive_paths)] = 1
    Y[len(positive_paths):] = 0
    for row, path in enumerate(paths):
        X[row] = _load_rgb(path)
    return X, Y


# Fail early, with a clear message, when there are not enough files for the
# requested split sizes (slicing below would otherwise silently come up short).
_needed_positive = num_train_positive + num_validation_positive + num_test_positive
_needed_negative = num_train_negative + num_validation_negative + num_test_negative
if len(poster_positive) < _needed_positive or len(poster_negative) < _needed_negative:
    raise ValueError(
        "not enough images for the requested splits: need %d positive / %d "
        "negative, found %d / %d"
        % (_needed_positive, _needed_negative,
           len(poster_positive), len(poster_negative)))

# The three splits take consecutive, non-overlapping slices of the shuffled
# path lists: train first, then validation, then test.
_val_pos_start = num_train_positive
_val_neg_start = num_train_negative
_test_pos_start = _val_pos_start + num_validation_positive
_test_neg_start = _val_neg_start + num_validation_negative

X_train, Y_train = _build_split(
    poster_positive[:num_train_positive],
    poster_negative[:num_train_negative])

# Only the training split is shuffled, so that positives and negatives are
# interleaved within each mini-batch.
index = list(range(len(X_train)))
random.shuffle(index)
X_train = X_train[index]
Y_train = Y_train[index]

X_validation, Y_validation = _build_split(
    poster_positive[_val_pos_start:_val_pos_start + num_validation_positive],
    poster_negative[_val_neg_start:_val_neg_start + num_validation_negative])

X_test, Y_test = _build_split(
    poster_positive[_test_pos_start:_test_pos_start + num_test_positive],
    poster_negative[_test_neg_start:_test_neg_start + num_test_negative])

In [5]:
print(Y_train)

[[1.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]


In [10]:
print(X_train.shape)

(594, 280, 200, 3)


### 尝试使用VGG16卷积基预训练模型

In [26]:
# Instantiate the VGG16 convolutional base with ImageNet weights and without
# the fully connected top, sized for the poster input.
# NOTE(review): the dataset cell divides pixels by 255; the ImageNet VGG16
# weights are normally paired with keras.applications.vgg16.preprocess_input
# -- confirm whether that preprocessing should be applied instead.
from keras.applications import VGG16

conv_base = VGG16(weights='imagenet',
                  include_top=False,
                  input_shape=(standard_height, standard_width, 3))

In [27]:
conv_base.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 280, 200, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 280, 200, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 280, 200, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 140, 100, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 140, 100, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 140, 100, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 70, 50, 128)       0         
__________

In [28]:
# Stack a densely connected binary classifier on top of the convolutional
# base: flatten the feature map, one hidden ReLU layer, one sigmoid output.
from keras import models
from keras import layers

model = models.Sequential([
    conv_base,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [29]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 8, 6, 512)         14714688  
_________________________________________________________________
flatten_2 (Flatten)          (None, 24576)             0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               6291712   
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
Total params: 21,006,657
Trainable params: 21,006,657
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Freeze the convolutional base so its weights are excluded from training.
conv_base.trainable = False
# Print how many weight tensors of the full model remain trainable.
print(len(model.trainable_weights))

4


In [31]:
# Compile the model: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
model.compile(optimizer='Adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [37]:
# Train the VGG16-based scorer for 20 epochs in mini-batches of 32, evaluating
# on the validation split after each epoch.
history = model.fit(X_train,
                    Y_train,
                    epochs=20,
                    batch_size=32,
                    validation_data=(X_validation, Y_validation))

# Save the trained model to an HDF5 file.
model.save('scorer_vgg16.h5')

Train on 594 samples, validate on 72 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [33]:
results = model.evaluate(X_test, Y_test)
print(results)

[2.8304948703715804, 0.824390243902439]


In [38]:
model.predict(X_test)

array([[2.2202587e-28],
       [2.3821357e-35],
       [6.0592483e-31],
       [1.9981365e-30],
       [5.4140457e-35],
       [7.8003965e-26],
       [1.4171536e-33],
       [2.3576107e-34],
       [2.9098714e-35],
       [3.4425426e-29],
       [1.9481839e-36],
       [3.2786312e-33],
       [1.5490261e-29],
       [2.9990248e-27],
       [2.3948029e-33],
       [1.1887792e-32],
       [3.3592285e-31],
       [1.0056498e-31],
       [6.1116849e-32],
       [7.1948760e-31],
       [3.4700209e-28],
       [6.8970256e-32],
       [1.5003061e-32],
       [1.4438291e-29],
       [3.7191051e-32],
       [5.8929119e-30],
       [2.2870056e-31],
       [2.5476544e-30],
       [6.7750094e-33],
       [1.6833922e-34],
       [1.7524684e-27],
       [1.5371575e-31],
       [8.2595729e-32],
       [2.1702191e-26],
       [3.4147817e-33],
       [7.8177131e-31],
       [2.2628704e-32],
       [1.9103505e-35],
       [5.1653352e-33],
       [1.4244361e-28],
       [1.3822853e-30],
       [2.854752

### VGG16预训练模型效果不好，接下来重新训练一个以Xception为卷积基的网络

In [6]:
from keras.applications import Xception
from keras import models
from keras import layers

# Xception convolutional base without the classification top, sized for the
# poster input. `weights` is not passed, so the Keras default applies.
# NOTE(review): the Keras default is presumably weights='imagenet' -- confirm
# whether pretrained weights are intended, since the section heading speaks of
# retraining the network.
conv_base_xception = Xception(include_top=False,
                              input_shape=(standard_height, standard_width, 3))

In [7]:
conv_base_xception.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 280, 200, 3)  0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 139, 99, 32)  864         input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv1_bn (BatchNormaliza (None, 139, 99, 32)  128         block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_conv1_act (Activation)   (None, 139, 99, 32)  0           block1_conv1_bn[0][0]            
__________________________________________________________________________________________________
block1_con

In [8]:
# Stack a densely connected binary classifier on top of the Xception
# convolutional base: flatten, one hidden ReLU layer, one sigmoid output.

model_xception = models.Sequential([
    conv_base_xception,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

In [9]:
model_xception.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
xception (Model)             (None, 9, 7, 2048)        20861480  
_________________________________________________________________
flatten_1 (Flatten)          (None, 129024)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33030400  
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 257       
Total params: 53,892,137
Trainable params: 53,837,609
Non-trainable params: 54,528
_________________________________________________________________


In [10]:
print(len(model_xception.trainable_weights))

158


In [11]:
# Compile the model: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model_xception.compile(optimizer='rmsprop',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])

In [15]:
# Train the Xception-based scorer.
from keras.backend import get_session

# Do NOT run tf.global_variables_initializer() here: it re-runs the
# initializer of every variable in the graph, which discards the weights
# loaded into conv_base_xception and the weights of any model already trained
# in this session. Fetching the session is enough -- Keras initialises the
# variables that are still uninitialised when it hands out the session.
get_session()

history_xception = model_xception.fit(X_train,
                                      Y_train,
                                      epochs=20,
                                      batch_size=16,
                                      validation_data=(X_validation, Y_validation))

# Save the trained model to an HDF5 file.
model_xception.save('scorer_xception.h5')

Train on 594 samples, validate on 72 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### 尝试加入style matrix(gram matrix)，这里还是使用VGG16，因为只包含卷积和池化基本操作

In [7]:
# Instantiate the VGG16 convolutional base, this time WITHOUT pretrained
# weights. weights=None must be passed explicitly: when the argument is
# omitted Keras falls back to its default and loads the ImageNet weights.
from keras.applications import VGG16

conv_base_vgg16 = VGG16(weights=None,
                        include_top=False,
                        input_shape=(standard_height, standard_width, 3))

In [8]:
conv_base_vgg16.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 280, 200, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 280, 200, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 280, 200, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 140, 100, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 140, 100, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 140, 100, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 70, 50, 128)       0         
__________

In [72]:
# style matrix(gram matrix)

from keras import backend as K

# def gram_matrix(A):
#     """
#     Argument:
#     A -- matrix of shape (n_C, n_H*n_W)
    
#     Returns:
#     GA -- Gram matrix of A, of shape (n_C, n_C)
#     """
    
#     GA = K.dot(A, K.transpose(A))
    
#     return GA

def compute_layer_style(a_S):
    """Compute a Gram (style) matrix for every sample of a batch.

    Arguments:
    a_S -- tensor whose axis 0 is the batch axis and whose axis 1 is
           contracted; in this notebook it is the reshaped activation of
           shape (batch, n_H*n_W, n_C)

    Returns:
    GS -- batch-wise product of a_S with itself over axis 1; for the input
          described above its shape is (batch, n_C, n_C)
    """
    return K.batch_dot(a_S, a_S, axes=[1, 1])

In [79]:
# 构建网络

from keras.models import Model
from keras import layers
from keras import Input

def model_vgg16_style(input_shape):
    """Build the Gram-matrix ("style") scorer on top of ``conv_base_vgg16``.

    Arguments:
    input_shape -- shape of one input image, without the batch axis

    Returns:
    A Keras ``Model`` mapping a batch of images to one sigmoid score each.
    """
    X_input = Input(input_shape)

    # Convolutional feature map of shape (batch, n_H, n_W, n_C); the batch
    # dimension is not needed below.
    X = conv_base_vgg16(X_input)
    _, n_H, n_W, n_C = X.get_shape().as_list()

    # Collapse the spatial grid so that each row holds the channel vector of
    # one position, then turn it into the per-image Gram matrix.
    X = layers.Reshape([n_H * n_W, n_C])(X)
    X = layers.Lambda(compute_layer_style)(X)

    # Classifier head on the flattened Gram matrix.
    X = layers.Flatten()(X)
    X = layers.Dense(256, activation='relu')(X)
    X = layers.Dense(256, activation='relu')(X)
    X = layers.Dense(1, activation='sigmoid')(X)

    return Model(X_input, X)

In [80]:
# NOTE(review): this rebinds the name model_vgg16_style from the builder
# function to the built model, so the builder cannot be called again under
# that name until its defining cell is re-run.
model_vgg16_style = model_vgg16_style((standard_height, standard_width, 3))

In [81]:
model_vgg16_style.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_25 (InputLayer)        (None, 280, 200, 3)       0         
_________________________________________________________________
vgg16 (Model)                (None, 8, 6, 512)         14714688  
_________________________________________________________________
reshape_4 (Reshape)          (None, 48, 512)           0         
_________________________________________________________________
lambda_18 (Lambda)           (None, 512, 512)          0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 262144)            0         
_________________________________________________________________
dense_31 (Dense)             (None, 256)               67109120  
_________________________________________________________________
dense_32 (Dense)             (None, 256)               65792     
__________

In [82]:
# Compile the model: RMSprop optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model_vgg16_style.compile(optimizer='rmsprop',
                          loss='binary_crossentropy',
                          metrics=['accuracy'])

In [84]:
# Train the Gram-matrix (style) scorer.
from keras.backend import get_session

# Do NOT run tf.global_variables_initializer() here: it re-runs the
# initializer of every variable in the graph, which resets the weights of
# conv_base_vgg16 and of any model already trained in this session. Fetching
# the session is enough -- Keras initialises the variables that are still
# uninitialised when it hands out the session.
get_session()

history_vgg16_style = model_vgg16_style.fit(X_train,
                                            Y_train,
                                            epochs=5,
                                            batch_size=16,
                                            validation_data=(X_validation, Y_validation))

# Save the trained model to an HDF5 file.
model_vgg16_style.save('scorer_vgg16_style.h5')

Train on 594 samples, validate on 72 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### 可能海报提取成高维特征后，就会不收敛，下面尝试搭建一个只带一次卷积池化的简单网络

In [87]:
from keras.models import Model
from keras import layers
from keras import Input

def Simple_Model(input_shape):
    """Build a minimal CNN scorer with a single convolution/pooling stage.

    Arguments:
    input_shape -- shape of one input image, without the batch axis

    Returns:
    A Keras ``Model`` named 'Simple_Model' that outputs one sigmoid score.
    """
    image_in = Input(input_shape)

    # Pad 3 pixels on each side before the 7x7 stride-1 convolution.
    features = layers.ZeroPadding2D((3, 3))(image_in)

    # Convolution -> batch normalisation over the channel axis -> ReLU.
    features = layers.Conv2D(32, (7, 7), strides=(1, 1), name='conv0')(features)
    features = layers.BatchNormalization(axis=3, name='bn0')(features)
    features = layers.Activation('relu')(features)

    features = layers.MaxPooling2D((2, 2), name='max_pool')(features)

    # Single sigmoid unit on the flattened feature map.
    flat = layers.Flatten()(features)
    score = layers.Dense(1, activation='sigmoid', name='fc')(flat)

    return Model(inputs=image_in, outputs=score, name='Simple_Model')

In [88]:
simple_model = Simple_Model((standard_height, standard_width, 3))

In [89]:
simple_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_27 (InputLayer)        (None, 280, 200, 3)       0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 286, 206, 3)       0         
_________________________________________________________________
conv0 (Conv2D)               (None, 280, 200, 32)      4736      
_________________________________________________________________
bn0 (BatchNormalization)     (None, 280, 200, 32)      128       
_________________________________________________________________
activation_1 (Activation)    (None, 280, 200, 32)      0         
_________________________________________________________________
max_pool (MaxPooling2D)      (None, 140, 100, 32)      0         
_________________________________________________________________
flatten_3 (Flatten)          (None, 448000)            0         
__________

In [90]:
# Compile the model: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
simple_model.compile(optimizer = "Adam",
                     loss = "binary_crossentropy",
                     metrics = ["accuracy"])

In [91]:
# Train the simple CNN for 5 epochs in mini-batches of 16, evaluating on the
# validation split after each epoch.
history_simple = simple_model.fit(X_train,
                                  Y_train,
                                  epochs=5,
                                  batch_size=16,
                                  validation_data=(X_validation, Y_validation))

Train on 594 samples, validate on 72 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### 尝试只用全连接层

In [5]:
from keras.models import Model
from keras import layers
from keras import Input

def dense_model(input_shape):
    """Build a fully connected scorer that works directly on the raw pixels.

    Arguments:
    input_shape -- shape of one input image, without the batch axis

    Returns:
    A Keras ``Model``: Flatten -> Dense(512) -> Dense(512) -> sigmoid unit.
    """
    pixels = Input(input_shape)

    # A disabled earlier variant reshaped the input to (n_H*n_W, n_C) and
    # applied compute_layer_style before flattening.
    hidden = layers.Flatten()(pixels)

    # Two identical hidden ReLU layers.
    for _ in range(2):
        hidden = layers.Dense(512, activation='relu')(hidden)

    score = layers.Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=pixels, outputs=score)

In [6]:
# NOTE(review): this rebinds the name dense_model from the builder function
# to the built model, so the builder cannot be called again under that name
# until its defining cell is re-run.
dense_model = dense_model((standard_height, standard_width, 3))

In [7]:
dense_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 280, 200, 3)       0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 168000)            0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               86016512  
_________________________________________________________________
dense_2 (Dense)              (None, 512)               262656    
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 513       
Total params: 86,279,681
Trainable params: 86,279,681
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Compile the model: Adam optimizer, binary cross-entropy loss, and accuracy
# reported as the metric.
dense_model.compile(optimizer = "Adam",
                    loss = "binary_crossentropy",
                    metrics = ["accuracy"])

In [9]:
# Train the fully connected model for 5 epochs in mini-batches of 16,
# evaluating on the validation split after each epoch. The value returned by
# fit() is stored under the name dense_simple.
dense_simple = dense_model.fit(X_train,
                               Y_train,
                               epochs=5,
                               batch_size=16,
                               validation_data=(X_validation, Y_validation))

Train on 594 samples, validate on 72 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
