In [18]:
import os
import cv2
import tensorflow as tf
import numpy as np
import random
import utils
from sklearn.model_selection import train_test_split

In [19]:
image_size = 256
image_channel = 3
label_size = 150
unknown_label_size = 50
train_data_path = 'dataset/train_images'
save_path = 'v_saved_model/'
show_every_n = 10
saved_every_n = 100
train_step = 10000

In [24]:
class AlexNet(object):
    '''
    使用给定的训练数据集训练一个AlexNet模型
    '''
    def __init__(self,
                 batch_size=64,
                 num_units = 128,
                 num_classes = 150,
                 learning_rate = 0.002,
                 num_epoches = 1,
                 is_training=True):

        self.num_units = num_units
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epoches = num_epoches

        if is_training:
            self.batch_size = batch_size
            self.drop_prob = 0.5
        else:
            self.batch_size = 1
            self.drop_prob = 1.0

        self.build_model()

    def train_test_split(self, image_dataset, label_dataset):
        train_x, test_x, train_y, test_y = \
            train_test_split(image_dataset, label_dataset, test_size=0.1)
        return train_x, test_x,train_y, test_y

    def get_batch(self):
        for i in range(self.data_size // self.batch_size):
            batch_x = self.train_x[i * self.batch_size: (i + 1) * self.batch_size]
            batch_y = self.train_y[i * self.batch_size: (i + 1) * self.batch_size]
            yield batch_x, batch_y

    def build_input(self):
        input_x = tf.placeholder(
            tf.float32, [self.batch_size, image_size, image_size, image_channel], name='input_x')
        output_y = tf.placeholder(tf.float32, [self.batch_size, self.num_classes], name='output_y')
        keep_prob = tf.placeholder(tf.float32, name='drop_keep')
        return input_x, output_y, keep_prob

    def bulid_CNN(self, input_x):
        conv_layer1 = tf.layers.conv2d(inputs=input_x, filters=8, kernel_size=[8, 8],
                                       strides=[2, 2], padding='SAME',activation=tf.nn.relu)
        pooling_layer1 = tf.layers.max_pooling2d(
            inputs=conv_layer1, pool_size=[2, 2], strides=[2, 2])

        conv_layer2 = tf.layers.conv2d(inputs=pooling_layer1, filters=16, kernel_size=[4, 4],
                                       strides=[2, 2], padding='SAME',activation=tf.nn.relu)
        pooling_layer2 = tf.layers.max_pooling2d(
            inputs=conv_layer2, pool_size=[2, 2], strides=[2, 2])

        conv_layer3 = tf.layers.conv2d(inputs=pooling_layer2, filters=32, kernel_size=[4, 4],
                                       strides=[2, 2], padding='SAME',activation=tf.nn.relu)
        pooling_layer3 = tf.layers.max_pooling2d(
            inputs=conv_layer3, pool_size=[2, 2], strides=[2, 2])
        cnn_flat = tf.reshape(pooling_layer3, [self.batch_size, -1])
        return cnn_flat

    def bulid_full_connect(self, cnn_flat, keep_prob):
        fc_layer1 = tf.layers.dense(inputs=cnn_flat, units=self.num_units, activation=tf.nn.relu)
        fc_layer1 = tf.layers.dropout(fc_layer1, rate=keep_prob)
        output_layer = tf.layers.dense(inputs=fc_layer1, units=self.num_classes, activation=None)

        return output_layer

    def bulid_loss(self, logits, targets):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets)
        loss = tf.reduce_mean(loss)
        return loss

    def bulid_optimizer(self, loss):
        optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(loss)
        return optimizer

    def bulid_accuracy(self, logits, targets):
        equality = tf.equal(tf.argmax(logits, axis=1), tf.argmax(targets, axis=1))
        equality = tf.cast(equality, tf.float32)
        accuracy = tf.reduce_mean(equality)
        return accuracy

    def build_model(self):
        tf.reset_default_graph()
        self.input_x, self.output_y, self.keep_prob = self.build_input()
        self.cnn_padding = self.bulid_CNN(self.input_x)
        self.output_layer = self.bulid_full_connect(self.cnn_padding, self.drop_prob)
        self.repsentation = self.output_layer
        self.loss = self.bulid_loss(self.output_layer, self.output_y)
        self.optimizer = self.bulid_optimizer(self.loss)
        self.accuracy = self.bulid_accuracy(self.output_layer, self.output_y)

    def train(self, image_dataset, label_dataset):
        self.train_x, self.test_x,self.train_y, self.test_y = \
            self.train_test_split(image_dataset, label_dataset)
        self.data_size = len(self.train_x)  # 4510

        saver = tf.train.Saver()
        #模型训练并将训练的结果保存在本地
        with tf.Session() as sess:
            print("AlexNet model training begins....")
            sess.run(tf.global_variables_initializer())
            global_steps = 0
            for epoch in range(self.num_epoches):
                generator = self.get_batch()
                for batch_x, batch_y in generator:
                    global_steps += 1
                    feed = {self.input_x: batch_x,
                            self.output_y: batch_y,
                            self.keep_prob: self.drop_prob}
                    show_loss, show_accu, _ = sess.run(
                        [self.loss, self.accuracy, self.optimizer], feed_dict=feed)

                    if global_steps % show_every_n == 0:
                        print('epoch: {}/{}..'.format(epoch+1, self.num_epoches),
                              'global_step: {}..'.format(global_steps),
                              'loss: {:.3f}..'.format(show_loss),
                              'accuracy: {:.2f}..'.format(show_accu))

                    if global_steps % saved_every_n == 0:
                        saver.save(sess, save_path+"e{}_s{}.ckpt".format(epoch, global_steps))
            saver.save(sess, save_path+"lastest.ckpt")

        print('training finished')


In [29]:
train_x = utils.get_image_data()
train_y = utils.get_one_hot_label_data()
label2int, int2label = utils.get_parameter()

In [31]:
model = AlexNet()
model.train(train_x, train_y)

AlexNet model training begins....
epoch: 1/2.. global_step: 10.. loss: 5.010.. accuracy: 0.00..
epoch: 1/2.. global_step: 20.. loss: 5.006.. accuracy: 0.02..
epoch: 1/2.. global_step: 30.. loss: 4.999.. accuracy: 0.00..
epoch: 1/2.. global_step: 40.. loss: 5.010.. accuracy: 0.00..
epoch: 1/2.. global_step: 50.. loss: 5.012.. accuracy: 0.00..
epoch: 1/2.. global_step: 60.. loss: 5.016.. accuracy: 0.00..
training finished


In [83]:
import tensorflow as tf
import numpy as np
import visual_model
import word_to_vec
import utils

image_repesentation_size = 150
word_embedding_size = 200
visual_model_checkpoints_dir = 'v_saved_model/'
devise_model_checkpoints_dir = 'devise_saved_model/'
word2vec_saved_dir = 'word2vec_saved/text8model.model'
show_every_n = 200
save_every_n = 1000
num_label = 150

In [84]:
class DeViSE(object):

    def __init__(self,
                 batch_size=1,
                 learning_rate=0.001):
        self.alex_model = visual_model.AlexNet(is_training=False)
        self.word_model = word_to_vec.get_word2vec()
        self.batch_size = batch_size
        self.learning_rate = learning_rate

        self.bulid_model()
        print('devise model has been initialized')

    def bulid_input(self):
        input_x = tf.placeholder(dtype=tf.float32, shape=[image_repesentation_size, self.batch_size], name='input_x')
        label_y = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, word_embedding_size], name='output_y')
        other_labels = tf.placeholder(dtype=tf.float32, shape=[self.batch_size, word_embedding_size], name='other_labels')
        return input_x, label_y, other_labels

    def bulid_linear_model(self, input_x, label_y, other_labels):
        margin = 0.1
        weight_matrix = tf.get_variable('weight_matrix', shape=[word_embedding_size, image_repesentation_size])
        output_ = tf.matmul(weight_matrix, input_x)
        loss1 = tf.reduce_sum(tf.matmul(label_y, output_))
        loss2 = tf.reduce_sum(tf.matmul(other_labels, output_))
        loss = tf.maximum(0.0, margin-loss1+loss2)
        return output_, loss

    def bulid_optimizer(self, loss):
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(loss)
        return optimizer

    def bulid_model(self):
        self.input_x, self.output_y, self.other_labels = self.bulid_input()
        self.prediction, self.loss = self.bulid_linear_model(self.input_x, self.output_y, self.other_labels)
        self.optimizer = self.bulid_optimizer(self.loss)

    def label_represent_set(self, label_data, int2label):
        label_set = list(set(label_data))
        label_set = [int2label[num] for num in label_set]
        self.vec_set = []
        for label in label_set:
            vector = self.word_model[label]
            self.vec_set.append(vector)


    def train(self, image_data, label_data, int2label):
        saver = tf.train.Saver()
        self.word_model_wv = self.word_model.wv
        with tf.Session() as sess:
            global_step = 0
            sess.run(tf.global_variables_initializer())
            for image, label in zip(image_data, label_data):
                global_step += 1
                image = np.array(image)
                image = np.reshape(image, (1, 256, 256, 3))
                feed = {self.alex_model.input_x: image}
                image_representation = sess.run(self.alex_model.repsentation, feed_dict=feed)
                image_representation = image_representation.T
                label = int2label[label]
                label = label.split('_')[0].lower()
                if label in self.word_model_wv.vocab:
                    label = self.word_model_wv[label]
                else:
                    label = self.word_model_wv['bird']
                other_label = np.random.randint(0, num_label)
                other_label = int2label[other_label]
                other_label = other_label.split('_')[0].lower()
                if other_label in self.word_model_wv.vocab:
                    other_label = self.word_model_wv[other_label]
                else:
                    other_label = self.word_model_wv['bird']
                other_label_representation = np.reshape(other_label, (1,200))
                label_representation = np.reshape(label, (1, 200))

                feed = {self.input_x: image_representation,
                        self.output_y: label_representation,
                        self.other_labels: other_label_representation}

                show_loss, _ = sess.run([self.loss, self.optimizer], feed_dict=feed)

                if global_step % show_every_n == 0 and show_loss > 0:
                    print('step: {}'.format(global_step),
                          'loss: {:.3f}'.format(show_loss))
                if global_step % save_every_n == 0:
                    saver.save(sess, devise_model_checkpoints_dir + 's{}.ckpt'.format(global_step))
            saver.save(sess, devise_model_checkpoints_dir + 'lastest.ckpt')

    def predict(self, image):
        lastest_checkpoint = tf.train.latest_checkpoint(devise_model_checkpoints_dir)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(sess, lastest_checkpoint)
            image = np.array(image).reshape((1, 256, 256, 3))
            feed = {self.alex_model.input_x:image}
            image_repesentation = sess.run(self.alex_model.repsentation, feed_dict=feed)
            feed = {self.input_x: image_repesentation.T}
            label_representation = sess.run(self.prediction, feed_dict=feed)
            #
            label_representation = np.reshape(label_representation, (200))
            print(label_representation.shape)
            most_similar = self.word_model.similar_by_vector(label_representation, topn=1)

        prediction_label = most_similar[0][0]
        return prediction_label


In [34]:
train_x = utils.get_image_data()
label2int, int2label = utils.get_parameter()
numeral_labels = utils.get_numeral_label_data()

In [85]:
d_model = DeViSE()
d_model.train(train_x, numeral_labels, int2label)
prediction = d_model.predict(train_x[0])
prediction

devise model has been initialized
step: 1600 loss: 286.484
step: 1800 loss: 3522.257
step: 2000 loss: 8.668
step: 2400 loss: 0.100
step: 2800 loss: 1982.249
step: 3400 loss: 2322.872
step: 4200 loss: 322.985
INFO:tensorflow:Restoring parameters from devise_saved_model/lastest.ckpt
(200,)


  if np.issubdtype(vec.dtype, np.int):


'hungry'

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.imshow(train_x[0])
print(int2label[train_y[0].index(1)])

In [None]:
model = AlexNet(train_x, train_y)
model.train()

In [None]:
from gensim.models import word2vec

In [None]:
word_embedding_size = 200
sentences = word2vec.Text8Corpus('text8_data/text8')
model = word2vec.Word2Vec(sentences, size=word_embedding_size)

In [None]:
y1 = model.similarity('woman', 'man')
print(y1)
y2 = model.most_similar('good', topn=1)
print(y2[0][0])

In [None]:
class AlexMNIST(object):
    '''
    使用给定的训练数据集训练一个AlexNet模型
    '''
    def __init__(self, 
                 batch_size=64,
                 num_units = 128,
                 num_classes = 10,
                 learning_rate = 0.002,
                 num_epoches = 1,
                 is_training=True):

        self.num_units = num_units
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.num_epoches = num_epoches

        if is_training:
            self.batch_size = batch_size
            self.drop_prob = 0.5
        else:
            self.batch_size = 1
            self.drop_prob = 1.0
            
        self.build_model()

    def build_input(self):
        input_x = tf.placeholder(tf.float32, [self.batch_size, 784])
        output_y = tf.placeholder(tf.float32, [self.batch_size, self.num_classes], name='output_y')
        keep_prob = tf.placeholder(tf.float32, name='drop_keep')
        return input_x, output_y, keep_prob

    def bulid_CNN(self, input_x):
        conv_layer1 = tf.layers.conv2d(inputs=input_x, filters=32, kernel_size=[5, 5], 
                                       strides=[1, 1], padding='SAME',activation=tf.nn.relu)
        pooling_layer1 = tf.layers.max_pooling2d(
            inputs=conv_layer1, pool_size=[2, 2], strides=[1, 1])

        conv_layer2 = tf.layers.conv2d(inputs=pooling_layer1, filters=32, kernel_size=[5, 5], 
                                       strides=[1, 1], padding='SAME',activation=tf.nn.relu)
        pooling_layer2 = tf.layers.max_pooling2d(
            inputs=conv_layer2, pool_size=[2, 2], strides=[1, 1])

        cnn_flat = tf.reshape(pooling_layer2, [self.batch_size, -1])
        return cnn_flat

    def bulid_full_connect(self, cnn_flat, keep_prob):
        fc_layer1 = tf.layers.dense(inputs=cnn_flat, units=self.num_units, activation=tf.nn.relu)
        fc_layer1 = tf.layers.dropout(fc_layer1, rate=self.drop_prob)
        output_layer = tf.layers.dense(inputs=fc_layer1, units=self.num_classes, activation=None)

        return output_layer

    def bulid_loss(self, logits, targets):
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=targets)
        loss = tf.reduce_mean(loss)
        return loss

    def bulid_optimizer(self, loss):
        optimizer = tf.train.AdamOptimizer(self.learning_rate).minimize(loss)
        return optimizer

    def bulid_accuracy(self, logits, targets):
        equality = tf.equal(tf.argmax(logits, axis=1), tf.argmax(targets, axis=1))
        equality = tf.cast(equality, tf.float32)
        accuracy = tf.reduce_mean(equality)
        return accuracy

    def build_model(self):
        tf.reset_default_graph()
        self.input_x, self.output_y, self.keep_prob = self.build_input()
        self.reshape_x = tf.reshape(self.input_x, [self.batch_size, 28, 28, 1])
        self.cnn_padding = self.bulid_CNN(self.reshape_x)
        self.output_layer = self.bulid_full_connect(self.cnn_padding, self.keep_prob)
        self.loss = self.bulid_loss(self.output_layer, self.output_y)
        self.optimizer = self.bulid_optimizer(self.loss)
        self.accuracy = self.bulid_accuracy(self.output_layer, self.output_y)

    def train(self,data=None):
        saver = tf.train.Saver()
        #模型训练并将训练的结果保存在本地
        with tf.Session() as sess:
            print("AlexNet model training begins....")
            sess.run(tf.global_variables_initializer())
            global_steps = 0
            for epoch in range(self.num_epoches):
                for step in range(5000):
                    batch_x, batch_y = data.train.next_batch(self.batch_size)
                    feed = {self.input_x: batch_x,
                        self.output_y: batch_y,
                        self.keep_prob: self.drop_prob}
                    show_loss, show_accu, _ = sess.run(
                        [self.loss, self.accuracy, self.optimizer], feed_dict=feed)

                    if step % show_every_n == 0:
                        print('epoch: {}/{}..'.format(epoch+1, self.num_epoches+1),
                              'global_step: {}..'.format(global_steps),
                              'loss: {:.3f}..'.format(show_loss),
                              'accuracy: {:.2f}..'.format(show_accu))
        print('training finished')

In [None]:
from tensorflow.examples.tutorials.mnist import input_data
def get_MNIST():
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
    return mnist
mnist = get_MNIST()