# Convolutional Neural Network for Text Classification
In this tutorial, we implement a convolutional neural network that classifies movie reviews as positive or negative.

In [1]:
import tensorflow as tf
import numpy as np
import os
from utils import *
from sklearn.cross_validation import train_test_split
% load_ext autoreload
% autoreload 2

In [2]:
# Read each polarity file into a list of lines (one sample per line).
# The context managers close the file handles as soon as the lines are read.
with open('data/polarity/pos.txt') as pos_file:
    x_pos = pos_file.readlines()
with open('data/polarity/neg.txt') as neg_file:
    x_neg = neg_file.readlines()

# Labels: 1 for every positive line, 0 for every negative line,
# concatenated in the same order as x_pos + x_neg.
y_pos = np.ones(len(x_pos))
y_neg = np.zeros(len(x_neg))
y = np.concatenate([y_pos, y_neg])

## Preprocessing

In [3]:
# preprocess() is imported from utils; its five return values are unpacked here.
x, mask, word_to_idx, seq_length, vocab_size = preprocess(x_pos + x_neg)

# Shuffle inputs and labels with one shared permutation so each sample keeps
# its label; the fixed seed makes the ordering repeatable.
np.random.seed(10)
random_idx = np.random.permutation(len(y))
x, y = x[random_idx], y[random_idx]

# Hold out 10% of the shuffled data as the test split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.1)

# Define and construct model

In [4]:
class TextCNN(object):
    """Convolutional text classifier.

    The graph is: embedding lookup -> one conv + ReLU + max-over-time pooling
    branch per filter size -> concatenation -> dropout -> linear output layer.

    Constructing an instance adds all ops to the current default TensorFlow
    graph and exposes these attributes:
        x, y, dropout_keep_prob: input placeholders
        loss, train_op: training objective and Adam update op
        pred, accuracy: predicted class ids and batch accuracy
        summary_op: merged scalar/histogram summaries
        saver: tf.train.Saver for the graph's variables
    """

    def __init__(self, batch_size=100, seq_length=58, num_class=2, vocab_size=18768,
                 dim_emb=128, filter_sizes=[3,4,5], num_filters=[100,100,100]):
        """
        Args:
            batch_size: kept for backward compatibility; the graph no longer
                depends on it, so any batch size can be fed at run time
            seq_length: maximum sequence length
            num_class: number of classes; default is 2 (positive or negative)
            vocab_size: vocabulary size; number of different words
            dim_emb: embedding size
            filter_sizes: list of filter sizes; e.g. [3, 4, 5]
            num_filters: list with the number of filters per size; e.g. [128, 128, 128]
        """
        # The list defaults above are only read (zipped), never mutated.

        self.x = tf.placeholder(tf.int32, [None, seq_length], name='x')
        self.y = tf.placeholder(tf.int64, [None], name='y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        with tf.variable_scope('embedding_layer'):
            w = tf.get_variable('w', shape=[vocab_size, dim_emb], initializer=tf.random_uniform_initializer(-1, 1))
            x_embed = tf.nn.embedding_lookup(w, self.x)    # (batch_size, seq_length, dim_emb)
            # conv2d needs a channel axis, so add one of size 1.
            x_embed = tf.expand_dims(x_embed, 3)          # (batch_size, seq_length, dim_emb, 1)

        pooled_outputs = []
        num_total_filter = 0
        for i, (f_s, n_f) in enumerate(zip(filter_sizes, num_filters)):
            num_total_filter += n_f

            with tf.variable_scope('conv_maxpool_%d' %(i+1)):
                # Each filter spans f_s consecutive positions and the full embedding width.
                w = tf.get_variable('w', shape=[f_s, dim_emb, 1, n_f], initializer=tf.contrib.layers.xavier_initializer())
                b = tf.get_variable('b', shape=[n_f], initializer=tf.constant_initializer(0.0))

                conv = tf.nn.conv2d(x_embed, w, strides=[1, 1, 1, 1], padding='VALID') + b   # (batch_size, seq_length - filter_size + 1, 1, num_filter)
                relu = tf.nn.relu(conv)
                # Pool over every remaining position, leaving one value per filter.
                pooled = tf.nn.max_pool(relu, [1, seq_length - f_s + 1, 1, 1], [1, 1, 1, 1], padding='VALID')
                pooled_outputs.append(pooled)  # (number of different filter sizes) @ [batch_size, 1, 1, num_filter]

        pooled = tf.concat(3, pooled_outputs)                 # (batch_size, 1, 1, num_total_filter)
        # Flatten with an inferred batch dimension instead of the constructor's
        # batch_size, so batches of any size (e.g. a short final batch) work.
        pooled = tf.reshape(pooled, [-1, num_total_filter])   # (batch_size, num_total_filter)

        with tf.name_scope("dropout"):
            pooled = tf.nn.dropout(pooled, self.dropout_keep_prob)

        with tf.variable_scope('output_layer'):
            w = tf.get_variable('w', shape=[num_total_filter, num_class], initializer=tf.contrib.layers.xavier_initializer())
            b = tf.get_variable('b', shape=[num_class], initializer=tf.constant_initializer(0.0))

            out = tf.matmul(pooled, w) + b    # (batch_size, num_class)

        with tf.name_scope('optimizer'):
            self.loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(out, self.y))
            self.train_op = tf.train.AdamOptimizer(0.001, beta1=0.5).minimize(self.loss)

        with tf.name_scope('evaluation'):
            self.pred = tf.arg_max(out, 1)
            self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.pred, self.y), tf.float32))

        with tf.name_scope('summary'):
            tf.scalar_summary('batch_loss', self.loss)
            tf.scalar_summary('accuracy', self.accuracy)
            for var in tf.trainable_variables():
                tf.histogram_summary(var.op.name, var)

            self.summary_op = tf.merge_all_summaries()
        self.saver = tf.train.Saver()

In [5]:
# Start from an empty default graph so this cell can be re-run without
# tf.get_variable raising on already-existing variable names.
tf.reset_default_graph()

# Size the model from the preprocessing results instead of the constructor's
# hard-coded defaults.
# NOTE(review): assumes preprocess() returns seq_length and vocab_size as ints
# matching the padded input width and vocabulary — confirm against utils.
model = TextCNN(seq_length=seq_length, vocab_size=vocab_size)

# Train and test the model

In [None]:
num_epoch = 2
batch_size = 100
# Floor division keeps this an int on Python 3 as well: it is used as a
# range() bound and with %d formatting. Samples beyond the last full batch
# are not visited.
num_iter_per_epoch = x_train.shape[0] // batch_size
log_path = 'log/'
model_save_path = 'model/'

if not os.path.exists(log_path):
    os.makedirs(log_path)
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)

config = tf.ConfigProto(allow_soft_placement = True)
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    # initialize parameters
    tf.initialize_all_variables().run()
    summary_writer = tf.train.SummaryWriter(logdir=log_path, graph=tf.get_default_graph())

    try:
        for e in range(num_epoch):
            for i in range(num_iter_per_epoch):
                # one optimisation step of the text classifier on the i-th mini-batch
                x_batch = x_train[i*batch_size:(i+1)*batch_size]
                y_batch = y_train[i*batch_size:(i+1)*batch_size]
                # NOTE(review): a keep probability of 1.0 disables dropout even
                # during training — confirm this is intended.
                keep_prob = 1.0
                feed_dict = {model.x: x_batch, model.y: y_batch, model.dropout_keep_prob: keep_prob}
                sess.run(model.train_op, feed_dict)

                # Every 10 steps: log summaries and print metrics for the batch just trained on.
                if i % 10 == 0:
                    summary, loss, acc = sess.run([model.summary_op, model.loss, model.accuracy], feed_dict)
                    summary_writer.add_summary(summary, e*num_iter_per_epoch + i)
                    print ('Epoch: [%d] Step: [%d/%d] loss: [%.6f] accuracy: [%.6f]' %(e+1, i+1, num_iter_per_epoch, loss, acc))

                # Every 500 steps (including step 0 of each epoch): write a checkpoint.
                if i % 500 == 0:
                    model.saver.save(sess, os.path.join(model_save_path, 'textcnn-%d' %(e+1)), global_step=i+1)
                    print ('%s saved' % os.path.join(model_save_path, 'textcnn-%d-%d' %(e+1, i+1)))
    finally:
        # Flush pending events to disk and release the writer's file handle.
        summary_writer.close()

    # Evaluate on whole test batches only; a remainder smaller than
    # batch_size is skipped, and the reported value is the mean batch accuracy.
    num_iter_per_epoch = x_test.shape[0] // batch_size
    test_accuracy = 0.0
    for i in range(num_iter_per_epoch):
        x_batch = x_test[i*batch_size:(i+1)*batch_size]
        y_batch = y_test[i*batch_size:(i+1)*batch_size]
        keep_prob = 1.0
        acc = sess.run(model.accuracy, feed_dict={model.x: x_batch, model.y: y_batch, model.dropout_keep_prob: keep_prob})
        test_accuracy += acc

    print ("Test accuracy: %.3f" %(test_accuracy/num_iter_per_epoch))

Epoch: [1] Step: [1/95] loss: [0.498247] accuracy: [1.000000]
model/textcnn-1-1 saved
Epoch: [1] Step: [11/95] loss: [0.527520] accuracy: [0.860000]
Epoch: [1] Step: [21/95] loss: [0.543972] accuracy: [0.800000]
Epoch: [1] Step: [31/95] loss: [0.515876] accuracy: [0.800000]
Epoch: [1] Step: [41/95] loss: [0.487641] accuracy: [0.880000]
Epoch: [1] Step: [51/95] loss: [0.528076] accuracy: [0.830000]
Epoch: [1] Step: [61/95] loss: [0.495067] accuracy: [0.830000]
Epoch: [1] Step: [71/95] loss: [0.442330] accuracy: [0.880000]
Epoch: [1] Step: [81/95] loss: [0.396540] accuracy: [0.930000]
Epoch: [1] Step: [91/95] loss: [0.456583] accuracy: [0.870000]
Epoch: [2] Step: [1/95] loss: [0.383526] accuracy: [0.890000]
