# Convolutional Neural Network for Text Classification
In this tutorial, we implement a convolutional neural network that classifies movie reviews as positive or negative.

In [1]:
import tensorflow as tf
import numpy as np
import os
from utils import preprocess 
from model import TextCNN
from sklearn.cross_validation import train_test_split
% load_ext autoreload
% autoreload 2

## Load the dataset

In [2]:
# Read one review per line. The context managers close each file as soon as
# it has been read instead of leaving the handle open until garbage collection.
with open('data/polarity/pos.txt') as pos_file:
    x_pos = pos_file.readlines()
with open('data/polarity/neg.txt') as neg_file:
    x_neg = neg_file.readlines()

# Labels: 1.0 for every positive review, 0.0 for every negative review,
# concatenated positives-first.
y_pos = np.ones(len(x_pos))
y_neg = np.zeros(len(x_neg))
y = np.concatenate([y_pos, y_neg])

In [3]:
# Sanity check: size of each class, then one sample review from each class.
for corpus in (x_pos, x_neg):
    print(len(corpus))
for sample in (x_pos[3], x_neg[0]):
    print(sample)

5331
5331
if you sometimes like to go to the movies to have fun , wasabi is a good place to start . 

simplistic , silly and tedious . 



## Preprocessing

In [4]:
# Positive reviews are placed first so the rows line up with the labels in `y`.
# NOTE(review): `preprocess` lives in utils.py; the meaning of each returned
# value is inferred from its name only - confirm against that module.
raw_reviews = x_pos + x_neg
(x, mask, word_to_idx,
 seq_length, vocab_size) = preprocess(raw_reviews)

In [5]:
# Shuffle the samples and hold out 20% of them as a test set; the fixed
# random_state makes the split reproducible across runs.
splits = train_test_split(x, y, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = splits

# Report the shape of every resulting array.
for part in (x_train, x_test, y_train, y_test):
    print(part.shape)

(8529, 58)
(2133, 58)
(8529,)
(2133,)


## Define and construct the model

In [6]:
# Hyper-parameters handed to TextCNN (defined in model.py).
# NOTE(review): batch_size here equals the batch size fed by the training
# cell; presumably the graph is built for a fixed batch size - confirm.
model_kwargs = dict(
    batch_size=100,
    seq_length=seq_length,
    num_class=2,
    vocab_size=vocab_size,
    dim_emb=128,
    filter_sizes=[2, 3, 4],
    num_filters=30,
)
model = TextCNN(**model_kwargs)

## Train and test the model

In [7]:
num_epoch = 5
batch_size = 100
# Floor division keeps the count an integer on both Python 2 and Python 3
# (range() rejects floats); a trailing partial batch is not used.
num_iter_per_epoch = x_train.shape[0] // batch_size
log_path = 'log/'
model_save_path = 'model/'

# Make sure the output directories for summaries and checkpoints exist.
if not os.path.exists(log_path):
    os.makedirs(log_path)
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)

config = tf.ConfigProto(allow_soft_placement = True)
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    # initialize parameters
    tf.initialize_all_variables().run()
    summary_writer = tf.train.SummaryWriter(logdir=log_path, graph=tf.get_default_graph())

    try:
        for e in range(num_epoch):
            for i in range(num_iter_per_epoch):
                # run one optimisation step of the classifier on the next mini-batch
                x_batch = x_train[i*batch_size:(i+1)*batch_size]
                y_batch = y_train[i*batch_size:(i+1)*batch_size]
                feed_dict = {model.x: x_batch, model.y: y_batch}
                sess.run(model.train_op, feed_dict)

                # every 10 steps: log summaries and print loss/accuracy on the current batch
                if i % 10 == 0:
                    summary, loss, acc = sess.run([model.summary_op, model.loss, model.accuracy], feed_dict)
                    summary_writer.add_summary(summary, e*num_iter_per_epoch + i)
                    print ('Epoch: [%d] Step: [%d/%d] loss: [%.6f] accuracy: [%.6f]' %(e+1, i+1, num_iter_per_epoch, loss, acc))

                # NOTE(review): when an epoch has fewer than 501 iterations this
                # condition only holds at i == 0, so the checkpoint is written after
                # the first step of each epoch - confirm that this is intended.
                if i % 500 == 0:
                    model.saver.save(sess, os.path.join(model_save_path, 'textcnn-%d' %(e+1)), global_step=i+1)
                    print ('model/textcnn-%d-%d saved' %(e+1, i+1))
    finally:
        # Flush pending summary events and release the log file even if training fails.
        summary_writer.close()

    # Evaluate on the held-out set using whole batches only; test samples beyond
    # the last full batch are not evaluated.
    num_iter_per_epoch = x_test.shape[0] // batch_size
    test_accuracy = 0.0
    for i in range(num_iter_per_epoch):
        x_batch = x_test[i*batch_size:(i+1)*batch_size]
        y_batch = y_test[i*batch_size:(i+1)*batch_size]
        acc = sess.run(model.accuracy, feed_dict={model.x: x_batch, model.y: y_batch})
        test_accuracy += acc

    # Mean of the per-batch accuracies.
    print ("Test accuracy: %.3f" %(test_accuracy/num_iter_per_epoch))

Epoch: [1] Step: [1/85] loss: [0.575043] accuracy: [0.900000]
model/textcnn-1-1 saved
Epoch: [1] Step: [11/85] loss: [0.653879] accuracy: [0.640000]
Epoch: [1] Step: [21/85] loss: [0.660412] accuracy: [0.610000]
Epoch: [1] Step: [31/85] loss: [0.658146] accuracy: [0.640000]
Epoch: [1] Step: [41/85] loss: [0.634064] accuracy: [0.640000]
Epoch: [1] Step: [51/85] loss: [0.622959] accuracy: [0.680000]
Epoch: [1] Step: [61/85] loss: [0.599468] accuracy: [0.710000]
Epoch: [1] Step: [71/85] loss: [0.567238] accuracy: [0.760000]
Epoch: [1] Step: [81/85] loss: [0.645155] accuracy: [0.660000]
Epoch: [2] Step: [1/85] loss: [0.518303] accuracy: [0.810000]
model/textcnn-2-1 saved
Epoch: [2] Step: [11/85] loss: [0.549249] accuracy: [0.790000]
Epoch: [2] Step: [21/85] loss: [0.538894] accuracy: [0.770000]
Epoch: [2] Step: [31/85] loss: [0.555874] accuracy: [0.770000]
Epoch: [2] Step: [41/85] loss: [0.517703] accuracy: [0.810000]
Epoch: [2] Step: [51/85] loss: [0.483076] accuracy: [0.780000]
Epoch: [2