# Convolutional Neural Network for Text Classification
In this tutorial, we implement a convolutional neural network that classifies movie reviews as positive or negative.

In [68]:
import tensorflow as tf
import numpy as np
import os
from utils import preprocess 
from model import T
from sklearn.cross_validation import train_test_split
% load_ext autoreload
% autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load the dataset

In [69]:
# Read the raw reviews (one review per line) from the positive and negative
# polarity files. The `with` blocks close each file handle as soon as it has
# been read instead of leaving it open until garbage collection.
with open('data/polarity/pos.txt') as pos_file:
    x_pos = pos_file.readlines()
with open('data/polarity/neg.txt') as neg_file:
    x_neg = neg_file.readlines()

# Labels: 1.0 for every positive review, 0.0 for every negative review.
# Positives come first, so `y` lines up with the list x_pos + x_neg.
y_pos = np.ones(len(x_pos))
y_neg = np.zeros(len(x_neg))
y = np.concatenate([y_pos, y_neg])

In [70]:
# Sanity check: number of reviews per class and one sample review from each.
# Single-argument print(...) produces the same output on Python 2 and
# Python 3, whereas the bare `print x` statement is a syntax error on Python 3.
print(len(x_pos))
print(len(x_neg))
print(x_pos[3])
print(x_neg[0])

5331
5331
if you sometimes like to go to the movies to have fun , wasabi is a good place to start . 

simplistic , silly and tedious . 



## Preprocessing

In [71]:
# Run the project's preprocessing helper over all reviews (positives first,
# then negatives, which matches the order of the labels in `y`).
# NOTE(review): the meaning of each returned value is inferred from its name
# only (encoded inputs, mask, word->index mapping, sequence length,
# vocabulary size) - confirm against utils.preprocess.
x, mask, word_to_idx, seq_length, vocab_size = preprocess(x_pos+x_neg)

In [73]:
# Randomly shuffle the data and hold out 10% of it as a test set.
# The fixed random_state keeps the split identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=42)

# Show the resulting shapes. Single-argument print(...) gives the same output
# on Python 2 and Python 3; the bare print statement only parses on Python 2.
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(9595, 58)
(1067, 58)
(9595,)
(1067,)


## Define and construct the model

In [5]:
# Build the TextCNN graph.
# seq_length and vocab_size are taken from the preprocessing cell instead of
# being retyped as literals (previously 58 and 18768), so the model cannot
# silently drift out of sync with the data when the corpus or the
# preprocessing changes.
# batch_size stays a literal here because the training cell defines its own
# batch_size further down; presumably the two values must agree - verify.
# NOTE(review): the import cell pulls `T` from `model`, not `TextCNN` -
# confirm that TextCNN is really in scope on a fresh kernel.
model = TextCNN(batch_size=100, seq_length=seq_length, num_class=2, vocab_size=vocab_size,
                dim_emb=256, filter_sizes=[3, 4, 5], num_filters=[50, 50, 50])

## Train and test the model

In [6]:
# ---- Hyper-parameters and output locations ----
num_epoch = 2
batch_size = 100
# Floor division keeps this an int on Python 3 as well (plain `/` would give a
# float there and range() would reject it). Examples that do not fill a whole
# batch at the end of the training set are skipped.
num_iter_per_epoch = x_train.shape[0] // batch_size
log_path = 'log/'
model_save_path = 'model/'

# Create the output directories on first run.
if not os.path.exists(log_path):
    os.makedirs(log_path)
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)

# Let TensorFlow fall back to another device when an op cannot be placed as
# requested, and grow GPU memory on demand instead of reserving it up front.
config = tf.ConfigProto(allow_soft_placement = True)
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    # initialize parameters
    tf.initialize_all_variables().run()
    summary_writer = tf.train.SummaryWriter(logdir=log_path, graph=tf.get_default_graph())

    try:
        for e in range(num_epoch):
            for i in range(num_iter_per_epoch):
                # train the classifier on one mini-batch
                x_batch = x_train[i*batch_size:(i+1)*batch_size]
                y_batch = y_train[i*batch_size:(i+1)*batch_size]
                feed_dict = {model.x: x_batch, model.y: y_batch}
                sess.run(model.train_op, feed_dict)

                # Every 10 steps: log summaries and report loss/accuracy on
                # the batch that was just used for training.
                if i % 10 == 0:
                    summary, loss, acc = sess.run([model.summary_op, model.loss, model.accuracy], feed_dict)
                    summary_writer.add_summary(summary, e*num_iter_per_epoch + i)
                    print ('Epoch: [%d] Step: [%d/%d] loss: [%.6f] accuracy: [%.6f]' %(e+1, i+1, num_iter_per_epoch, loss, acc))

                # NOTE(review): with fewer than 500 steps per epoch this only
                # fires at i == 0, so a checkpoint is written at the first
                # step of each epoch and never after the last one - confirm
                # this is the intended checkpoint schedule.
                if i % 500 == 0:
                    model.saver.save(sess, os.path.join(model_save_path, 'textcnn-%d' %(e+1)), global_step=i+1)
                    print ('model/textcnn-%d-%d saved' %(e+1, i+1))
    finally:
        # Flush and close the event file even if training is interrupted.
        summary_writer.close()

    # ---- Evaluation on the held-out set ----
    # A separate counter is used so the training step count above is not
    # overwritten. Only full batches are evaluated; leftover test examples
    # that do not fill a batch are excluded from the average.
    num_test_iter = x_test.shape[0] // batch_size
    test_accuracy = 0.0
    for i in range(num_test_iter):
        x_batch = x_test[i*batch_size:(i+1)*batch_size]
        y_batch = y_test[i*batch_size:(i+1)*batch_size]
        acc = sess.run(model.accuracy, feed_dict={model.x: x_batch, model.y: y_batch})
        test_accuracy += acc

    # Guard against a test set smaller than one batch (would divide by zero).
    if num_test_iter > 0:
        print ("Test accuracy: %.3f" %(test_accuracy/num_test_iter))
    else:
        print ("Test accuracy: n/a (test set is smaller than one batch)")

Epoch: [1] Step: [1/95] loss: [0.529008] accuracy: [0.800000]
model/textcnn-1-1 saved
Epoch: [1] Step: [11/95] loss: [0.493539] accuracy: [0.880000]
Epoch: [1] Step: [21/95] loss: [0.498853] accuracy: [0.890000]
Epoch: [1] Step: [31/95] loss: [0.497739] accuracy: [0.730000]
Epoch: [1] Step: [41/95] loss: [0.451746] accuracy: [0.920000]
Epoch: [1] Step: [51/95] loss: [0.495826] accuracy: [0.870000]
Epoch: [1] Step: [61/95] loss: [0.455766] accuracy: [0.820000]
Epoch: [1] Step: [71/95] loss: [0.404962] accuracy: [0.920000]
Epoch: [1] Step: [81/95] loss: [0.364094] accuracy: [0.910000]
Epoch: [1] Step: [91/95] loss: [0.397500] accuracy: [0.890000]
Epoch: [2] Step: [1/95] loss: [0.388932] accuracy: [0.810000]
model/textcnn-2-1 saved
Epoch: [2] Step: [11/95] loss: [0.337945] accuracy: [0.930000]
Epoch: [2] Step: [21/95] loss: [0.351719] accuracy: [0.930000]
Epoch: [2] Step: [31/95] loss: [0.311224] accuracy: [0.950000]
Epoch: [2] Step: [41/95] loss: [0.269772] accuracy: [0.980000]
Epoch: [2