# Convolutional Neural Network for Text Classification
In this tutorial, we implement a convolutional neural network that classifies movie reviews as positive or negative.

In [1]:
import tensorflow as tf
import numpy as np
from utils import *
from sklearn.cross_validation import train_test_split
% load_ext autoreload
% autoreload 2

In [2]:
# Read every line of each polarity file; the `with` blocks close the file
# handles as soon as the lines are in memory instead of leaking them.
with open('data/polarity/pos.txt') as pos_file:
    x_pos = pos_file.readlines()
with open('data/polarity/neg.txt') as neg_file:
    x_neg = neg_file.readlines()

# Labels: 1.0 for each line of the positive file, 0.0 for each line of the
# negative file, concatenated in that order (positives first).
y_pos = np.ones(len(x_pos))
y_neg = np.zeros(len(x_neg))
y = np.concatenate([y_pos, y_neg])

## Preprocessing

In [3]:
# preprocess is not defined in this notebook; presumably it is provided by the
# `from utils import *` star import -- TODO confirm. The positive lines are
# passed before the negative ones, the same order used to build the labels `y`
# (assumes preprocess keeps the input order -- verify).
x, mask, word_to_idx, seq_length, vocab_size = preprocess(x_pos+x_neg)

In [4]:
# Shuffle examples and labels with one fixed-seed permutation so that each
# (x, y) pair stays aligned and the ordering is reproducible.
np.random.seed(10)
random_idx = np.random.permutation(len(y))
x, y = x[random_idx], y[random_idx]

# Hold out 10% of the shuffled data as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42
)

In [5]:
x_train.shape

(9595, 58)

In [None]:
self, sequence_length, num_classes, vocab_size, embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0

In [None]:
class TextCNN(object):
    """Text-classification CNN whose graph construction is still incomplete.

    So far the constructor only creates the input placeholders and the
    word-embedding lookup; no convolution, pooling or output layer is built.
    """

    def __init__(self, seq_length, num_class, vocab_size, dim_emb, filter_size, num_filter):
        """Create the input placeholders and the embedding lookup.

        Args:
            seq_length: number of token ids per example (second dim of ``self.x``)
            num_class: accepted but not used yet
            vocab_size: number of rows of the embedding matrix
            dim_emb: number of columns of the embedding matrix
            filter_size: accepted but not used yet
            num_filter: accepted but not used yet
        """
        # Inputs that are fed at run time.
        self.x = tf.placeholder(tf.int32, shape=[None, seq_length], name='x')
        self.y = tf.placeholder(tf.float32, shape=[None], name='y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

        with tf.variable_scope('embedding'):
            uniform_init = tf.random_uniform_initializer(-1, 1)
            embedding_matrix = tf.get_variable('w', shape=[vocab_size, dim_emb],
                                               initializer=uniform_init)
            # (batch_size, seq_length, dim_emb)
            looked_up = tf.nn.embedding_lookup(embedding_matrix, self.x)
            # (batch_size, seq_length, dim_emb, 1); not stored anywhere yet
            x_embed = tf.expand_dims(looked_up, -1)
            
        
        

In [9]:
# Stand-alone, module-level version of the embedding step (placeholder ->
# embedding lookup -> extra trailing channel axis).
# NOTE(review): this rebinds `x`, so the shuffled data array that an earlier
# cell stored under the same name is no longer reachable as `x`.
x = tf.placeholder(tf.int32, [None, seq_length], name='x')


with tf.variable_scope('embedding'):
    # NOTE(review): 10023 and 128 are hard-coded; presumably they stand for
    # vocab_size and the embedding size -- confirm against preprocess() output.
    w = tf.get_variable('w', shape=[10023, 128], initializer=tf.random_uniform_initializer(-1, 1))
    x_embed = tf.nn.embedding_lookup(w, x)    # (batch_size, seq_length, dim_emb)
    x_embed = tf.expand_dims(x_embed, -1)          # (batch_size, seq_length, dim_emb, 1)

In [20]:
# Run the embedded input through conv2d with its default filter settings;
# 'conv4' is passed as the variable-scope name for the layer's w and b.
out = conv2d(x_embed, name='conv4')

In [22]:
out

<tf.Tensor 'conv4/add:0' shape=(?, 58, 128, 128) dtype=float32>

In [21]:
def conv2d(x, filter_size=3, dim_emb=128, num_filter=128, name=None):
    """Convolves full-width filters over an embedded sequence
    
    Each filter covers `filter_size` consecutive positions and `dim_emb`
    embedding columns, and is applied with stride 1 and 'VALID' padding.
    
    Args:
        x: input tensor of shape (batch_size, seq_length, dim_emb, channel_in);
            the last dimension must be statically known
        filter_size: filter size; default is 3
        dim_emb: embedding size; default is 128
        num_filter: number of filter; default is 128
        name: variable scope name; when None, a unique scope name derived from
            'conv2d' is used
        
    Returns:
        out: output tensor; when the third dimension of x equals dim_emb its
            shape is (batch_size, seq_length - filter_size + 1, 1, num_filter)
    """
    # Read the input channel count from x rather than assuming a single channel.
    channel_in = x.get_shape()[-1]
    
    # default_name keeps the call usable when no name is given;
    # tf.variable_scope(None) on its own raises.
    with tf.variable_scope(name, default_name='conv2d'):
        w = tf.get_variable('w', shape=[filter_size, dim_emb, channel_in, num_filter], 
                            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('b', shape=[num_filter], initializer=tf.constant_initializer(0.0))
        
        out = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='VALID') + b
        
    return out
    
def linear(x, dim_out, name=None):
    """Computes linear transform (fully-connected layer)
    
    Args:
        x: input tensor of shape (batch_size, dim_in); dim_in must be
            statically known
        dim_out: dimension for output tensor
        name: variable scope name; when None, a unique scope name derived from
            'linear' is used
        
    Returns:
        out: output tensor of shape (batch_size, dim_out)
    """
    dim_in = x.get_shape()[-1]
    
    # default_name keeps the call usable when no name is given;
    # tf.variable_scope(None) on its own raises.
    with tf.variable_scope(name, default_name='linear'):
        w = tf.get_variable('w', shape=[dim_in, dim_out], initializer=tf.contrib.layers.xavier_initializer())
        b = tf.get_variable('b', shape=[dim_out], initializer=tf.constant_initializer(0.0))
        
        out = tf.matmul(x, w) + b
        
    return out