https://github.com/LahiruSen/crnn.git
sst2_cnn_rnn_keras1.py

In [14]:
cd /content/drive/My Drive/University/FYP/Sentiment Analysis/Implementation/Sentiment Analysis/CNN RNN/CNN LSTM GRU/

/content/drive/My Drive/University/FYP/Sentiment Analysis/Implementation/Sentiment Analysis/CNN RNN/CNN LSTM GRU


# Imports

In [0]:
from __future__ import print_function
import numpy as np
# np.random.seed(3435)  # for reproducibility, should be first

import keras
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dropout, Activation, Flatten, \
    Embedding, Convolution1D, MaxPooling1D, AveragePooling1D, \
    Input, Dense, merge,Add
from keras.regularizers import l2
from keras.layers.recurrent import LSTM, GRU, SimpleRNN
from keras.constraints import maxnorm
from keras.datasets import imdb
from keras import callbacks
from keras.utils import generic_utils
from keras.models import Model
from keras.optimizers import Adadelta
import time
import pickle

# Initialize

In [0]:
# Mini-batch size passed to model.fit / model.evaluate.
batch_size = 32
# Number of filters in each convolution branch of build_model.
nb_filter = 200
# NOTE(review): not referenced in the visible cells; build_model hard-codes
# kernel widths 4 and 5 instead of reading this value.
filter_length = 4
# Width of the fully connected 'mlp' layer in build_model.
hidden_dims = nb_filter * 2
# Number of single-epoch fit calls made by the training loop.
nb_epoch = 5
# Recurrent layer class applied after the convolution branches (LSTM and
# SimpleRNN are imported as alternatives).
RNN = GRU
# Number of units passed to the recurrent layer.
rnn_output_size = 300
# NOTE(review): not referenced in the visible cells; presumably left over from
# a cross-validation variant of this script. Confirm before removing.
folds = 10

# Absolute paths to the pickled dataset and the pickled embedding matrix.
# NOTE(review): 'parsed_datasentiment_tagger_dataset' lacks the underscore that
# 'parsed_data_sentiment_tagger_vectors' has; confirm this is the real file name.
dataset_path='/content/drive/My Drive/University/FYP/Sentiment Analysis/Implementation/Sentiment Analysis/SLSTM/parsed_data/from_word2vec/parsed_datasentiment_tagger_dataset'
fasttext_path = '/content/drive/My Drive/University/FYP/Sentiment Analysis/Implementation/Sentiment Analysis/SLSTM/parsed_data/from_word2vec/parsed_data_sentiment_tagger_vectors'

# Load Embedding Matrix


In [82]:
# Load the pickled embedding matrix; its row count is used as the vocabulary size.
# NOTE(security): pickle.load can execute arbitrary code, so only open files
# that come from a trusted source.
# The context manager closes the file handle, which the bare open() never did.
with open(fasttext_path, 'rb') as f:
    matrix = np.array(pickle.load(f))
vocab_size = matrix.shape[0]
print(vocab_size)

155


# Load Data 

In [0]:
def remove_unk(x, n_words):
    """Replace every index that falls outside the vocabulary with 1.

    Args:
        x: iterable of sentences, each an iterable of word indices.
        n_words: vocabulary size; any index >= n_words is replaced.

    Returns:
        A new list of lists with the same layout as ``x``. Index 1 is
        presumably the unknown-token id; confirm against the vocabulary.
    """
    cleaned = []
    for sentence in x:
        row = []
        for token in sentence:
            if token >= n_words:
                row.append(1)
            else:
                row.append(token)
        cleaned.append(row)
    return cleaned

def load_data(path, n_words):
    """Read the pickled dataset and return train/test inputs and labels.

    The pickle must unpack into two objects (inputs, labels). Element 0 of
    each is taken as the training split and element 1 as the test split.
    Word indices >= ``n_words`` are replaced via ``remove_unk``.

    NOTE(security): pickle.load can execute arbitrary code; only load files
    that come from a trusted source.
    NOTE(review): the recorded output of ``print(y_train)`` further down is
    the eight characters of 'POSITIVE', which suggests the pickle is not laid
    out as (split, split). Confirm the file layout against this indexing.

    Args:
        path: location of the pickle file.
        n_words: vocabulary size used to filter out-of-range indices.

    Returns:
        Tuple ``(train_x, train_y, test_x, test_y)`` of numpy arrays.
    """
    with open(path, 'rb') as handle:
        inputs, labels = pickle.load(handle)

    # Same access order as before: train split first, then test split.
    train_x, train_y = inputs[0], labels[0]
    test_x, test_y = inputs[1], labels[1]

    # Remove unknown words from both splits.
    train_x = remove_unk(train_x, n_words)
    test_x = remove_unk(test_x, n_words)

    return (np.array(train_x), np.array(train_y),
            np.array(test_x), np.array(test_y))

In [0]:
# Unpack the four arrays returned by load_data; vocab_size caps the word indices.
X_train, y_train, X_test, y_test = load_data(dataset_path, vocab_size)

In [92]:
# Inspect the training labels.
# NOTE(review): the recorded output is the eight characters of 'POSITIVE',
# not one label per example. load_data probably indexes the pickled label
# structure incorrectly, and string labels cannot feed the sigmoid /
# binary_crossentropy output; this is likely what triggers the ValueError
# recorded at the end of the notebook. Confirm the pickle layout.
print(y_train)

['P' 'O' 'S' 'I' 'T' 'I' 'V' 'E']


In [0]:
# Bring every sequence in both splits to one fixed length.
# padding='post' places the fill values at the end of short sequences; the
# truncation side for long sequences is left at the pad_sequences default.
max_review_length = 155
X_train = sequence.pad_sequences(
    X_train,
    maxlen=max_review_length,
    padding='post',
)
X_test = sequence.pad_sequences(
    X_test,
    maxlen=max_review_length,
    padding='post',
)

In [86]:
# Earlier loader call kept for reference (it also returned W and W2):
# X_train, y_train, X_test, y_test, W, W2 = load_data()
# Sequence length after padding; read by build_model as the input length.
maxlen = X_train.shape[1]
# Number of rows in the embedding table (one per vocabulary index).
max_features = vocab_size
# Embedding width passed to the Embedding layer.
# NOTE(review): presumably must equal matrix.shape[1], because matrix is
# supplied as the layer's initial weights. Confirm.
embedding_dims = 300

print('Train...')
# Best test accuracies collected by the training cell (one entry per run of it).
accs = []
# Makes the training cell print the model summary only once.
first_run = True
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
# Disabled: random shuffle of the training split.
# rand_idx = np.random.permutation(range(len(X_train)))
# X_train = X_train[rand_idx]
# y_train = y_train[rand_idx]

Train...
8 train sequences
8 test sequences
X_train shape: (8, 155)
X_test shape: (8, 155)


# Build Model

In [0]:
def build_model():
    """Build and compile the two-branch CNN + RNN binary classifier.

    Reads the module-level settings ``maxlen``, ``max_features``,
    ``embedding_dims``, ``matrix``, ``nb_filter``, ``RNN``,
    ``rnn_output_size`` and ``hidden_dims``.

    Layers, in order:
      1. Trainable embedding initialised from ``matrix``, then dropout 0.50.
      2. Two parallel 'valid' convolutions (kernel widths 4 and 5), each
         followed by max pooling with pool size 2.
      3. Concatenation of both branches along axis 1, then dropout 0.15.
      4. One recurrent layer of class ``RNN`` with ``rnn_output_size`` units.
      5. Dense ReLU layer ('mlp') with max-norm constraints, dropout 0.10,
         and a single sigmoid output unit.

    The layer arguments use the Keras 2 keyword names (``filters``,
    ``kernel_size``, ``padding``, ``strides``, ``pool_size``,
    ``kernel_initializer``, ``kernel_constraint``, ``bias_constraint``,
    ``inputs``/``outputs``) in place of the deprecated Keras 1 spellings.

    Returns:
        A ``Model`` compiled with binary cross-entropy on the 'output' layer,
        the Adadelta optimizer and the accuracy metric.
    """
    main_input = Input(shape=(maxlen,), dtype='int32', name='main_input')
    embedding = Embedding(max_features, embedding_dims,
                          weights=[matrix], input_length=maxlen,
                          name='embedding', trainable=True)(main_input)

    embedding = Dropout(0.50)(embedding)

    # Branch 1: kernel width 4.
    conv4 = Convolution1D(filters=nb_filter,
                          kernel_size=4,
                          padding='valid',
                          activation='relu',
                          strides=1,
                          name='conv4')(embedding)
    maxConv4 = MaxPooling1D(pool_size=2,
                            name='maxConv4')(conv4)

    # Branch 2: kernel width 5.
    conv5 = Convolution1D(filters=nb_filter,
                          kernel_size=5,
                          padding='valid',
                          activation='relu',
                          strides=1,
                          name='conv5')(embedding)
    maxConv5 = MaxPooling1D(pool_size=2,
                            name='maxConv5')(conv5)

    # 'valid' convolutions with different kernel widths give sequences of
    # different lengths, so the branches are joined end to end along axis 1
    # rather than summed element-wise with Add.
    x = keras.layers.Concatenate(axis=1)([maxConv4, maxConv5])

    x = Dropout(0.15)(x)

    x = RNN(rnn_output_size)(x)

    x = Dense(hidden_dims, activation='relu',
              kernel_initializer='he_normal',
              kernel_constraint=maxnorm(3), bias_constraint=maxnorm(3),
              name='mlp')(x)

    x = Dropout(0.10, name='drop')(x)

    output = Dense(1, kernel_initializer='he_normal',
                   activation='sigmoid', name='output')(x)

    model = Model(inputs=main_input, outputs=output)
    model.compile(loss={'output': 'binary_crossentropy'},
                  optimizer=Adadelta(lr=0.95, epsilon=1e-06),
                  metrics=["accuracy"])
    return model

# Main

In [89]:
# Build a fresh model and show its architecture the first time this cell runs.
model = build_model()
if first_run:
    first_run = False
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only appended a stray "None" line.
    model.summary()

# Train one epoch per iteration so the best epoch can be tracked by hand.
# NOTE(review): the test split is also used as validation data, so the
# "val" accuracy and the evaluate() accuracy below are computed on the same
# examples, and picking the best epoch on them is optimistic. Confirm
# whether a separate validation split was intended.
best_val_acc = 0
best_test_acc = 0
for j in range(nb_epoch):
    a = time.time()
    his = model.fit(X_train, y_train,
                    batch_size=batch_size,
                    validation_data=(X_test, y_test),  # Keras documents a tuple here
                    shuffle=True,
                    epochs=1,  # Keras 2 name for the deprecated nb_epoch
                    verbose=1)
    print('Epoch %d/%d\t%s' % (j + 1, nb_epoch, str(his.history)))
    # ">=" means a later epoch that ties the best score replaces the earlier one.
    if his.history['val_accuracy'][0] >= best_val_acc:
        score, acc = model.evaluate(X_test, y_test,
                                    batch_size=batch_size,
                                    verbose=2)
        best_val_acc = his.history['val_accuracy'][0]
        best_test_acc = acc
        print('Got best epoch  best val acc is %f test acc is %f' %
              (best_val_acc, best_test_acc))
        # accs only has entries here if this cell already ran without accs
        # being reset in between.
        if accs:
            print('Current avg test acc:', str(np.mean(accs)))
    b = time.time()
    cost = b - a
    left = (nb_epoch - j - 1)
    # Remaining-time estimate assumes every epoch costs as much as this one.
    print('One round cost %ds, %d round %ds %dmin left' % (cost, left,
                                                           cost * left,
                                                           cost * left / 60.0))
accs.append(best_test_acc)
print('Avg test acc:', str(np.mean(accs)))

  
  app.launch_new_instance()
  
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 8 samples, validate on 8 samples
Epoch 1/1


ValueError: ignored