In [64]:
import os

# Directories that are listed for the negative (label 0) and positive (label 1)
# review files. The values are placeholders and must be set to real local paths.
file_path_neg = "........................................."
file_path_pos = "........................................."
# Define the path to the output file
# NOTE(review): this name is not read anywhere in the visible cells; the
# vocabulary is later loaded from the relative path 'vocab.txt' - confirm the
# two are meant to point at the same file.
output_file_path = "......................................\\vocab.txt"

In [65]:
import re
import string
import unicodedata
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from collections import Counter


def clean_text(text, min_freq=2):
    """Normalise raw text into a space-separated string of frequent lemmas.

    Processing order: strip HTML tags, fold accented characters to ASCII,
    drop everything that is not an ASCII letter or whitespace, lowercase,
    remove English stop words, lemmatize, then discard rare tokens.

    Args:
        text: Raw document text.
        min_freq: Frequency threshold. The comparison is strict: only tokens
            that occur MORE than ``min_freq`` times in ``text`` are kept.

    Returns:
        The surviving tokens joined by single spaces (possibly an empty string).
    """
    # Remove HTML tags
    text = re.sub(r'<[^>]+>', '', text)

    # Fold accented characters to their ASCII base letters. This must happen
    # BEFORE the letter filter below: otherwise the filter deletes the accented
    # character outright ("café" -> "caf") and the folding never has any effect.
    text = unicodedata.normalize('NFKD', text).encode('ASCII', 'ignore').decode('utf-8')

    # Remove numbers, punctuation and any other special characters in one pass
    # (everything that is not an ASCII letter or whitespace).
    text = re.sub(r'[^a-zA-Z\s]', '', text)

    # Convert to lowercase so stop-word matching is case-insensitive
    text = text.lower()

    # Remove stop words, then lemmatize what is left
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [
        lemmatizer.lemmatize(token)
        for token in text.split()
        if token not in stop_words
    ]

    # Remove words with low frequency (counted on the lemmas of this text only)
    word_counts = Counter(lemmatized_tokens)
    return ' '.join(token for token in lemmatized_tokens if word_counts[token] > min_freq)

In [66]:
# load doc into memory
def load_doc(filename, encoding=None):
    """Read an entire text file into memory.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding handed to ``open``. The default ``None`` keeps
            the platform's default encoding, which is what the function used
            before this parameter existed.

    Returns:
        The full contents of the file as one string.
    """
    # The context manager closes the handle even if read() raises.
    with open(filename, 'r', encoding=encoding) as file:
        return file.read()

In [67]:
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

# Load the saved vocabulary file as one raw string.
# NOTE(review): the path is relative to the working directory - confirm it is
# the same file that `output_file_path` points to.
vocab_filename = 'vocab.txt'
vocab = load_doc(vocab_filename)
# Split the raw text into tokens with NLTK's word tokenizer
tokens = word_tokenize(vocab)
# Count how often each token occurs
fdist = FreqDist(tokens)

In [68]:
# Total number of tokens read from the vocabulary file
print(len(tokens))
# Print the 50 most frequent tokens together with their counts
print(fdist.most_common(50))

152341
[('film', 10318), ('movie', 5885), ('one', 4533), ('character', 2629), ('like', 2297), ('scene', 1541), ('get', 1503), ('time', 1477), ('even', 1259), ('story', 1238), ('make', 1222), ('good', 1207), ('life', 888), ('would', 879), ('also', 833), ('much', 803), ('see', 787), ('two', 710), ('way', 686), ('first', 684), ('really', 628), ('thing', 621), ('well', 607), ('bad', 601), ('go', 596), ('action', 584), ('plot', 557), ('know', 556), ('year', 551), ('people', 525), ('he', 508), ('man', 507), ('love', 481), ('take', 480), ('new', 466), ('little', 465), ('alien', 462), ('u', 457), ('performance', 452), ('never', 437), ('could', 432), ('world', 423), ('many', 421), ('come', 409), ('work', 402), ('great', 394), ('star', 387), ('show', 384), ('best', 381), ('actor', 376)]


In [69]:
# Turn the raw vocabulary string into a set of unique whitespace-separated words
# for fast membership tests.
# NOTE(review): this rebinds `vocab` from str to set, so the cell cannot be run
# twice in a row (a set has no .split()); re-run the loading cell first.
vocab = set(vocab.split())

In [70]:
# turn a doc into clean tokens
def clean_doc(doc, vocab):
    """Reduce a document to the in-vocabulary words it contains.

    The document is split on whitespace, ASCII punctuation is deleted from
    every piece, and only pieces that are members of ``vocab`` survive.
    No case folding is applied.

    Args:
        doc: Raw document text.
        vocab: Container of allowed words (tested with ``in``).

    Returns:
        The kept words, in document order, joined by single spaces.
    """
    punct_pattern = re.compile('[%s]' % re.escape(string.punctuation))
    kept = []
    for raw in doc.split():
        word = punct_pattern.sub('', raw)
        if word in vocab:
            kept.append(word)
    return ' '.join(kept)


In [71]:
# load all docs in a directory
def process_docs(directory, vocab, is_train):
    """Load and clean every document of one split from a directory.

    Files whose name starts with ``'cv9'`` form the test split; all other
    files form the training split.

    Args:
        directory: Folder containing one review per file.
        vocab: Allowed words, forwarded to ``clean_doc``.
        is_train: Truthy to load the training split, falsy for the test split.

    Returns:
        A list with one cleaned, space-joined string per selected file,
        ordered by file name.
    """
    documents = list()
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # document order (and therefore training runs) reproducible.
    for filename in sorted(os.listdir(directory)):
        is_test_file = filename.startswith('cv9')
        # skip files that belong to the other split
        if is_test_file == bool(is_train):
            continue
        # os.path.join picks the right separator for the current platform
        # (the previous hard-coded '\\' only worked on Windows).
        path = os.path.join(directory, filename)
        # load the doc
        doc = load_doc(path)
        # clean doc
        tokens = clean_doc(doc, vocab)
        # add to list
        documents.append(tokens)
    return documents


In [72]:
from numpy import array

def load_clean_dataset(vocab, is_train):
    """Load one split of the corpus together with its sentiment labels.

    Negative documents (from ``file_path_neg``) come first and get label 0,
    positive documents (from ``file_path_pos``) follow and get label 1.

    Args:
        vocab: Allowed words, forwarded to ``process_docs``.
        is_train: Selects the training or the test split.

    Returns:
        A ``(docs, labels)`` pair: the list of cleaned documents and an array
        of labels of the same length.
    """
    negative_docs = process_docs(file_path_neg, vocab, is_train)
    positive_docs = process_docs(file_path_pos, vocab, is_train)
    # one label per document, in the same order as the concatenated list
    labels = array([0] * len(negative_docs) + [1] * len(positive_docs))
    return negative_docs + positive_docs, labels


In [73]:
# Build the training split (is_train=True) and the test split (is_train=False)
# with their label arrays.
train_docs, ytrain = load_clean_dataset(vocab, True)
test_docs, ytest = load_clean_dataset(vocab, False)

In [74]:
# Sanity check: show the label and cleaned text of the first training document
ytrain[0],train_docs[0]

(0,
 'plot two teen go church party drink drive get accident one girlfriend see life whats deal watch movie find movie teen generation cool idea bad package review even harder one write since generally attempt break head lost highway memento there good bad making didnt one seem taken pretty concept movie well main problem simply normal fantasy world as audience member idea whats going there there coming back dead there others who look like dead there strange there there chase there weird happen simply explained dont mind trying film every give clue get kind biggest problem obviously got big secret seems want completely final five make entertaining thrilling even engaging really sad part arrow like actually point start make little bit sense still didnt make film entertaining guess bottom line like always make sure audience even given secret enter world mean showing melissa sagemiller running away throughout movie okay get there people chasing dont know who really need see giving us diff

In [75]:
# longest training document, measured in whitespace-separated tokens
max_length = max(len(doc.split()) for doc in train_docs)
print('Maximum length: %d' % max_length)

Maximum length: 914


In [76]:
from keras.preprocessing.text import Tokenizer

# fit a tokenizer
def create_tokenizer(lines):
    """Create a Keras ``Tokenizer`` and fit it on the given texts.

    Args:
        lines: Iterable of document strings used to build the word index.

    Returns:
        The fitted tokenizer.
    """
    fitted = Tokenizer()
    fitted.fit_on_texts(lines)
    return fitted

In [77]:
# Fit the tokenizer on the training documents only
tokenizer = create_tokenizer(train_docs)

In [78]:
# Vocabulary size = number of words in the tokenizer's word index plus one
# extra slot (presumably for index 0, which Keras reserves and which is used
# as the padding value - confirm against the Keras Tokenizer documentation).
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary size: %d' % vocab_size)

Vocabulary size: 5552


In [79]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# integer encode and pad documents
def encode_docs(tokenizer, max_length, docs):
    """Integer-encode documents and bring them all to one fixed length.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences``.
        max_length: Target sequence length passed to ``pad_sequences`` as ``maxlen``.
        docs: List of document strings.

    Returns:
        The result of ``pad_sequences`` with padding appended at the end
        (``padding='post'``); the truncation side is left at its default.
    """
    return pad_sequences(
        tokenizer.texts_to_sequences(docs),
        maxlen=max_length,
        padding='post',
    )

In [80]:
# Encode both splits, padded to the longest training document
Xtrain = encode_docs(tokenizer, max_length, train_docs)
Xtest = encode_docs(tokenizer, max_length, test_docs)

In [81]:
from keras.models import Sequential
from keras.layers import Embedding, Conv1D, MaxPooling1D, Flatten, Dense 
from keras.utils.vis_utils import plot_model

# define the model
def define_model(vocab_size, max_length):
    """Build and compile the baseline 1D-CNN sentiment classifier.

    Architecture: 100-dimensional embedding -> Conv1D (32 filters, kernel 8,
    ReLU) -> max pooling (2) -> flatten -> Dense(10, ReLU) -> Dense(1, sigmoid).

    Side effects: prints the model summary and writes the architecture
    diagram to 'model.png'.

    Args:
        vocab_size: Number of rows of the embedding table.
        max_length: Length of the padded input sequences.

    Returns:
        The compiled model.
    """
    layer_stack = (
        Embedding(vocab_size, 100, input_length=max_length),
        Conv1D(filters=32, kernel_size=8, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(10, activation='relu'),
        Dense(1, activation='sigmoid'),
    )
    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    # binary classification: cross-entropy loss, accuracy as the reported metric
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # report the architecture on stdout and as an image
    model.summary()
    plot_model(model, to_file='model.png', show_shapes=True)
    return model


In [82]:
# Build the baseline model for full-length (max_length) inputs
model = define_model(vocab_size, max_length)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 914, 100)          555200    
                                                                 
 conv1d_1 (Conv1D)           (None, 907, 32)           25632     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 453, 32)          0         
 1D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 14496)             0         
                                                                 
 dense_2 (Dense)             (None, 10)                144970    
                                                                 
 dense_3 (Dense)             (None, 1)                 11        
                                                      

In [83]:
# Train the baseline model for 10 epochs; verbose=2 prints one line per epoch
model.fit(Xtrain, ytrain, epochs=10, verbose=2)

Epoch 1/10
57/57 - 6s - loss: 0.6918 - accuracy: 0.5372 - 6s/epoch - 105ms/step
Epoch 2/10
57/57 - 5s - loss: 0.5966 - accuracy: 0.6844 - 5s/epoch - 87ms/step
Epoch 3/10
57/57 - 5s - loss: 0.4237 - accuracy: 0.9056 - 5s/epoch - 91ms/step
Epoch 4/10
57/57 - 5s - loss: 0.3499 - accuracy: 0.9600 - 5s/epoch - 90ms/step
Epoch 5/10
57/57 - 5s - loss: 0.3115 - accuracy: 0.9839 - 5s/epoch - 91ms/step
Epoch 6/10
57/57 - 5s - loss: 0.2885 - accuracy: 0.9883 - 5s/epoch - 91ms/step
Epoch 7/10
57/57 - 5s - loss: 0.2727 - accuracy: 0.9922 - 5s/epoch - 88ms/step
Epoch 8/10
57/57 - 5s - loss: 0.2590 - accuracy: 0.9944 - 5s/epoch - 89ms/step
Epoch 9/10
57/57 - 5s - loss: 0.2482 - accuracy: 0.9933 - 5s/epoch - 88ms/step
Epoch 10/10
57/57 - 5s - loss: 0.2370 - accuracy: 0.9944 - 5s/epoch - 96ms/step


<keras.callbacks.History at 0x1d8047727d0>

In [84]:
# Persist the trained baseline model to 'model.h5' in the working directory
model.save('model.h5')

In [85]:
# Evaluate the baseline model on the training data (the data it was fitted on)
loss, acc = model.evaluate(Xtrain, ytrain, verbose=0)
print('Train Accuracy: %.2f%%' % (acc * 100))


Train Accuracy: 99.44%


In [86]:
# Evaluate the baseline model on the held-out test split
loss, acc = model.evaluate(Xtest, ytest, verbose=0)
print('Test Accuracy: %.2f%%' % (acc*100))

Test Accuracy: 83.00%


In [87]:
def predict_sentiment(review, vocab, tokenizer, max_length, model):
    """Classify a single review as POSITIVE or NEGATIVE.

    The review is cleaned with ``clean_doc``, encoded with ``encode_docs`` and
    passed to ``model.predict``; element ``[0, 0]`` of the prediction is read
    as the positive-class score.

    Args:
        review: Raw review text.
        vocab: Allowed words for cleaning.
        tokenizer: Tokenizer used to encode the cleaned text.
        max_length: Sequence length the model expects.
        model: Object exposing ``predict``.

    Returns:
        ``(score, label)`` where ``label`` is 'POSITIVE' or 'NEGATIVE' and
        ``score`` is the score of the returned label (for NEGATIVE this is
        one minus the positive-class score).
    """
    cleaned = clean_doc(review, vocab)
    batch = encode_docs(tokenizer, max_length, [cleaned])
    percent_pos = model.predict(batch, verbose=0)[0, 0]
    # the label is decided by rounding the positive-class score
    if round(percent_pos) != 0:
        return percent_pos, 'POSITIVE'
    return (1-percent_pos), 'NEGATIVE'


## Truncated Sequences:
#### Truncate (or pad) every review to the mean review length of the training set.

In [89]:
# average training-document length in whitespace-separated tokens, rounded down
mean_length = int(sum(len(doc.split()) for doc in train_docs) / len(train_docs))
print('Mean length: %d' % mean_length)

Mean length: 244


In [90]:
# integer encode and pad documents
def encode_docs_mean(tokenizer, mean_length, docs):
    """Integer-encode documents and pad/truncate them to ``mean_length``.

    This used to be a line-for-line copy of ``encode_docs``; it now delegates
    to that function so the encoding logic lives in one place.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences``.
        mean_length: Target sequence length.
        docs: List of document strings.

    Returns:
        Whatever ``encode_docs`` returns for the same arguments.
    """
    return encode_docs(tokenizer, mean_length, docs)

In [91]:
# Encode both splits again, this time fixed to the mean training-document length
Xtrain_mean = encode_docs_mean(tokenizer, mean_length, train_docs)
Xtest_mean = encode_docs_mean(tokenizer, mean_length, test_docs)

In [125]:
import tensorflow.keras.backend as K
# Reset Keras' global state before building the next model
K.clear_session()

# define the model
def define_model_mean(vocab_size, mean_length):
    """Build and compile the 1D-CNN classifier for mean-length inputs.

    Same architecture as the baseline model: 100-dimensional embedding ->
    Conv1D (32 filters, kernel 8, ReLU) -> max pooling (2) -> flatten ->
    Dense(10, ReLU) -> Dense(1, sigmoid).

    Side effects: prints the model summary and writes the architecture
    diagram to 'model_mean.png'.

    Args:
        vocab_size: Number of rows of the embedding table.
        mean_length: Length of the padded/truncated input sequences.

    Returns:
        The compiled model.
    """
    layer_stack = (
        Embedding(vocab_size, 100, input_length=mean_length),
        Conv1D(filters=32, kernel_size=8, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(10, activation='relu'),
        Dense(1, activation='sigmoid'),
    )
    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    # binary classification: cross-entropy loss, accuracy as the reported metric
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # report the architecture on stdout and as an image
    model.summary()
    plot_model(model, to_file='model_mean.png', show_shapes=True)
    return model


In [126]:
# Build the model for mean-length inputs
model_mean = define_model_mean(vocab_size, mean_length)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 244, 100)          555200    
                                                                 
 conv1d (Conv1D)             (None, 237, 32)           25632     
                                                                 
 max_pooling1d (MaxPooling1D  (None, 118, 32)          0         
 )                                                               
                                                                 
 flatten (Flatten)           (None, 3776)              0         
                                                                 
 dense (Dense)               (None, 10)                37770     
                                                                 
 dense_1 (Dense)             (None, 1)                 11        
                                                        

In [127]:
# Train the mean-length model for 10 epochs; verbose=2 prints one line per epoch
model_mean.fit(Xtrain_mean, ytrain, epochs=10, verbose=2)

Epoch 1/10
57/57 - 3s - loss: 0.6919 - accuracy: 0.5261 - 3s/epoch - 44ms/step
Epoch 2/10
57/57 - 2s - loss: 0.5677 - accuracy: 0.7844 - 2s/epoch - 38ms/step
Epoch 3/10
57/57 - 2s - loss: 0.1970 - accuracy: 0.9406 - 2s/epoch - 36ms/step
Epoch 4/10
57/57 - 2s - loss: 0.0327 - accuracy: 0.9972 - 2s/epoch - 32ms/step
Epoch 5/10
57/57 - 2s - loss: 0.0077 - accuracy: 1.0000 - 2s/epoch - 38ms/step
Epoch 6/10
57/57 - 2s - loss: 0.0031 - accuracy: 1.0000 - 2s/epoch - 34ms/step
Epoch 7/10
57/57 - 2s - loss: 0.0016 - accuracy: 1.0000 - 2s/epoch - 37ms/step
Epoch 8/10
57/57 - 2s - loss: 0.0010 - accuracy: 1.0000 - 2s/epoch - 36ms/step
Epoch 9/10
57/57 - 2s - loss: 6.5521e-04 - accuracy: 1.0000 - 2s/epoch - 36ms/step
Epoch 10/10
57/57 - 3s - loss: 4.6226e-04 - accuracy: 1.0000 - 3s/epoch - 49ms/step


<keras.callbacks.History at 0x1d87b38c610>

In [128]:
# Persist the trained mean-length model to 'model_mean.h5'
model_mean.save('model_mean.h5')

In [130]:
# Evaluate the mean-length model on the training data (the data it was fitted on)
loss_train_mean, acc_train_mean = model_mean.evaluate(Xtrain_mean, ytrain, verbose=0)
print('Train Accuracy for mean length: %.2f%%' % (acc_train_mean * 100))


Train Accuracy for mean length: 100.00%


In [131]:
# Evaluate the mean-length model on the held-out test split
loss_test_mean, acc_test_mean = model_mean.evaluate(Xtest_mean, ytest, verbose=0)
print('Test Accuracy: %.2f%%' % (acc_test_mean*100))

Test Accuracy: 81.50%


## Truncated Vocabulary: 
#### We removed infrequently occurring words, but still had a large vocabulary of more than 25,000 words. Explore reducing the size of the vocabulary further and the effect this has on model skill.

In [98]:
# turn a doc into clean tokens
def clean_doc_new(doc, vocab):
    """Clean a document: vocabulary filter, stop-word removal, lemmatization.

    The document is split on whitespace and ASCII punctuation is deleted from
    every piece. A piece is kept only if it is in ``vocab`` and is not an
    English stop word; kept pieces are lemmatized afterwards, so the
    vocabulary test is applied to the un-lemmatized form.

    Args:
        doc: Raw document text.
        vocab: Container of allowed words (tested with ``in``).

    Returns:
        The lemmas of the kept words, in document order, joined by single spaces.
    """
    punct_pattern = re.compile('[%s]' % re.escape(string.punctuation))
    english_stops = set(stopwords.words('english'))
    wnl = WordNetLemmatizer()
    lemmas = []
    for raw in doc.split():
        word = punct_pattern.sub('', raw)
        if word in vocab and word not in english_stops:
            lemmas.append(wnl.lemmatize(word))
    return ' '.join(lemmas)


In [99]:
# load all docs in a directory
def process_docs_new(directory, vocab, is_train):
    """Load every document of one split and clean it with ``clean_doc_new``.

    Files whose name starts with ``'cv9'`` form the test split; all other
    files form the training split.

    Args:
        directory: Folder containing one review per file.
        vocab: Allowed words, forwarded to ``clean_doc_new``.
        is_train: Truthy to load the training split, falsy for the test split.

    Returns:
        A list with one cleaned, space-joined string per selected file,
        ordered by file name.
    """
    documents = list()
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # document order (and therefore training runs) reproducible.
    for filename in sorted(os.listdir(directory)):
        is_test_file = filename.startswith('cv9')
        # skip files that belong to the other split
        if is_test_file == bool(is_train):
            continue
        # os.path.join picks the right separator for the current platform
        # (the previous hard-coded '\\' only worked on Windows).
        path = os.path.join(directory, filename)
        # load the doc
        doc = load_doc(path)
        # clean doc
        tokens = clean_doc_new(doc, vocab)
        # add to list
        documents.append(tokens)
    return documents


In [100]:
def load_clean_dataset_new(vocab, is_train):
    """Load one split of the corpus, cleaned via ``process_docs_new``.

    Negative documents (from ``file_path_neg``) come first and get label 0,
    positive documents (from ``file_path_pos``) follow and get label 1.

    Args:
        vocab: Allowed words, forwarded to ``process_docs_new``.
        is_train: Selects the training or the test split.

    Returns:
        A ``(docs, labels)`` pair: the list of cleaned documents and an array
        of labels of the same length.
    """
    negative_docs = process_docs_new(file_path_neg, vocab, is_train)
    positive_docs = process_docs_new(file_path_pos, vocab, is_train)
    # one label per document, in the same order as the concatenated list
    labels = array([0] * len(negative_docs) + [1] * len(positive_docs))
    return negative_docs + positive_docs, labels


In [101]:
# Rebuild both splits with the extended cleaning (stop-word removal + lemmatization)
train_docs_new, ytrain_new = load_clean_dataset_new(vocab, True)
test_docs_new, ytest_new = load_clean_dataset_new(vocab, False)

In [102]:
# average length of the re-cleaned training documents in tokens, rounded down
mean_length_new = int(sum(len(doc.split()) for doc in train_docs_new) / len(train_docs_new))
print('Mean length: %d' % mean_length_new)

Mean length: 226


In [103]:
# Fit a fresh tokenizer on the re-cleaned training documents.
# NOTE(review): this rebinds `tokenizer`, replacing the one fitted on
# `train_docs`. From here on, `model` and `model_mean` must not be fed data
# encoded with this tokenizer (e.g. through predict_sentiment); consider a
# distinct name such as `tokenizer_new`.
tokenizer = create_tokenizer(train_docs_new)

In [104]:
# Vocabulary size of the re-fitted tokenizer: word-index entries plus one extra slot
vocab_size_new = len(tokenizer.word_index) + 1
print('New vocabulary size: %d' % vocab_size_new)

New vocabulary size: 5543


In [105]:
# integer encode and pad documents
def encode_docs_new(tokenizer, mean_length_new, docs):
    """Integer-encode documents and pad/truncate them to ``mean_length_new``.

    This used to be a line-for-line copy of ``encode_docs``; it now delegates
    to that function so the encoding logic lives in one place.

    Args:
        tokenizer: Fitted tokenizer providing ``texts_to_sequences``.
        mean_length_new: Target sequence length.
        docs: List of document strings.

    Returns:
        Whatever ``encode_docs`` returns for the same arguments.
    """
    return encode_docs(tokenizer, mean_length_new, docs)

In [106]:
# Encode the re-cleaned splits with the re-fitted tokenizer, fixed to the new mean length
Xtrain_new = encode_docs_new(tokenizer, mean_length_new, train_docs_new)
Xtest_new = encode_docs_new(tokenizer, mean_length_new, test_docs_new)

In [132]:
# define the model
def define_model_new(vocab_size_new, mean_length_new):
    """Build and compile the 1D-CNN classifier for the re-cleaned data.

    Same architecture as the baseline model: 100-dimensional embedding ->
    Conv1D (32 filters, kernel 8, ReLU) -> max pooling (2) -> flatten ->
    Dense(10, ReLU) -> Dense(1, sigmoid).

    Side effects: prints the model summary and writes the architecture
    diagram to 'model_new.png'.

    Args:
        vocab_size_new: Number of rows of the embedding table.
        mean_length_new: Length of the padded/truncated input sequences.

    Returns:
        The compiled model.
    """
    layer_stack = (
        Embedding(vocab_size_new, 100, input_length=mean_length_new),
        Conv1D(filters=32, kernel_size=8, activation='relu'),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(10, activation='relu'),
        Dense(1, activation='sigmoid'),
    )
    model = Sequential()
    for layer in layer_stack:
        model.add(layer)
    # binary classification: cross-entropy loss, accuracy as the reported metric
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # report the architecture on stdout and as an image
    model.summary()
    plot_model(model, to_file='model_new.png', show_shapes=True)
    return model


In [133]:
# Build the model for the re-cleaned, mean-length inputs
model_new = define_model_new(vocab_size_new, mean_length_new)

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 226, 100)          554300    
                                                                 
 conv1d_1 (Conv1D)           (None, 219, 32)           25632     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 109, 32)          0         
 1D)                                                             
                                                                 
 flatten_1 (Flatten)         (None, 3488)              0         
                                                                 
 dense_2 (Dense)             (None, 10)                34890     
                                                                 
 dense_3 (Dense)             (None, 1)                 11        
                                                      

In [134]:
# Train on the re-cleaned data for 10 epochs; verbose=2 prints one line per epoch
model_new.fit(Xtrain_new, ytrain_new, epochs=10, verbose=2)

Epoch 1/10
57/57 - 3s - loss: 0.6909 - accuracy: 0.5394 - 3s/epoch - 44ms/step
Epoch 2/10
57/57 - 2s - loss: 0.5294 - accuracy: 0.8183 - 2s/epoch - 38ms/step
Epoch 3/10
57/57 - 2s - loss: 0.1524 - accuracy: 0.9489 - 2s/epoch - 37ms/step
Epoch 4/10
57/57 - 2s - loss: 0.0272 - accuracy: 0.9972 - 2s/epoch - 32ms/step
Epoch 5/10
57/57 - 2s - loss: 0.0059 - accuracy: 1.0000 - 2s/epoch - 39ms/step
Epoch 6/10
57/57 - 2s - loss: 0.0028 - accuracy: 1.0000 - 2s/epoch - 33ms/step
Epoch 7/10
57/57 - 2s - loss: 0.0017 - accuracy: 1.0000 - 2s/epoch - 36ms/step
Epoch 8/10
57/57 - 2s - loss: 0.0012 - accuracy: 1.0000 - 2s/epoch - 35ms/step
Epoch 9/10
57/57 - 2s - loss: 8.8626e-04 - accuracy: 1.0000 - 2s/epoch - 33ms/step
Epoch 10/10
57/57 - 2s - loss: 7.0310e-04 - accuracy: 1.0000 - 2s/epoch - 39ms/step


<keras.callbacks.History at 0x1d820db57e0>

In [135]:
# Persist the trained model to 'model_new.h5'
model_new.save('model_new.h5')

In [136]:
# Evaluate on the re-cleaned training data (the data the model was fitted on)
loss_train_new, acc_train_new = model_new.evaluate(Xtrain_new, ytrain_new, verbose=0)
print('Train Accuracy: %.2f%%' % (acc_train_new * 100))


Train Accuracy: 100.00%


In [137]:
# Evaluate on the re-cleaned held-out test split
loss_test_new, acc_test_new = model_new.evaluate(Xtest_new, ytest_new, verbose=0)
print('Test Accuracy: %.2f%%' % (acc_test_new*100))

Test Accuracy: 81.50%


## Use Embedding:
#### Experiment with loading pre-trained GloVe embeddings (or any other pre-trained embedding) and measure the impact on model skill with and without further fine-tuning during training.

In [114]:
# Load GloVe embeddings into a dictionary: word -> float32 vector.
# `np` is used here but is not imported anywhere else in the notebook, so the
# import lives in this cell to keep it runnable on a fresh kernel.
import numpy as np

glove_embeddings = {}
with open('glove.6B.100d.txt', 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        # tolerate blank lines (e.g. a trailing newline at the end of the file)
        if not values:
            continue
        # first field is the word, the remaining fields are the vector components
        word = values[0]
        vector = np.asarray(values[1:], dtype='float32')
        glove_embeddings[word] = vector

In [116]:
# Create the embedding matrix from the GloVe vectors: row i holds the vector of
# the word the tokenizer maps to index i.
# `np` is not imported anywhere else in the notebook, so import it here to keep
# this cell runnable on its own.
import numpy as np

embedding_matrix = np.zeros((vocab_size_new, 100))
for word, i in tokenizer.word_index.items():
    embedding_vector = glove_embeddings.get(word)
    # words without a GloVe vector keep their all-zero row
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

In [119]:
def define_model_glove(vocab_size_new, mean_length_new, embedding_matrix, trainable=False):
    """Build and compile the 1D-CNN classifier on top of pre-trained embeddings.

    Architecture: embedding initialised from ``embedding_matrix`` -> Conv1D
    (32 filters, kernel 8, ReLU) -> max pooling (2) -> flatten ->
    Dense(10, ReLU) -> Dense(1, sigmoid).

    Side effects: prints the model summary and writes the architecture
    diagram to 'model_glove.png'.

    Args:
        vocab_size_new: Number of rows of the embedding table.
        mean_length_new: Length of the padded/truncated input sequences.
        embedding_matrix: 2-D array of pre-trained vectors, one row per word
            index; its second dimension is used as the embedding width.
        trainable: Whether the embedding weights are updated during training.
            The default ``False`` keeps them frozen (the previous behaviour);
            pass ``True`` to fine-tune the pre-trained vectors.

    Returns:
        The compiled model.
    """
    # take the width from the matrix so vectors of any dimension can be used
    embedding_dim = embedding_matrix.shape[1]
    model = Sequential()
    model.add(Embedding(vocab_size_new, embedding_dim, input_length=mean_length_new,
                        weights=[embedding_matrix], trainable=trainable))
    model.add(Conv1D(filters=32, kernel_size=8, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Flatten())
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # compile network
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # summarize defined model
    model.summary()
    plot_model(model, to_file='model_glove.png', show_shapes=True)
    return model


In [120]:
# Build the GloVe-initialised model
model_glove = define_model_glove(vocab_size_new, mean_length_new, embedding_matrix)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 226, 100)          554300    
                                                                 
 conv1d_2 (Conv1D)           (None, 219, 32)           25632     
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 109, 32)          0         
 1D)                                                             
                                                                 
 flatten_2 (Flatten)         (None, 3488)              0         
                                                                 
 dense_4 (Dense)             (None, 10)                34890     
                                                                 
 dense_5 (Dense)             (None, 1)                 11        
                                                      

In [121]:
# Train the GloVe-initialised model for 10 epochs; verbose=2 prints one line per epoch.
# NOTE(review): in the recorded run the loss stays at ~0.693 and accuracy at
# ~50%, i.e. chance level for a binary task - the model did not learn; the
# cause needs investigating before any conclusion is drawn from this experiment.
model_glove.fit(Xtrain_new, ytrain_new, epochs=10, verbose=2)

Epoch 1/10
57/57 - 2s - loss: 0.6939 - accuracy: 0.5033 - 2s/epoch - 39ms/step
Epoch 2/10
57/57 - 1s - loss: 0.6932 - accuracy: 0.4894 - 944ms/epoch - 17ms/step
Epoch 3/10
57/57 - 1s - loss: 0.6931 - accuracy: 0.5006 - 932ms/epoch - 16ms/step
Epoch 4/10
57/57 - 1s - loss: 0.6931 - accuracy: 0.4822 - 1s/epoch - 18ms/step
Epoch 5/10
57/57 - 1s - loss: 0.6931 - accuracy: 0.5000 - 1s/epoch - 24ms/step
Epoch 6/10
57/57 - 1s - loss: 0.6930 - accuracy: 0.5000 - 943ms/epoch - 17ms/step
Epoch 7/10
57/57 - 1s - loss: 0.6930 - accuracy: 0.4850 - 920ms/epoch - 16ms/step
Epoch 8/10
57/57 - 1s - loss: 0.6925 - accuracy: 0.4822 - 930ms/epoch - 16ms/step
Epoch 9/10
57/57 - 2s - loss: 0.6927 - accuracy: 0.5011 - 2s/epoch - 32ms/step
Epoch 10/10
57/57 - 1s - loss: 0.6925 - accuracy: 0.4922 - 1s/epoch - 23ms/step


<keras.callbacks.History at 0x1d8023d44f0>

In [122]:
# Persist the trained GloVe model to 'model_glove.h5'
model_glove.save('model_glove.h5')

In [123]:
# Evaluate the GloVe model on the training data (the data it was fitted on)
loss, acc = model_glove.evaluate(Xtrain_new, ytrain_new, verbose=0)
print('Train Accuracy: %.2f%%' % (acc * 100))


Train Accuracy: 50.00%


In [124]:
# Evaluate the GloVe model on the held-out test split
loss, acc = model_glove.evaluate(Xtest_new, ytest_new, verbose=0)
print('Test Accuracy: %.2f%%' % (acc*100))

Test Accuracy: 50.00%
