In [1]:
import numpy as np
import sys
import pickle
import tensorflow as tf
from tensorflow.keras import layers
from keras.preprocessing.sequence import pad_sequences
from keras.utils import CustomObjectScope
from keras.models import load_model
from keras.initializers import glorot_uniform

Using TensorFlow backend.


In [2]:
# Restore the trained sentiment classifier from its HDF5 file

# Register the name 'GlorotUniform' while the file is deserialised
# (presumably the saved model refers to the initializer under that name).
custom_objects = {'GlorotUniform': glorot_uniform()}
with CustomObjectScope(custom_objects):
    model = load_model('sentiment_lstm.h5')

# Print the layer-by-layer architecture as a sanity check
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 60, 200)           18000200  
_________________________________________________________________
dropout (Dropout)            (None, 60, 200)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 128)               168448    
_________________________________________________________________
dense (Dense)                (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
activation (Activation)      (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 65        
__________

In [3]:
# Define some helper functions

# Pickled unigram frequency distribution; top_n_words() unpickles it and calls
# .most_common(N) on the loaded object.
# NOTE(review): absolute path on one specific machine — adjust before running elsewhere.
unigrams_file = '/home/mingj/project/tweet_training_data/large/sentiment140-freqdist.pkl'
# Class names indexed by the rounded model output: index 0 -> NEGATIVE, index 1 -> POSITIVE.
label = ['NEGATIVE', 'POSITIVE']

def top_n_words(pkl_file_name, N, shift=0):
    """Assign integer ids to the N most frequent entries of a pickled distribution.

    Args:
        pkl_file_name: Path to a pickle file whose object provides
            ``most_common(N)`` (for example a ``collections.Counter``).
        N: How many of the most frequent entries to keep.
        shift: Offset added to every rank, so the most frequent entry
            receives the id ``shift``.

    Returns:
        A dict mapping each kept entry to ``rank + shift``, where rank 0 is
        the most frequent entry.
    """
    # NOTE: unpickling runs arbitrary code from the file; only load trusted files.
    with open(pkl_file_name, 'rb') as handle:
        distribution = pickle.load(handle)

    index_of = {}
    for rank, entry in enumerate(distribution.most_common(N)):
        # entry is a (word, count) pair; only the word is needed.
        index_of[entry[0]] = rank + shift
    return index_of

def get_feature_vector(tweet, vocabulary=None):
    """Convert a tweet into the list of vocabulary ids of its known words.

    The tweet is split on whitespace with ``str.split()``. Words that are not
    in the vocabulary are skipped, so the result may be shorter than the
    number of words and is empty for an empty tweet.

    Args:
        tweet: Text to convert.
        vocabulary: Optional mapping from word to integer id. When omitted,
            the notebook-level ``vocab`` mapping is used; it must have been
            defined before this function is called.

    Returns:
        A list of ids, in the order the words appear in the tweet.
    """
    # Fall back to the notebook global only when no mapping is passed in.
    lookup = vocab if vocabulary is None else vocabulary
    feature_vector = []
    for word in tweet.split():
        # Single lookup per word; None marks an out-of-vocabulary word.
        index = lookup.get(word)
        if index is not None:
            feature_vector.append(index)
    return feature_vector

In [4]:
# Classify one example tweet with the loaded model

test_tweet = 'what a stupid leader'

# Preprocessing settings: sequence length fed to the network and vocabulary size
max_length = 60
vocab_size = 90000
# shift=1 makes word ids start at 1 instead of 0
vocab = top_n_words(unigrams_file, vocab_size, shift=1)

# Tweet -> word ids -> single-row batch -> padded at the end up to max_length
test_tweet_vector = get_feature_vector(test_tweet)
test_tweet_vector = np.reshape(test_tweet_vector, (1, len(test_tweet_vector)))
test_tweet_vector = pad_sequences(
    test_tweet_vector,
    maxlen=max_length,
    padding='post',
)

# Score the batch, round the first column to 0/1 and look up the class name
predictions = model.predict(test_tweet_vector, batch_size=128, verbose=1)
predicted_classes = np.round(predictions[:, 0]).astype(int)
results = label[predicted_classes[0]]



In [5]:
# Report the class name stored in `results` by the preceding prediction cell
print('A.I. Prediction: This sentence seems a', results, 'one.')

A.I. Prediction: This sentence seems a NEGATIVE one.


In [6]:
# Predict on a second tweet

test_tweet = 'what a beautiful world'

# max_length, vocab_size and vocab keep the values assigned in the previous
# prediction cell, so the frequency distribution is not unpickled a second
# time here. Run that cell first.

# generate feature vector: word ids -> single-row batch -> padded to max_length
test_tweet_vector = get_feature_vector(test_tweet)
test_tweet_vector = np.reshape(test_tweet_vector, (1, len(test_tweet_vector)))
test_tweet_vector = pad_sequences(test_tweet_vector, maxlen=max_length, padding='post')

# Round the score of the only row to 0/1 and map it to its class name
predictions = model.predict(test_tweet_vector, batch_size=128, verbose=1)
results = label[np.round(predictions[:, 0]).astype(int)[0]]



In [7]:
# Report the class name stored in `results` by the preceding prediction cell
print('A.I. Prediction: This sentence seems a', results, 'one.')

A.I. Prediction: This sentence seems a POSITIVE one.
