In [1]:
import csv
import json
import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif,chi2
from keras.models import model_from_json
from tensorflow import keras
from keras.callbacks import EarlyStopping
from tensorflow.python.keras import models
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras.layers import Dropout
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


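Download the dataset from http://thinknook.com/twitter-sentiment-analysis-training-corpus-dataset-2012-09-22/ and place `Sentiment Analysis Dataset.csv` in the notebook's working directory before running the next cell.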

In [2]:
# Load the (label, text) pairs from the sentiment corpus.
#
# The file is parsed with the csv module rather than np.genfromtxt:
# genfromtxt splits on every comma and treats '#' as a comment marker, so
# tweet text was being cut at the first comma or hashtag.
DATASET_PATH = 'Sentiment Analysis Dataset.csv'

training = []
# errors='replace' mirrors the vectorizer's decode_error='replace' setting.
# NOTE(review): the file is assumed to be UTF-8 -- confirm against the download.
with open(DATASET_PATH, newline='', encoding='utf-8', errors='replace') as dataset_file:
    rows = csv.reader(dataset_file)
    next(rows, None)  # Skip the header row.
    for row in rows:
        if len(row) < 4:
            continue  # Blank or truncated line: there is no text column to read.
        # Column 1 is the sentiment label, column 3 the tweet text. Text with
        # unquoted commas spills into extra fields, so glue those back together.
        training.append((int(row[1]), ','.join(row[3:])))

train_x = [text for _, text in training]
train_y = np.asarray([label for label, _ in training])

  """Entry point for launching an IPython kernel.


In [3]:
def mlp_model(layers, units, dropout_rate, input_shape, num_classes):
    """Creates an instance of a multi-layer perceptron model.

    The network is an input Dropout layer, followed by `layers - 1` hidden
    (Dense + Dropout) pairs, followed by one output Dense layer.

    # Arguments
        layers: int, number of `Dense` layers in the model (hidden layers
            plus the output layer).
        units: int, output dimension of the hidden layers.
        dropout_rate: float, percentage of input to drop at Dropout layers.
        input_shape: tuple, shape of input to the model.
        num_classes: int, number of output classes. Two (or fewer) classes
            yield a single sigmoid unit; more classes yield a softmax layer
            with one unit per class.

    # Returns
        An MLP model instance (not compiled).
    """
    # Previously num_classes was ignored and the head was always a single
    # sigmoid unit, which is only valid for binary problems.
    if num_classes > 2:
        output_units, output_activation = num_classes, 'softmax'
    else:
        output_units, output_activation = 1, 'sigmoid'

    model = models.Sequential()
    model.add(Dropout(rate=dropout_rate, input_shape=input_shape))

    for _ in range(layers - 1):
        model.add(Dense(units=units, activation='relu'))
        model.add(Dropout(rate=dropout_rate))

    model.add(Dense(units=output_units, activation=output_activation))
    return model

In [4]:
# Keyword arguments for the TF-IDF vectorizer.
kwargs = {
    'ngram_range': (1, 2),  # Use 1-grams + 2-grams.
    # TF-IDF weights are fractional, so a float dtype is required. An integer
    # dtype here only makes scikit-learn warn and fall back to float64.
    'dtype': np.float64,
    'strip_accents': 'unicode',
    'decode_error': 'replace',
    'analyzer': 'word',  # Split text into word tokens.
    'min_df': 2,  # Ignore terms that appear in fewer than 2 documents.
}
vectorizer = TfidfVectorizer(**kwargs)

# Learn the vocabulary from the training texts and vectorize them.
# NOTE: train_x is rebound from raw texts to a sparse matrix here, so this
# cell cannot be re-run without first re-running the data-loading cell.
train_x = vectorizer.fit_transform(train_x)

# Keep the (at most) 20000 features with the highest chi-squared score
# against the labels, then cast to float32 for the network input.
selector = SelectKBest(chi2, k=min(20000, train_x.shape[1]))
train_x = selector.fit_transform(train_x, train_y).astype('float32')

In [8]:
# Model hyper-parameters.
layers = 2
units = 32
dropout_rate = 0.3
input_shape = train_x.shape[1:]  # Feature dimension only; batch axis dropped.
num_classes = 2

# Create and compile the model instance.
model = mlp_model(layers, units, dropout_rate, input_shape, num_classes)
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# The model is a tf.keras model, so the callback must come from tf.keras as
# well; standalone-Keras callbacks are a separate implementation and are not
# guaranteed to work with tf.keras training loops.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=1)

# Train and validate the model; training stops once val_loss fails to improve
# for one epoch.
model.fit(
    train_x,
    train_y,
    batch_size=512,
    epochs=20,
    verbose=1,
    validation_split=0.2,
    callbacks=[early_stopping],
    shuffle=True,
)

# Persist the architecture (JSON) and the weights (HDF5) separately.
# NOTE: `vectorizer` and `selector` are not saved, so inference only works in
# a kernel where the vectorization cell has already been run.
model_json = model.to_json()
with open('model.json', 'w') as json_file:
    json_file.write(model_json)

model.save_weights('model.h5')

Train on 1262901 samples, validate on 315726 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


In [11]:
# Rebuild the model from the saved architecture and weights.
with open('model.json', 'r') as json_file:
    loaded_model_json = json_file.read()

# The architecture was serialized by a tf.keras model, so deserialize it with
# tf.keras too instead of the standalone Keras loader.
model = tf.keras.models.model_from_json(loaded_model_json)
model.load_weights('model.h5')

# Interactive loop: classify sentences until the user submits an empty line.
# Relies on `vectorizer` and `selector` fitted earlier in this kernel.
while True:
    evalSentence = input('Input a sentence to be evaluated, or Enter to quit: ')

    if not evalSentence:
        break

    # Apply the same TF-IDF + feature-selection pipeline used for training.
    tx = vectorizer.transform([evalSentence]).astype('float32')
    tx = selector.transform(tx).astype('float32')
    pred = model.predict(tx)

    # Single sigmoid output; values above 0.5 are reported as positive.
    # Converting to a Python float keeps the arithmetic in double precision
    # regardless of NumPy's scalar promotion rules.
    positive_probability = float(pred[0][0])

    # Confidence rescales the distance from the 0.5 threshold to 0-100 %.
    if positive_probability > 0.5:
        print("Positive Sentiment.Confidence Level:", (positive_probability - 0.5) * 200, "%")
    else:
        print("Negative Sentiment.Confidence Level:", (0.5 - positive_probability) * 200, "%")

Input a sentence to be evaluated, or Enter to quit: i am very sad
Negative Sentiment.Confidence Level: 99.68916536308825 %
Input a sentence to be evaluated, or Enter to quit: he is a coward
Positive Sentiment.Confidence Level: 30.955064296722412 %
Input a sentence to be evaluated, or Enter to quit: he is a very genuine person
Positive Sentiment.Confidence Level: 30.14744520187378 %
Input a sentence to be evaluated, or Enter to quit: she is a disgrace to the famil
Positive Sentiment.Confidence Level: 19.513344764709473 %
Input a sentence to be evaluated, or Enter to quit: he is a disgrace
Positive Sentiment.Confidence Level: 29.87825870513916 %
Input a sentence to be evaluated, or Enter to quit: she is a disgrace to the family
Positive Sentiment.Confidence Level: 27.11465358734131 %
Input a sentence to be evaluated, or Enter to quit: A miracle happened that night
Negative Sentiment.Confidence Level: 39.957815408706665 %
Input a sentence to be evaluated, or Enter to quit: fuck off
Negati