# Example: how to load and use the model

In [1]:
import warnings
warnings.filterwarnings('ignore')

from process_text import clean_up_text
from process_text import string_cohesion
from textblob import TextBlob
import spacy
import pickle
from sklearn.externals import joblib
from sklearn.feature_extraction.text import TfidfVectorizer

[nltk_data] Downloading package wordnet to /home/javier/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /home/javier/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /home/javier/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
#--------------------------------------------------------------
# Read the custom stop words: one entry per line of stopwords.txt,
# with leading/trailing whitespace stripped from every line.
custom_stop_words = []
with open("stopwords.txt", "r") as stopword_file:
    custom_stop_words.extend(entry.strip() for entry in stopword_file)
#--------------------------------------------------------------


#--------------------------------------------------------------
# Import the vocabulary and generate the vectorizer transformer
#--------------------------------------------------------------
# NOTE(review): absolute, machine-specific path -- adjust it when running
# this notebook on another machine.
articles_path = "/home/javier/Documents/sentiment_analysis/sentiment_model/articles-raw.pkl"
A, terms, dict_sample = joblib.load(articles_path)

n_rows, n_cols = A.shape[0], A.shape[1]
print( "Loaded %d X %d document-term matrix" % (n_rows, n_cols) )
print('number of terms: ', len(terms))
print('Dictionary: ', len(dict_sample))

# Fit a fresh TF-IDF vectorizer on dict_sample. From here on `A` refers to
# the refitted matrix, not the one that was read from the pickle file.
vectorizer = TfidfVectorizer(min_df=20, stop_words=custom_stop_words)
A = vectorizer.fit_transform(dict_sample)
print('A: ', A.shape)
#--------------------------------------------------------------


#--------------------------------------------------------------
# We load the trained models
#--------------------------------------------------------------
# The names of the files containing the weights of the model
filename1 = 'lr_sentiment_model.sav'
filename2 = 'nb_sentiment_model.sav'

# Now we load in the trained models. Each file is opened in a `with` block
# so its handle is closed as soon as unpickling finishes (the previous
# `pickle.load(open(...))` form never closed the files explicitly).
# NOTE: pickle.load can execute arbitrary code while loading; only use
# model files that come from a trusted source.
with open(filename1, 'rb') as model_file:
    loaded_lr = pickle.load(model_file)
with open(filename2, 'rb') as model_file:
    loaded_nb = pickle.load(model_file)

sample_text = 'Horrible trash'
s = str(clean_up_text(sample_text))

# Transform the text
X0 = vectorizer.transform([s])


def _signed_confidence(model, features, label):
    """Rescale the probability of the predicted class to a signed score.

    Parameters
    ----------
    model : fitted classifier exposing ``predict_proba``.
    features : feature matrix holding a single sample (row 0 is used).
    label : the class predicted for that sample.

    Returns
    -------
    (class_index, score) where ``class_index = int((label + 1) / 2)``
    (so -1 -> 0 and +1 -> 1) and
    ``score = (p - 0.5) * 2.0 * int(label)``, with ``p`` the value in
    column ``class_index`` of ``predict_proba``.

    NOTE(review): assumes the labels are -1/+1 and that the probability
    columns are ordered [-1, +1] -- confirm against the trained models.
    """
    class_index = int((label + 1) / 2)
    confidence = (model.predict_proba(features)[0][class_index] - 0.5) * 2.0
    return class_index, confidence * int(label)


# Predict the class
preds_nb = loaded_nb.predict(X0)
preds_lr = loaded_lr.predict(X0)

# TextBlob polarity is evaluated once and reused for both the score and the
# label; a polarity of exactly 0.0 is labelled +1.
prob_blob = TextBlob(sample_text).sentiment.polarity # [-1,1]
preds_blob = +1 if prob_blob >= 0.0 else -1

# Compute the probability of the class
indx_nb, prob_nb = _signed_confidence(loaded_nb, X0, preds_nb[0])
indx_lr, prob_lr = _signed_confidence(loaded_lr, X0, preds_lr[0])

print(indx_nb)
print(sample_text,[preds_nb[0],preds_lr[0],preds_blob],[prob_nb,prob_lr,prob_blob ] )

Loaded 2442 X 1963 document-term matrix
number of terms:  1963
Dictionary:  6105
A:  (6105, 1963)
0
Horrible trash [-1, -1, -1] [-0.8748386165266397, -0.555848299620592, -1.0]


# Example strings for computing topic cohesion

In [4]:
# Example use of the string cohesion metric
s1 = 'cat dogs birds rabbit'      # four different words
s2 = 'chair chair chair chair'    # the same word repeated
s3 = 'cars dogs birds rabbit'     # s1 with the first word swapped
s4 = ''                           # empty string

# Score each sample in turn and print one value per line.
for sample in (s1, s2, s3, s4):
    print(string_cohesion(sample))

0.653513353317976
1.0
0.5155545175075531
0.0
