#  COMP4560 - Artefact
## Implementation of Supervised Topic model


Bokun Kong, u6342099

Supervisor: Dr. Dongwoo Kim

This jupyter notebook file is created by Bokun Kong.


* Dataset of Russian troll: https://github.com/fivethirtyeight/russian-troll-tweets/
* Dataset for sentiment analysis: https://www.kaggle.com/kazanova/sentiment140
* sLDA model is created by Dr. Dongwoo Kim: https://github.com/dongwookim-ml/python-topic-model/tree/master/ptm
* Sentiment analysis is based on: https://towardsdatascience.com/basic-binary-sentiment-analysis-using-nltk-c94ba17ae386

In [2]:
# Import libraries
import numpy as np
import pandas as pd
import glob
import string

import nltk
from nltk.tokenize import WordPunctTokenizer
from nltk.stem import WordNetLemmatizer, SnowballStemmer

import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS
from gensim import corpora

# Import supervised topic model algorithm
import logging
from ptm import GibbsSupervisedLDA
from ptm.utils import get_top_words

logger = logging.getLogger('GibbsSupervisedLDA')
logger.propagate = False

In [None]:
# Reading training files and extract useful information
allfiles = glob.glob('training dataset/IRAhandle_tweets_*.csv')
allfiles.sort()

data = pd.concat([pd.read_csv(f) for f in allfiles], ignore_index = True)
print('Number of rows and columns of the whole dataset: {}'.format(data.shape))

df_en = data[(data['language'] == 'English') & (data['account_category'] != 'NonEnglish') & (~data['content'].isnull())]
df_lnr = df_en[((df_en['account_category'] == 'LeftTroll') | (df_en['account_category'] == 'RightTroll'))]
df_lnr = df_lnr.reset_index(drop=True)
# Creat a new training dataset containing useful tweets
df_lnr.to_csv("left_right_news.csv", index=False)


# Reading testing files and extract useful information
allfiles = glob.glob('testing dataset/IRAhandle_tweets_*.csv')
allfiles.sort()

testing_data = pd.concat([pd.read_csv(f) for f in allfiles], ignore_index = True)
print('Number of rows and columns of the whole dataset: {}'.format(testing_data.shape))

df_en = testing_data[(testing_data['language'] == 'English') & (testing_data['account_category'] != 'NonEnglish') & (~testing_data['content'].isnull())]
df_lnr = df_en[((df_en['account_category'] == 'LeftTroll') | (df_en['account_category'] == 'RightTroll'))]
df_lnr = df_lnr.reset_index(drop=True)
# Creat a new testing dataset containing useful tweets
df_lnr.to_csv("testing_left_right_news.csv", index=False)

In [3]:
# Read the generated dataset
df_tweets = pd.read_csv("left_right_news.csv", parse_dates=['publish_date'])
df_tweets = df_tweets.drop(['external_author_id', 'author', 'region', 'language', 'harvested_date', 'following', 'followers', 'updates', 'post_type', 'account_type', 'retweet', 'new_june_2018', 'alt_external_id', 'article_url', 'tco1_step1', 'tco2_step1', 'tco3_step1', 'tweet_id'], axis=1)
df_tweets['index'] = df_tweets.index
df_tweets.shape

# Numerating the troll category 
ratings =  df_tweets.drop(['content', 'publish_date', 'index'], axis=1)
for i in range(len(df_tweets)):
    if ratings.loc[i, 'account_category'] == 'LeftTroll':
        ratings.loc[i, 'account_category'] = -1
    elif ratings.loc[i, 'account_category'] == 'RightTroll':
        ratings.loc[i, 'account_category'] = 1

ratings = ratings.values.flatten()

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
# Read the generated dataset
test_df_tweets = pd.read_csv("testing_left_right_news.csv", parse_dates=['publish_date'])
test_df_tweets = test_df_tweets.drop(['external_author_id', 'author', 'region', 'language', 'harvested_date', 'following', 'followers', 'updates', 'post_type', 'account_type', 'retweet', 'new_june_2018', 'alt_external_id', 'article_url', 'tco1_step1', 'tco2_step1', 'tco3_step1', 'tweet_id'], axis=1)
test_df_tweets['index'] = test_df_tweets.index
test_df_tweets.shape

# Numerating the troll category 
test_ratings =  test_df_tweets.drop(['content', 'publish_date', 'index'], axis=1)
for i in range(len(test_df_tweets)):
    if test_ratings.loc[i, 'account_category'] == 'LeftTroll':
        test_ratings.loc[i, 'account_category'] = -1
    elif test_ratings.loc[i, 'account_category'] == 'RightTroll':
        test_ratings.loc[i, 'account_category'] = 1
    
test_ratings = test_ratings.values.flatten()

In [10]:
# Data pre-processing
punctuation = set(string.punctuation)
stemmer = SnowballStemmer("english")
lemmatizer = WordNetLemmatizer()

def preprocessor(text):
    tokens = WordPunctTokenizer().tokenize(text.lower())
    stems = []
    for token in tokens:
        if token.isalpha() and token not in gensim.parsing.preprocessing.STOPWORDS and token not in punctuation and len(token) >= 3 and len(token) <= 14:
            stems.append(lemmatizer.lemmatize(token, pos='v'))
   
    return stems

docs = df_tweets['content'].map(preprocessor)
test_docs = test_df_tweets['content'].map(preprocessor)

In [11]:
# Creating dictionary for corpus
dictionary = corpora.Dictionary(docs)
dictionary.filter_n_most_frequent(50)
dictionary.filter_extremes(no_below=6, keep_n=100000)
# Generate Document-Term matrix
doc_term_matrix = [dictionary.doc2bow(doc) for doc in docs]

In [12]:
# sLDA parameter setting
n_doc = len(doc_term_matrix)
n_voca = len(dictionary)

In [13]:
# Building corpus for sLDA input
corpus = []
for i in range(n_doc):
    corp = []
    for j in range(len(doc_term_matrix[i])):
        for x in range(doc_term_matrix[i][j][1]):
            
            corp.append(doc_term_matrix[i][j][0])
    corpus.append(corp)   

In [14]:
n_topic = 20
r_var = 0.01
# Training the sLDA model
model = GibbsSupervisedLDA(n_doc, n_voca, n_topic, sigma=r_var)
model.fit(corpus, ratings)

2019-09-06 23:03:06 INFO:GibbsSupervisedLDA:[ITER] 0,	MAE:0.11,	log_likelihood:-70488781.66
2019-09-06 23:08:12 INFO:GibbsSupervisedLDA:[ITER] 1,	MAE:0.11,	log_likelihood:-69348952.23
2019-09-06 23:13:18 INFO:GibbsSupervisedLDA:[ITER] 2,	MAE:0.10,	log_likelihood:-68628204.19
2019-09-06 23:19:24 INFO:GibbsSupervisedLDA:[ITER] 3,	MAE:0.10,	log_likelihood:-68093516.49
2019-09-06 23:25:31 INFO:GibbsSupervisedLDA:[ITER] 4,	MAE:0.10,	log_likelihood:-67726352.70
2019-09-06 23:32:02 INFO:GibbsSupervisedLDA:[ITER] 5,	MAE:0.10,	log_likelihood:-67371376.17
2019-09-06 23:38:33 INFO:GibbsSupervisedLDA:[ITER] 6,	MAE:0.10,	log_likelihood:-67076275.12
2019-09-06 23:45:03 INFO:GibbsSupervisedLDA:[ITER] 7,	MAE:0.10,	log_likelihood:-66837805.20
2019-09-06 23:51:22 INFO:GibbsSupervisedLDA:[ITER] 8,	MAE:0.10,	log_likelihood:-66615324.68
2019-09-06 23:57:42 INFO:GibbsSupervisedLDA:[ITER] 9,	MAE:0.10,	log_likelihood:-66410363.04
2019-09-07 00:04:14 INFO:GibbsSupervisedLDA:[ITER] 10,	MAE:0.10,	log_likelihood:

2019-09-07 06:47:39 INFO:GibbsSupervisedLDA:[ITER] 89,	MAE:0.09,	log_likelihood:-58331368.53
2019-09-07 06:52:42 INFO:GibbsSupervisedLDA:[ITER] 90,	MAE:0.08,	log_likelihood:-58296911.83
2019-09-07 06:57:45 INFO:GibbsSupervisedLDA:[ITER] 91,	MAE:0.08,	log_likelihood:-58259269.04
2019-09-07 07:02:50 INFO:GibbsSupervisedLDA:[ITER] 92,	MAE:0.08,	log_likelihood:-58223729.73
2019-09-07 07:07:51 INFO:GibbsSupervisedLDA:[ITER] 93,	MAE:0.08,	log_likelihood:-58192023.24
2019-09-07 07:12:55 INFO:GibbsSupervisedLDA:[ITER] 94,	MAE:0.08,	log_likelihood:-58164825.26
2019-09-07 07:17:58 INFO:GibbsSupervisedLDA:[ITER] 95,	MAE:0.08,	log_likelihood:-58134696.17
2019-09-07 07:23:02 INFO:GibbsSupervisedLDA:[ITER] 96,	MAE:0.08,	log_likelihood:-58106230.88
2019-09-07 07:28:06 INFO:GibbsSupervisedLDA:[ITER] 97,	MAE:0.08,	log_likelihood:-58080326.12
2019-09-07 07:33:09 INFO:GibbsSupervisedLDA:[ITER] 98,	MAE:0.08,	log_likelihood:-58053697.79
2019-09-07 07:38:12 INFO:GibbsSupervisedLDA:[ITER] 99,	MAE:0.08,	log_l

In [15]:
# Printing the result of sLDA
voca = []
for i in range(n_voca):
    voca.append(dictionary[i])

for ti in model.eta.argsort():
    top_words = get_top_words(model.TW, voca, ti, n_words=15)
    print('Eta', model.eta[ti] ,'Topic', ti ,':\t', ','.join(top_words))

Eta -7.96827685638431 Topic 1 :	 start,woman,best,cop,way,real,school,donald,free,change,racist,things,remember,make,listen
Eta -5.648898978532803 Topic 5 :	 woman,start,cop,hear,change,way,better,men,real,follow,best,free,music,join,read
Eta -3.367248370734816 Topic 6 :	 nowplaying,god,music,best,listen,shit,fuck,start,check,real,happy,staywoke,lol,beat,talk
Eta -3.3213037794588014 Topic 13 :	 cop,russia,donald,officer,report,protest,arrest,election,charge,plan,war,gun,death,law,judge
Eta -2.1533976364066874 Topic 16 :	 racism,fight,history,health,americans,care,enlist,change,way,race,ppl,stand,party,racist,freedom
Eta -1.1624227265095761 Topic 14 :	 music,follow,free,check,twitter,beat,nowplaying,issue,listen,share,artists,track,radio,hiphop,hit
Eta -1.0124758449543325 Topic 3 :	 cop,case,king,justice,campaign,money,pay,ask,hand,murder,run,folks,mean,meet,officer
Eta 1.1938249806105234 Topic 8 :	 fakenews,muslim,gun,isis,islam,muslims,claim,law,islamic,anti,terror,ban,illegal,control

In [16]:
# Generate Helpout Doc for testing the model
test_dictionary = corpora.Dictionary(test_docs)
test_dictionary.filter_n_most_frequent(20)
test_dictionary.filter_extremes(no_below=2, keep_n=30000)

test_doc_term_matrix = [test_dictionary.doc2bow(doc) for doc in test_docs]

In [17]:
# Generating testing corpus
test_corpus = []
for i in range(len(test_doc_term_matrix)):
    corp = []
    for j in range(len(test_doc_term_matrix[i])):
        for x in range(test_doc_term_matrix[i][j][1]):
            
            corp.append(test_doc_term_matrix[i][j][0])
    test_corpus.append(corp)  

In [18]:
# Predicting troll category for testing dataset using generated sLDA model
max_iter = 100
h_doc_topic_sum = model.sample_heldout_doc(max_iter, test_corpus)

In [19]:
# Normalise topic assignments
normalised_topic_assi = h_doc_topic_sum
length = len(h_doc_topic_sum)
for i in range(length):
    sum = 0
    l = len(h_doc_topic_sum[i])
    for j in range(l):
        sum += h_doc_topic_sum[i][j]
    for j in range(l):
        normalised_topic_assi[i][j] = h_doc_topic_sum[i][j] / sum

In [20]:
# Calculating predicted troll category for each tweet
predicted_value = normalised_topic_assi .dot (model.eta)

In [21]:
# Change continuous predicted label to either -1 or +1
for i in range(len(predicted_value)):
    if predicted_value[i] > 0:
        predicted_value[i] = 1
    elif predicted_value[i] < 0:
        predicted_value[i] = -1

In [22]:
# Calculating the mae
mae = np.mean(np.abs(test_ratings - predicted_value))
mae

0.9832435039741627

### Sentiment Analysis
Interesting topic

In [33]:
# Normalise DT
normalised_DT = model.DT
length = len(model.DT)
for i in range(length):
    sum = 0
    l = len(model.DT[i])
    for j in range(l):
        sum += model.DT[i][j]
    for j in range(l):
        normalised_DT[i][j] = model.DT[i][j] / sum

In [43]:
# Choose the interesting topic manually accrording to the result of sLDA
interested_topic = 11
threshold = 0.55
# Genarate corpus for sentiment prediction
corpus = []
for i in range(len(normalised_DT)):
    if normalised_DT[i][interested_topic] >= threshold:
        corpus.append(df_tweets.loc[i, 'content'])

In [44]:
corpus

['"It took Hillary abt 5 minutes to blame NRA for madman\'s rampage, but 5 days to sorta-kinda blame Harvey Weinstein 4 his sexually assaults." https://t.co/uzZ5obFGiO',
 "Liberal's are the most hate filled, racist, hypocritical, bigoted, irrational &amp; mentally unstable creatures on earth. A danger to the world. https://t.co/qpzuVTlhAL",
 'Clay Travis stuns CNN host when he says he believes in two things: the First Amendment and Boobs.   Priceless. He wins the day,😂 https://t.co/ELXA16MCSi',
 '"When President Trump touched those ancient stones, he touched our hearts forever." ~ @netanyahu   Tell me again how Trump is Anti Semitic https://t.co/LwtTRUI75i',
 'RT ChasD3: Strange how no one blamed Bernie Sanders for the shooter of Steve Scalise. But left blames Trump for the killing of the woman by…',
 'I think one girl is all you need, @realDonaldTrump. Drudge: Romney Leads Picks for Secretary of State... Nikki Haley for UN ambassador...',
 '��Berkeley Students SUPPORT Man Waving ISIS 

Building Sentiment Analysis model 

In [3]:
import nltk
import random
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC
from nltk.classify import ClassifierI
from statistics import mode
from nltk.tokenize import word_tokenize
import re
import os

corp = pd.read_csv("tn.csv",encoding ="ISO-8859-1",header=None)
corpus_all  =  corp.drop([1, 2, 3, 4], axis=1)
n = len(corpus_all)

corpus_pos = list()
corpus_neg = list()
for i in range(n):
    if corpus_all.loc[i, 0] == 0:
        corpus_neg.append(corpus_all.loc[i, 5])
    elif corpus_all.loc[i, 0] == 4:
        corpus_pos.append(corpus_all.loc[i, 5])

files_pos = corpus_pos[750000:]
files_neg = corpus_neg[750000:]

all_words = []
documents = []

from nltk.corpus import stopwords
import re

stop_words = list(set(stopwords.words('english')))

#  j is adject, r is adverb, and v is verb
#allowed_word_types = ["J","R","V"]
allowed_word_types = ["J"]

for p in  files_pos:
    
    # create a list of tuples where the first element of each tuple is a review
    # the second element is the label
    documents.append( (p, "pos") )
    
    # remove punctuations
    cleaned = re.sub(r'[^(a-zA-Z)\s]','', p)
    
    # tokenize 
    tokenized = word_tokenize(cleaned)
    
    # remove stopwords 
    stopped = [w for w in tokenized if not w in stop_words]
    
    # parts of speech tagging for each word 
    pos = nltk.pos_tag(stopped)
    
    # make a list of  all adjectives identified by the allowed word types list above
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())

    
for p in files_neg:
    # create a list of tuples where the first element of each tuple is a review
    # the second element is the label
    documents.append( (p, "neg") )
    
    # remove punctuations
    cleaned = re.sub(r'[^(a-zA-Z)\s]','', p)
    
    # tokenize 
    tokenized = word_tokenize(cleaned)
    
    # remove stopwords 
    stopped = [w for w in tokenized if not w in stop_words]
    
    # parts of speech tagging for each word 
    neg = nltk.pos_tag(stopped)
    
    # make a list of  all adjectives identified by the allowed word types list above
    for w in neg:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())



In [6]:
# creating a frequency distribution of each adjectives. 
BOW = nltk.FreqDist(all_words)


# listing the 5000 most frequent words
word_features = list(BOW.keys())[:5000]
word_features[0], word_features[-1]


# function to create a dictionary of features for each review in the list document.
# The keys are the words in word_features 
# The values of each key are either true or false for wether that feature appears in the review or not
def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)

    return features

# Creating features for each review
featuresets = [(find_features(rev), category) for (rev, category) in documents]

# Shuffling the documents 
random.shuffle(featuresets)


training_set = featuresets[:80000]
testing_set = featuresets[80000:]
print( 'training_set :', len(training_set), '\ntesting_set :', len(testing_set))


classifier = nltk.NaiveBayesClassifier.train(training_set)

print("Classifier accuracy percent:",(nltk.classify.accuracy(classifier, testing_set))*100)

classifier.show_most_informative_features(15)


# Printing the most important features 
mif = classifier.most_informative_features()

mif = [a for a,b in mif]
print(mif)


# getting predictions for the testing set by looping over each reviews featureset tuple
# The first elemnt of the tuple is the feature set and the second element is the label 
ground_truth = [r[1] for r in testing_set]

preds = [classifier.classify(r[0]) for r in testing_set]


from sklearn.metrics import f1_score
f1_score(ground_truth, preds, labels = ['neg', 'pos'], average = 'micro')

training_set : 80000 
testing_set : 20000
Classifier accuracy percent: 69.425
Most Informative Features
                     vip = True              pos : neg    =     76.1 : 1.0
                     sad = True              neg : pos    =     21.8 : 1.0
               nominated = True              pos : neg    =     19.7 : 1.0
                   humid = True              neg : pos    =     19.0 : 1.0
                     fri = True              neg : pos    =     17.7 : 1.0
               depressed = True              neg : pos    =     14.3 : 1.0
                 stomach = True              neg : pos    =     14.2 : 1.0
           ashleytisdale = True              pos : neg    =     13.4 : 1.0
                   metro = True              neg : pos    =     13.0 : 1.0
                    ouch = True              neg : pos    =     12.7 : 1.0
             musicmonday = True              pos : neg    =     12.6 : 1.0
                terrible = True              neg : pos    =     11.4 : 

0.69425

In [7]:
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB,BernoulliNB
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.metrics import f1_score, accuracy_score
from nltk.classify import ClassifierI


print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set))*100)
classifier.show_most_informative_features(15)

MNB_clf = SklearnClassifier(MultinomialNB())
MNB_clf.train(training_set)
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_clf, testing_set))*100)

BNB_clf = SklearnClassifier(BernoulliNB())
BNB_clf.train(training_set)
print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BNB_clf, testing_set))*100)

LogReg_clf = SklearnClassifier(LogisticRegression())
LogReg_clf.train(training_set)
print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogReg_clf, testing_set))*100)

SGD_clf = SklearnClassifier(SGDClassifier())
SGD_clf.train(training_set)
print("SGDClassifier_classifier accuracy percent:", (nltk.classify.accuracy(SGD_clf, testing_set))*100)



classifiers_dict = {'ONB': classifier,
                    'MNB': MNB_clf,
                    'BNB': BNB_clf, 
                    'LogReg': LogReg_clf, 
                    'SGD': SGD_clf}


ground_truth = [r[1] for r in testing_set]
predictions = {}
f1_scores = {}
for clf, listy in classifiers_dict.items(): 
    # getting predictions for the testing set by looping over each reviews featureset tuple
    # The first elemnt of the tuple is the feature set and the second element is the label 
    predictions[clf] = [listy.classify(r[0]) for r in testing_set]
    f1_scores[clf] = f1_score(ground_truth, predictions[clf], labels = ['neg', 'pos'], average = 'micro')
    print(f'f1_score {clf}: {f1_scores[clf]}')
    



# Defininig the ensemble model class 

class EnsembleClassifier(ClassifierI):
    
    def __init__(self, *classifiers):
        self._classifiers = classifiers
    
    # returns the classification based on majority of votes
    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)
    # a simple measurement the degree of confidence in the classification 
    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)

        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf

Original Naive Bayes Algo accuracy percent: 69.425
Most Informative Features
                     vip = True              pos : neg    =     76.1 : 1.0
                     sad = True              neg : pos    =     21.8 : 1.0
               nominated = True              pos : neg    =     19.7 : 1.0
                   humid = True              neg : pos    =     19.0 : 1.0
                     fri = True              neg : pos    =     17.7 : 1.0
               depressed = True              neg : pos    =     14.3 : 1.0
                 stomach = True              neg : pos    =     14.2 : 1.0
           ashleytisdale = True              pos : neg    =     13.4 : 1.0
                   metro = True              neg : pos    =     13.0 : 1.0
                    ouch = True              neg : pos    =     12.7 : 1.0
             musicmonday = True              pos : neg    =     12.6 : 1.0
                terrible = True              neg : pos    =     11.4 : 1.0
                   upse



LogisticRegression_classifier accuracy percent: 69.74000000000001




SGDClassifier_classifier accuracy percent: 69.105
f1_score ONB: 0.69425
f1_score MNB: 0.6949
f1_score BNB: 0.69475
f1_score LogReg: 0.6974
f1_score SGD: 0.69105


In [23]:
# Initializing the ensemble classifier 
ensemble_clf = EnsembleClassifier(classifier,MNB_clf,BNB_clf,LogReg_clf,SGD_clf)

# List of only feature dictionary from the featureset list of tuples 
feature_list = [f[0] for f in testing_set]

# Looping over each to classify each review
ensemble_preds = [ensemble_clf.classify(features) for features in feature_list]

f1_score(ground_truth, ensemble_preds, average = 'micro')

# Function to do classification a given review and return the label a
# and the amount of confidence in the classifications
def sentiment(text):
    feats = find_features(text)
    return ensemble_clf.classify(feats), ensemble_clf.confidence(feats)

In [45]:
for i in range(len(corpus)):
    print(corpus[i])
    print(sentiment(corpus[i]))

"It took Hillary abt 5 minutes to blame NRA for madman's rampage, but 5 days to sorta-kinda blame Harvey Weinstein 4 his sexually assaults." https://t.co/uzZ5obFGiO
('neg', 1.0)
Liberal's are the most hate filled, racist, hypocritical, bigoted, irrational &amp; mentally unstable creatures on earth. A danger to the world. https://t.co/qpzuVTlhAL
('pos', 1.0)
Clay Travis stuns CNN host when he says he believes in two things: the First Amendment and Boobs.   Priceless. He wins the day,😂 https://t.co/ELXA16MCSi
('neg', 1.0)
"When President Trump touched those ancient stones, he touched our hearts forever." ~ @netanyahu   Tell me again how Trump is Anti Semitic https://t.co/LwtTRUI75i
('neg', 0.6)
RT ChasD3: Strange how no one blamed Bernie Sanders for the shooter of Steve Scalise. But left blames Trump for the killing of the woman by…
('pos', 1.0)
I think one girl is all you need, @realDonaldTrump. Drudge: Romney Leads Picks for Secretary of State... Nikki Haley for UN ambassador...
('neg'

# Restricted time range

In [6]:
df_tweets_2017 = df_tweets[((df_tweets['publish_date'] >= '2017-01-01') & (df_tweets['publish_date'] <= '2017-12-31'))]
test_df_tweets_2017 = test_df_tweets[((test_df_tweets['publish_date'] >= '2017-01-01') & (test_df_tweets['publish_date'] <= '2017-12-31'))]

df_tweets_2016 = df_tweets[((df_tweets['publish_date'] >= '2016-01-01') & (df_tweets['publish_date'] <= '2016-12-31'))]
test_df_tweets_2016 = test_df_tweets[((test_df_tweets['publish_date'] >= '2016-01-01') & (test_df_tweets['publish_date'] <= '2016-12-31'))]

df_tweets_2015 = df_tweets[((df_tweets['publish_date'] >= '2015-01-01') & (df_tweets['publish_date'] <= '2015-12-31'))]
test_df_tweets_2015 = test_df_tweets[((test_df_tweets['publish_date'] >= '2015-01-01') & (test_df_tweets['publish_date'] <= '2015-12-31'))]

df_tweets_2017 = df_tweets_2017.reset_index(drop=True)
test_df_tweets_2017 = test_df_tweets_2017.reset_index(drop=True)

df_tweets_2016 = df_tweets_2016.reset_index(drop=True)
test_df_tweets_2016 = test_df_tweets_2016.reset_index(drop=True)

df_tweets_2015 = df_tweets_2015.reset_index(drop=True)
test_df_tweets_2015 = test_df_tweets_2015.reset_index(drop=True)

ratings_2017 =  df_tweets_2017.drop(['content', 'publish_date', 'index'], axis=1)
test_ratings_2017 =  test_df_tweets_2017.drop(['content', 'publish_date', 'index'], axis=1)

ratings_2016 =  df_tweets_2016.drop(['content', 'publish_date', 'index'], axis=1)
test_ratings_2016 =  test_df_tweets_2016.drop(['content', 'publish_date', 'index'], axis=1)

ratings_2015 =  df_tweets_2015.drop(['content', 'publish_date', 'index'], axis=1)
test_ratings_2015 =  test_df_tweets_2015.drop(['content', 'publish_date', 'index'], axis=1)

for i in range(len(df_tweets_2017)):
    if ratings_2017.loc[i, 'account_category'] == 'LeftTroll':
        ratings_2017.loc[i, 'account_category'] = -1
    elif ratings_2017.loc[i, 'account_category'] == 'RightTroll':
        ratings_2017.loc[i, 'account_category'] = 1
for i in range(len(test_df_tweets_2017)):
    if test_ratings_2017.loc[i, 'account_category'] == 'LeftTroll':
        test_ratings_2017.loc[i, 'account_category'] = -1
    elif test_ratings_2017.loc[i, 'account_category'] == 'RightTroll':
        test_ratings_2017.loc[i, 'account_category'] = 1
        
for i in range(len(df_tweets_2016)):
    if ratings_2016.loc[i, 'account_category'] == 'LeftTroll':
        ratings_2016.loc[i, 'account_category'] = -1
    elif ratings_2016.loc[i, 'account_category'] == 'RightTroll':
        ratings_2016.loc[i, 'account_category'] = 1
for i in range(len(test_df_tweets_2016)):
    if test_ratings_2016.loc[i, 'account_category'] == 'LeftTroll':
        test_ratings_2016.loc[i, 'account_category'] = -1
    elif test_ratings_2016.loc[i, 'account_category'] == 'RightTroll':
        test_ratings_2016.loc[i, 'account_category'] = 1
        
for i in range(len(df_tweets_2015)):
    if ratings_2015.loc[i, 'account_category'] == 'LeftTroll':
        ratings_2015.loc[i, 'account_category'] = -1
    elif ratings_2015.loc[i, 'account_category'] == 'RightTroll':
        ratings_2015.loc[i, 'account_category'] = 1
for i in range(len(test_df_tweets_2015)):
    if test_ratings_2015.loc[i, 'account_category'] == 'LeftTroll':
        test_ratings_2015.loc[i, 'account_category'] = -1
    elif test_ratings_2015.loc[i, 'account_category'] == 'RightTroll':
        test_ratings_2015.loc[i, 'account_category'] = 1
        

ratings_2017 = ratings_2017.values.flatten()
test_ratings_2017 = test_ratings_2017.values.flatten()

ratings_2016 = ratings_2016.values.flatten()
test_ratings_2016 = test_ratings_2016.values.flatten()

ratings_2015 = ratings_2015.values.flatten()
test_ratings_2015 = test_ratings_2015.values.flatten()

In [7]:
import string
import nltk
from nltk.tokenize import WordPunctTokenizer

from nltk.stem import WordNetLemmatizer, SnowballStemmer
import gensim
from gensim.utils import simple_preprocess
from gensim.parsing.preprocessing import STOPWORDS

import gensim
from gensim import corpora


punctuation = set(string.punctuation)

stemmer = SnowballStemmer("english")
lemmatizer = WordNetLemmatizer()

def preprocessor(text):
    tokens = WordPunctTokenizer().tokenize(text.lower())
    stems = []
    for token in tokens:
        if token.isalpha() and token not in gensim.parsing.preprocessing.STOPWORDS and token not in punctuation and len(token) >= 3 and len(token) <= 14:
            stems.append(lemmatizer.lemmatize(token, pos='v'))
   
    return stems

docs_2017 = df_tweets_2017['content'].map(preprocessor)
test_docs_2017 = test_df_tweets_2017['content'].map(preprocessor)

docs_2016 = df_tweets_2016['content'].map(preprocessor)
test_docs_2016 = test_df_tweets_2016['content'].map(preprocessor)

docs_2015 = df_tweets_2015['content'].map(preprocessor)
test_docs_2015 = test_df_tweets_2015['content'].map(preprocessor)

In [8]:
# Creating dictionary for corpus
dictionary_2017 = corpora.Dictionary(docs_2017)
test_dictionary_2017 = corpora.Dictionary(test_docs_2017)

dictionary_2016 = corpora.Dictionary(docs_2016)
test_dictionary_2016 = corpora.Dictionary(test_docs_2016)

dictionary_2015 = corpora.Dictionary(docs_2015)
test_dictionary_2015 = corpora.Dictionary(test_docs_2015)


dictionary_2017.filter_n_most_frequent(20)
dictionary_2017.filter_extremes(no_below=3, keep_n=30000)
test_dictionary_2017.filter_n_most_frequent(5)
test_dictionary_2017.filter_extremes(no_below=2, keep_n=30000)

dictionary_2016.filter_n_most_frequent(20)
dictionary_2016.filter_extremes(no_below=3, keep_n=30000)
test_dictionary_2016.filter_n_most_frequent(5)
test_dictionary_2016.filter_extremes(no_below=2, keep_n=30000)

dictionary_2015.filter_n_most_frequent(20)
dictionary_2015.filter_extremes(no_below=3, keep_n=30000)
test_dictionary_2015.filter_n_most_frequent(5)
test_dictionary_2015.filter_extremes(no_below=2, keep_n=30000)

# Generate Document-Term matrix
doc_term_matrix_2017 = [dictionary_2017.doc2bow(doc) for doc in docs_2017]
test_doc_term_matrix_2017 = [test_dictionary_2017.doc2bow(doc) for doc in test_docs_2017]

doc_term_matrix_2016 = [dictionary_2016.doc2bow(doc) for doc in docs_2016]
test_doc_term_matrix_2016 = [test_dictionary_2016.doc2bow(doc) for doc in test_docs_2016]

doc_term_matrix_2015 = [dictionary_2015.doc2bow(doc) for doc in docs_2015]
test_doc_term_matrix_2015 = [test_dictionary_2015.doc2bow(doc) for doc in test_docs_2015]

In [9]:
n_doc_2017 = len(doc_term_matrix_2017)
n_voca_2017 = len(dictionary_2017)
test_n_doc_2017 = len(test_doc_term_matrix_2017)

n_doc_2016 = len(doc_term_matrix_2016)
n_voca_2016 = len(dictionary_2016)
test_n_doc_2016 = len(test_doc_term_matrix_2016)

n_doc_2015 = len(doc_term_matrix_2015)
n_voca_2015 = len(dictionary_2015)
test_n_doc_2015 = len(test_doc_term_matrix_2015)

n_topic = 20
r_var = 0.01


corpus_2017 = []
test_corpus_2017 = []
test_corpus_2016 = []
test_corpus_2015 = []
corpus_2016 = []
corpus_2015 = []
for i in range(n_doc_2017):
    corp = []
    for j in range(len(doc_term_matrix_2017[i])):
        for x in range(doc_term_matrix_2017[i][j][1]):
            
            corp.append(doc_term_matrix_2017[i][j][0])
    corpus_2017.append(corp)   
for i in range(test_n_doc_2017):
    corp = []
    for j in range(len(test_doc_term_matrix_2017[i])):
        for x in range(test_doc_term_matrix_2017[i][j][1]):
            
            corp.append(test_doc_term_matrix_2017[i][j][0])
    test_corpus_2017.append(corp) 
    
for i in range(n_doc_2016):
    corp = []
    for j in range(len(doc_term_matrix_2016[i])):
        for x in range(doc_term_matrix_2016[i][j][1]):
            
            corp.append(doc_term_matrix_2016[i][j][0])
    corpus_2016.append(corp)   
for i in range(test_n_doc_2016):
    corp = []
    for j in range(len(test_doc_term_matrix_2016[i])):
        for x in range(test_doc_term_matrix_2016[i][j][1]):
            
            corp.append(test_doc_term_matrix_2016[i][j][0])
    test_corpus_2016.append(corp) 
    
for i in range(n_doc_2015):
    corp = []
    for j in range(len(doc_term_matrix_2015[i])):
        for x in range(doc_term_matrix_2015[i][j][1]):
            
            corp.append(doc_term_matrix_2015[i][j][0])
    corpus_2015.append(corp)   
for i in range(test_n_doc_2015):
    corp = []
    for j in range(len(test_doc_term_matrix_2015[i])):
        for x in range(test_doc_term_matrix_2015[i][j][1]):
            
            corp.append(test_doc_term_matrix_2015[i][j][0])
    test_corpus_2015.append(corp) 

In [196]:
model_2017= GibbsSupervisedLDA(n_doc_2017, n_voca_2017, n_topic, sigma=r_var)
model_2017.fit(corpus_2017, ratings_2017)

voca = []
for i in range(n_voca_2017):
    voca.append(dictionary_2017[i])

for ti in model_2017.eta.argsort():
    top_words = get_top_words(model_2017.TW, voca, ti, n_words=15)
    print('Eta', model_2017.eta[ti] ,'Topic', ti ,':\t', ','.join(top_words))

2019-07-20 16:45:46 INFO:GibbsSupervisedLDA:[ITER] 0,	MAE:0.10,	log_likelihood:-34805794.69
2019-07-20 16:50:03 INFO:GibbsSupervisedLDA:[ITER] 1,	MAE:0.10,	log_likelihood:-34211993.92
2019-07-20 16:54:19 INFO:GibbsSupervisedLDA:[ITER] 2,	MAE:0.10,	log_likelihood:-33869111.33
2019-07-20 16:58:34 INFO:GibbsSupervisedLDA:[ITER] 3,	MAE:0.10,	log_likelihood:-33612435.12
2019-07-20 17:02:53 INFO:GibbsSupervisedLDA:[ITER] 4,	MAE:0.10,	log_likelihood:-33405070.04
2019-07-20 17:07:09 INFO:GibbsSupervisedLDA:[ITER] 5,	MAE:0.10,	log_likelihood:-33229155.92
2019-07-20 17:11:24 INFO:GibbsSupervisedLDA:[ITER] 6,	MAE:0.10,	log_likelihood:-33070835.78
2019-07-20 17:15:40 INFO:GibbsSupervisedLDA:[ITER] 7,	MAE:0.10,	log_likelihood:-32926449.58
2019-07-20 17:19:55 INFO:GibbsSupervisedLDA:[ITER] 8,	MAE:0.10,	log_likelihood:-32795333.79
2019-07-20 17:24:11 INFO:GibbsSupervisedLDA:[ITER] 9,	MAE:0.10,	log_likelihood:-32668437.13
2019-07-20 17:28:29 INFO:GibbsSupervisedLDA:[ITER] 10,	MAE:0.10,	log_likelihood:

2019-07-20 23:04:44 INFO:GibbsSupervisedLDA:[ITER] 89,	MAE:0.09,	log_likelihood:-27791465.96
2019-07-20 23:08:57 INFO:GibbsSupervisedLDA:[ITER] 90,	MAE:0.09,	log_likelihood:-27773723.46
2019-07-20 23:13:12 INFO:GibbsSupervisedLDA:[ITER] 91,	MAE:0.09,	log_likelihood:-27756539.06
2019-07-20 23:17:26 INFO:GibbsSupervisedLDA:[ITER] 92,	MAE:0.09,	log_likelihood:-27741295.96
2019-07-20 23:21:40 INFO:GibbsSupervisedLDA:[ITER] 93,	MAE:0.09,	log_likelihood:-27723492.40
2019-07-20 23:25:55 INFO:GibbsSupervisedLDA:[ITER] 94,	MAE:0.09,	log_likelihood:-27704072.33
2019-07-20 23:30:10 INFO:GibbsSupervisedLDA:[ITER] 95,	MAE:0.08,	log_likelihood:-27686563.25
2019-07-20 23:34:25 INFO:GibbsSupervisedLDA:[ITER] 96,	MAE:0.08,	log_likelihood:-27666529.69
2019-07-20 23:38:41 INFO:GibbsSupervisedLDA:[ITER] 97,	MAE:0.08,	log_likelihood:-27648598.08
2019-07-20 23:42:55 INFO:GibbsSupervisedLDA:[ITER] 98,	MAE:0.08,	log_likelihood:-27629584.80
2019-07-20 23:47:10 INFO:GibbsSupervisedLDA:[ITER] 99,	MAE:0.08,	log_l

Eta -5.076618513532643 Topic 11 :	 music,nowplaying,start,best,women,year,book,youtube,check,read,god,free,real,years,happy
Eta -3.3001379366096404 Topic 8 :	 talibkweli,god,fuck,shit,way,feel,racist,mean,talk,life,word,ppl,twitter,real,happen
Eta -3.259071011943608 Topic 6 :	 women,care,health,help,kill,pay,school,year,african,shoot,years,woman,world,water,plan
Eta -1.467507062660592 Topic 17 :	 music,nowplaying,beat,listen,rap,check,feat,hiphop,fakenews,free,nyc,night,jaketapper,album,cnnpolitics
Eta -1.4441168371054969 Topic 0 :	 russian,sessions,donald,putin,war,court,flynn,election,meet,resist,campaign,ctl,ban,office,order
Eta -1.4302363241380216 Topic 2 :	 year,old,women,life,years,school,men,woman,girl,kid,god,baby,child,way,fuck
Eta -1.325098704449923 Topic 16 :	 best,start,click,soundcloud,promotion,detail,music,million,spend,hit,real,help,followers,grow,promo
Eta 1.1616278904912671 Topic 14 :	 job,rice,power,unmask,give,lead,political,susan,party,business,years,nsa,create,big

In [10]:
model_2016= GibbsSupervisedLDA(n_doc_2016, n_voca_2016, n_topic, sigma=r_var)
model_2016.fit(corpus_2016, ratings_2016)

voca = []
for i in range(n_voca_2016):
    voca.append(dictionary_2016[i])

for ti in model_2016.eta.argsort():
    top_words = get_top_words(model_2016.TW, voca, ti, n_words=15)
    print('Eta', model_2016.eta[ti] ,'Topic', ti ,':\t', ','.join(top_words))

2019-10-15 11:37:08 INFO:GibbsSupervisedLDA:[ITER] 0,	MAE:0.11,	log_likelihood:-24844408.23
2019-10-15 11:38:57 INFO:GibbsSupervisedLDA:[ITER] 1,	MAE:0.10,	log_likelihood:-24309675.14
2019-10-15 11:40:43 INFO:GibbsSupervisedLDA:[ITER] 2,	MAE:0.10,	log_likelihood:-24005065.00
2019-10-15 11:42:31 INFO:GibbsSupervisedLDA:[ITER] 3,	MAE:0.10,	log_likelihood:-23791378.78
2019-10-15 11:44:21 INFO:GibbsSupervisedLDA:[ITER] 4,	MAE:0.10,	log_likelihood:-23618600.76
2019-10-15 11:46:08 INFO:GibbsSupervisedLDA:[ITER] 5,	MAE:0.10,	log_likelihood:-23474751.61
2019-10-15 11:47:56 INFO:GibbsSupervisedLDA:[ITER] 6,	MAE:0.10,	log_likelihood:-23349148.26
2019-10-15 11:49:43 INFO:GibbsSupervisedLDA:[ITER] 7,	MAE:0.10,	log_likelihood:-23235948.03
2019-10-15 11:51:31 INFO:GibbsSupervisedLDA:[ITER] 8,	MAE:0.10,	log_likelihood:-23127028.31
2019-10-15 11:53:17 INFO:GibbsSupervisedLDA:[ITER] 9,	MAE:0.10,	log_likelihood:-23030741.67
2019-10-15 11:55:07 INFO:GibbsSupervisedLDA:[ITER] 10,	MAE:0.10,	log_likelihood:

2019-10-15 14:14:58 INFO:GibbsSupervisedLDA:[ITER] 89,	MAE:0.09,	log_likelihood:-20183155.63
2019-10-15 14:16:43 INFO:GibbsSupervisedLDA:[ITER] 90,	MAE:0.09,	log_likelihood:-20177848.92
2019-10-15 14:18:32 INFO:GibbsSupervisedLDA:[ITER] 91,	MAE:0.09,	log_likelihood:-20169246.60
2019-10-15 14:20:19 INFO:GibbsSupervisedLDA:[ITER] 92,	MAE:0.09,	log_likelihood:-20161308.86
2019-10-15 14:22:07 INFO:GibbsSupervisedLDA:[ITER] 93,	MAE:0.09,	log_likelihood:-20153486.41
2019-10-15 14:23:52 INFO:GibbsSupervisedLDA:[ITER] 94,	MAE:0.09,	log_likelihood:-20144981.65
2019-10-15 14:25:37 INFO:GibbsSupervisedLDA:[ITER] 95,	MAE:0.09,	log_likelihood:-20138223.60
2019-10-15 14:27:23 INFO:GibbsSupervisedLDA:[ITER] 96,	MAE:0.09,	log_likelihood:-20129252.76
2019-10-15 14:29:08 INFO:GibbsSupervisedLDA:[ITER] 97,	MAE:0.09,	log_likelihood:-20123460.44
2019-10-15 14:30:52 INFO:GibbsSupervisedLDA:[ITER] 98,	MAE:0.09,	log_likelihood:-20117259.40
2019-10-15 14:32:37 INFO:GibbsSupervisedLDA:[ITER] 99,	MAE:0.09,	log_l

Eta -5.039700313332487 Topic 16 :	 man,cop,kill,shoot,get,live,years,video,win,day,love,think,old,year,help
Eta -2.8922032415390984 Topic 4 :	 live,america,support,war,matter,american,racism,change,state,fight,donald,president,racist,justice,history
Eta -2.847702722641812 Topic 0 :	 love,get,staywoke,fuck,shit,good,think,god,lol,feel,look,come,life,happy,ass
Eta -1.3157841619261328 Topic 17 :	 win,hand,go,nowplaying,play,miss,music,chance,watch,fall,live,hit,come,video,get
Eta -1.3075007441881537 Topic 13 :	 music,play,listen,nowplaying,video,amp,check,album,free,track,watch,issue,beat,hiphop,feature
Eta -1.296946560337417 Topic 12 :	 bernie,staywoke,win,sanders,super,debate,party,team,think,donald,women,years,run,lose,pay
Eta -1.2935990591332267 Topic 5 :	 today,live,african,join,life,american,help,celebrate,come,africa,power,world,start,go,day
Eta -1.291421168518632 Topic 3 :	 nodapl,call,water,media,stand,movement,election,donald,state,news,jackson,report,president,flint,lead
Eta -1

In [None]:
model_2015= GibbsSupervisedLDA(n_doc_2015, n_voca_2015, n_topic, sigma=r_var)
model_2015.fit(corpus_2015, ratings_2015)

voca = []
for i in range(n_voca_2015):
    voca.append(dictionary_2015[i])

for ti in model_2015.eta.argsort():
    top_words = get_top_words(model_2015.TW, voca, ti, n_words=15)
    print('Eta', model_2015.eta[ti] ,'Topic', ti ,':\t', ','.join(top_words))

In [222]:
max_iter = 100
h_doc_topic_sum_2017 = model_2017.sample_heldout_doc(max_iter, test_corpus_2017)

# Normalise topic assignments
normalised_topic_assi_2017 = h_doc_topic_sum_2017
length = len(h_doc_topic_sum_2017)
for i in range(length):
    sum = 0
    l = len(h_doc_topic_sum_2017[i])
    for j in range(l):
        sum += h_doc_topic_sum_2017[i][j]
    for j in range(l):
        normalised_topic_assi_2017[i][j] = h_doc_topic_sum_2017[i][j] / sum
        
        
predicted_value_2017 = normalised_topic_assi_2017 .dot (model_2017.eta)

for i in range(len(predicted_value_2017)):
    if predicted_value_2017[i] > 0:
        predicted_value_2017[i] = 1
    elif predicted_value_2017[i] < 0:
        predicted_value_2017[i] = -1

mae_2017 = np.mean(np.abs(test_ratings_2017 - predicted_value_2017))
mae_2017

0.741528676389955

In [13]:
max_iter = 100
h_doc_topic_sum_2016 = model_2016.sample_heldout_doc(max_iter, test_corpus_2016)

# Normalise topic assignments
normalised_topic_assi_2016 = h_doc_topic_sum_2016
length = len(h_doc_topic_sum_2016)
for i in range(length):
    sum = 0
    l = len(h_doc_topic_sum_2016[i])
    for j in range(l):
        sum += h_doc_topic_sum_2016[i][j]
    for j in range(l):
        normalised_topic_assi_2016[i][j] = h_doc_topic_sum_2016[i][j] / sum
        
        
predicted_value_2016 = normalised_topic_assi_2016 .dot (model_2016.eta)

for i in range(len(predicted_value_2016)):
    if predicted_value_2016[i] > 0:
        predicted_value_2016[i] = 1
    elif predicted_value_2016[i] < 0:
        predicted_value_2016[i] = -1

mae_2016 = np.mean(np.abs(test_ratings_2016 - predicted_value_2016))
mae_2016

0.9821167510964539

In [None]:
max_iter = 100
h_doc_topic_sum_2015 = model_2015.sample_heldout_doc(max_iter, test_corpus)

# Normalise topic assignments
normalised_topic_assi_2015 = h_doc_topic_sum_2015
length = len(h_doc_topic_sum_2015)
for i in range(length):
    sum = 0
    l = len(h_doc_topic_sum_2015[i])
    for j in range(l):
        sum += h_doc_topic_sum_2015[i][j]
    for j in range(l):
        normalised_topic_assi_2015[i][j] = h_doc_topic_sum_2015[i][j] / sum
        
        
predicted_value_2015 = normalised_topic_assi_2015 .dot (model_2015.eta)

for i in range(len(predicted_value_2015)):
    if predicted_value_2015[i] > 0:
        predicted_value_2015[i] = 1
    elif predicted_value_2015[i] < 0:
        predicted_value_2015[i] = -1

mae_2015 = np.mean(np.abs(test_ratings - predicted_value_2015))
mae_2015

Sentiment analysis

In [14]:
# For 2016
# Normalise DT
normalised_DT = model_2016.DT
length = len(model_2016.DT)
for i in range(length):
    sum = 0
    l = len(model_2016.DT[i])
    for j in range(l):
        sum += model_2016.DT[i][j]
    for j in range(l):
        normalised_DT[i][j] = model_2016.DT[i][j] / sum

In [15]:
interested_topic = 1
threshold = 0.5
corpus_2016 = []
for i in range(len(normalised_DT)):
    if normalised_DT[i][interested_topic] >= threshold:
        corpus_2016.append(df_tweets_2016.loc[i, 'content'])

In [24]:
for i in range(len(corpus_2016)):
    print(corpus_2016[i])
    print(sentiment(corpus_2016[i]))

Prince Philip isn't happy about being a stay-at-home dad? #TheCrown
('neg', 1.0)
on today's episode i introduce the world to my own version of sasha fiece: Lola LaFway. she's still pretty awkward but its cool
('neg', 1.0)
RT @MezCPA: MT @CalFreedomMom: To save America, we must turn to God through His Son, Jesus Christ. #ccot https://t.co/udRxe4Cnun #RenewUS #…
('pos', 0.8)
#TCOT You Can Almost Feel Their Giddy Glee https://t.co/s81EYODIZg
('neg', 1.0)
You need to shut up and shave that shit off your face! https://t.co/5Wp7ucyfAa
('pos', 1.0)
#CCOT "It is impossible to mentally or socially enslave a Bible-reading people."  ~ Horace Greeley #freedom #liberty #politics
('pos', 0.8)
RT @PatriotGinger2: MT @TimBurt: '...And if someone asks about your Christian hope, always be ready to explain it' https://t.co/IJKmn6bmX6 …
('neg', 1.0)
People 'love nature' until it crawls up their leg
('pos', 1.0)
Fabulous reads from little tykes to grandma #thrillers #chillers & #Romance  https://t.co/j7c4X

('pos', 1.0)
#Trump says he is very sorry. As ✝Christians, we believe in redemption + forgiveness. �� https://t.co/t7c7P8AyX2
('pos', 1.0)
Dear @realDonaldTrump stop mentioning @BernieSanders voters,  We will NEVER NEVER vote for you, you piece of crap.  @ChuckNellis
('neg', 1.0)
'They were the WORST we've EVER tested...' - Frank Luntz #NeverTrumpOrHillary https://t.co/RHjgbz5uZD
('neg', 1.0)
'@RWSurferGirl1 @RogerJStoneJr @DRJAMESCABOT @JaredWyand @kupajo322 WE MUST DONATE DIRECTLY TO TRUMP TODAY ��� RT ALL https://t.co/Wuy1d40lHn'
('neg', 1.0)
It's so cute when Hollywood "celebrities" tell us all who to vote for https://t.co/7P4sVD2RxX
('neg', 1.0)
'@HillaryClinton You just want Latinos washing your laundry and cooking for you!  You never went to visit the #Louisianafloodvictims'
('neg', 1.0)
'@SSC1974 @MichelleObama Didn't mean to offend, sorry. My belief is that God knows what's in my heart and doesn't take offense :)'
('pos', 1.0)
Nobel Prize for Chemistry goes to Keith Richards
(

('neg', 1.0)
'@Goldthinkers @realDonaldTrump check this out so simply put. Loved her'
('neg', 1.0)
CAN U image how many will die-if she got N.BUT she wont-@realDonaldTrump will B the Next US President. God will Bless US Again-as we Him..! https://t.co/9TAf3acltZ
('neg', 1.0)
Capricia Marshall: "This is so weird that we cannot get one person to go on this stage. " Maybe cause HRC is unlika… https://t.co/TbmnLfe4Bu
('pos', 1.0)
God bless his soul and comfort his family. https://t.co/zJW6Co3pFg
('neg', 1.0)
Omg. I just found out I'm going to be a father. I can't stop crying with joy
('neg', 1.0)
If you dress up as a clown for Halloween then you're asking to get beat up by the end of the night haha
('neg', 1.0)
My babies warns my heart leading the prays every now, I'm so blessed to have my babies my heart, I thank God https://t.co/UJbin7Sbwt
('neg', 1.0)
It is time for Christians around the world to stand against their clergymen who call for murder in the name of the pacifist Jesus Christ.

('neg', 1.0)
All I want for Christmas is a healthy @Patriots team in January & February.  Please...#NEvsNYJ
('pos', 1.0)
'@WayneDupreeShow DJT should show no mercy on her sorry ass...'
('pos', 1.0)
"They also believe in God!!���������� https://t.co/9AaqXV7XRV
('pos', 0.6)
'@camboviet @jorgenseptember @downbyseashore @Louise0451 @eavesdropann @bugnurse970 @LorenceHud @F100Stealth @KnucklDraginSam @jimmygarner https://t.co/VFoRiPuLsr'
('neg', 1.0)
Retweeted Kristina Lackey (@klackey33):  CUBS!!! World Series! Ahhhhhhh! #flythew #gocubsgo #worldseries... https://t.co/GuQuDfQzwS
('neg', 1.0)
#Christianity #faith #sin #holiness #HolySpirit #Bible #culture #repentance #believe #TrustGod https://t.co/USAgWaaCbU
('neg', 1.0)
Fucking hell!  Get round THAT breakfast and you'd be alright for a day!  Done my duty, "Pigs-in-blankets Bap" downed for breakfast! https://t.co/CHGvcPIgnT
('neg', 1.0)
Big day on Thursday for Indiana and the great workers of that wonderful state.We will keep our companies

('neg', 1.0)
New post: "Gasol scores 25 as Grizzlies rally to edge Magic 95-94" https://t.co/uSPBKUnLB8
('neg', 1.0)
Goodnight and MERRY CHRISTMAS fellow Americans! Thank you for your RTs, FAVS, Comments. ��❤️����#MAGA @realDonaldTrump https://t.co/Lm7uVrfvOg
('neg', 1.0)
Let's kiss the wind!  Let's get blessed!  In this new week laughing like the sun! https://t.co/N0LykYC8wX
('neg', 1.0)
Merry Christmas to all my followers (even the hidden trolls) I'm very thankful that my Christmas came early #PresidentElectTrump won AGAIN! https://t.co/5TSZZCHlEe
('neg', 1.0)
"Making this movie was a very, very courageous thing to do." #JaneGoodall #TheIvoryGame https://t.co/xg2L78x2HN
('neg', 0.8)
The North Carolina electors sang ‘God Bless America’ on their way to vote for Trump. This is awesome! https://t.co/p12urxBEAN
('neg', 1.0)
ADVENTURES OF MANA #adventuresofmana #videogame #gamers #apk #Gamer #indiedev #games #Hacked https://t.co/W7x5lYfWuW
('neg', 1.0)
One of the sanest, surest, and most g

('neg', 1.0)
'@LoveStephanysco they'd learn it hard way or die in jail...'
('pos', 1.0)
����Merry Christmas���� Hilarious Trump Christmas Parody “It’s The Most Wonderful Time in 8 Years” https://t.co/zSLypJq1Lo via @YouTube
('neg', 1.0)
I hope the person who sang "Hippopotamus for Christmas" got eaten by a hippopotamus.
('pos', 1.0)
'@wsredneck @sluggoD54 @outdoor_blues @morlan111 @fishmajishun1 @buckuglygear @bucksdeer @AwesomeMerica Deer sees that mug,has heartache dies'
('neg', 1.0)
Agree Gilbert! Great hope for America! I know God had his hands on Trump, We can now say Merry Christmas unapologetically! https://t.co/ZcH8VWVdLh
('neg', 1.0)
This is what actually happened last night son was off of mad xans ���� https://t.co/S6slgr82pe
('pos', 1.0)
Great Christmas lights at a house somewhere here in New York!  #MerryChristmas https://t.co/gKy0oQLAaf
('neg', 1.0)
"Kind #words are short and easy to speak but their echoes are truly endless" ❤  #love #kindness #compassion https://t.co/xULS

('pos', 0.8)
Class act, just like his father. Trump's greatest success is his children, he is a winner in life. @realDonaldTrump https://t.co/RmRoWF7P3M
('neg', 1.0)
Absolutely adorable! He can't wait to be baptized! God bless him, always!!! ❤❤❤ https://t.co/kSLgLCLzZ7
('neg', 1.0)
#TrumpsterWarriors �����  You are �% correct! Omg can it get any more boring - even falling over is even old �   https://t.co/1eR4unvnX6
('pos', 1.0)
The man who reads nothing at all is better educated than the man who reads nothing but newspapers. - Thomas Jefferson
('neg', 1.0)
Maybe...@AIIAmericanGirI @sxdoc @Karennola719 @Jnbarke @WarChief82 @Dbargen @realDonaldTrump @MarkGinn20 #Trump https://t.co/L2vMvOfJxr
('neg', 1.0)
Say "Hello" to the Bad Guy! Welcome to the Family @ChaelSonnen! #BellatorNation
('neg', 1.0)
'The Willingness to Tell the People the Truth.' What makes Trump so Popular and Hillary so Unpopular.  @patcondell    https://t.co/KAs7NUELiJ
('neg', 1.0)
Jesus forgiving the woman taken N adult

('neg', 1.0)
Ya, Crowder, it's easy to pretend that you love MLK now...  But we know you would have HATED him back in the day!   @scrowder
('neg', 1.0)
Everybody's guilty but  they are all given immunity https://t.co/Rrx2VSgjMm
('neg', 1.0)
Nigel Farage's beautiful dig at Hillary. I wouldn't vote for Hillary if SHE paid me.   https://t.co/BQBngvDxul Nigel knows she's a payer.
('neg', 1.0)
3 #ebooks #salvation� https://t.co/3TJSvW6bYU https://t.co/kp02KFOVYg https://t.co/rTtg4cw9WW #US #USA #Jesus #EWTN https://t.co/BBIPRCJkxP
('neg', 1.0)
'@JmsWmFox God Has chosen the lowly, the meek, the things that are Not:to bring to nothing the things that Are'
('neg', 1.0)
Amen! God WILL HEAL OUR LAND. Thank you Donald for all you do for the USA! Our beautiful Christian Country! We The People Love You & Pence! https://t.co/5jHYa3bQ7P
('neg', 1.0)
Good morning and happy International Coffee Day! This cup of our Ugandan coffee is helping to… https://t.co/NiGQ69WpvE
('neg', 1.0)
Check this out: Bruce

('neg', 1.0)
Actually, there is a lot of unity. Everyone thinks Cruz is pretty slimy.  https://t.co/pIaMqqOlFx


KeyboardInterrupt: 