In [25]:
import nltk
import random
# from nltk.corpus import movie_reviews
from nltk.classify.scikitlearn import SklearnClassifier
import pickle
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
from nltk.classify import ClassifierI
from statistics import mode
from nltk.tokenize import word_tokenize


class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)

    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)

        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf


short_pos = open("short_reviews/positive.txt", "r").read()
short_neg = open("short_reviews/negative.txt", "r").read()

# move this up here
all_words = []
documents = []

#  j is adject, r is adverb, and v is verb
# allowed_word_types = ["J","R","V"]
allowed_word_types = ["J"]

for p in short_pos.split('\n'):
    documents.append((p, "pos"))
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())

for p in short_neg.split('\n'):
    documents.append((p, "neg"))
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())
            
all_words = nltk.FreqDist(all_words)

word_features = list(all_words.keys())[:5000]

def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)

    return features


featuresets = [(find_features(rev), category) for (rev, category) in documents]

random.shuffle(featuresets)
print(len(featuresets))

testing_set = featuresets[10000:]
training_set = featuresets[:10000]

MemoryError: 

In [2]:
BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(training_set)
print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set)) * 100)

save_classifier = open("pickled_algos/BernoulliNB_classifier5k.pickle", "wb")
pickle.dump(BernoulliNB_classifier, save_classifier)
save_classifier.close()

BernoulliNB_classifier accuracy percent: 73.79518072289156


In [3]:
LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_set)
print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogisticRegression_classifier, testing_set)) * 100)

save_classifier = open("pickled_algos/LogisticRegression_classifier5k.pickle", "wb")
pickle.dump(LogisticRegression_classifier, save_classifier)
save_classifier.close()

LogisticRegression_classifier accuracy percent: 73.3433734939759


In [4]:
LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)
print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testing_set)) * 100)

save_classifier = open("pickled_algos/LinearSVC_classifier5k.pickle", "wb")
pickle.dump(LinearSVC_classifier, save_classifier)
save_classifier.close()

LinearSVC_classifier accuracy percent: 71.53614457831326


In [5]:
SGDC_classifier = SklearnClassifier(SGDClassifier())
SGDC_classifier.train(training_set)
print("SGDClassifier accuracy percent:", nltk.classify.accuracy(SGDC_classifier, testing_set) * 100)

save_classifier = open("pickled_algos/SGDC_classifier5k.pickle", "wb")
pickle.dump(SGDC_classifier, save_classifier)
save_classifier.close()



SGDClassifier accuracy percent: 72.43975903614458


In [7]:
classifier = nltk.NaiveBayesClassifier.train(training_set)
print("Original Naive Bayes Algo accuracy percent:", (nltk.classify.accuracy(classifier, testing_set))*100)
classifier.show_most_informative_features(15)

###############
save_classifier = open("pickled_algos/originalnaivebayes5k.pickle","wb")
pickle.dump(classifier, save_classifier)
save_classifier.close()

Original Naive Bayes Algo accuracy percent: 74.09638554216868
Most Informative Features
              engrossing = True              pos : neg    =     21.1 : 1.0
                mediocre = True              neg : pos    =     17.0 : 1.0
                 generic = True              neg : pos    =     16.3 : 1.0
               inventive = True              pos : neg    =     14.4 : 1.0
              refreshing = True              pos : neg    =     13.7 : 1.0
                 routine = True              neg : pos    =     13.6 : 1.0
                  boring = True              neg : pos    =     13.5 : 1.0
                    flat = True              neg : pos    =     13.0 : 1.0
               wonderful = True              pos : neg    =     12.2 : 1.0
                   fails = True              neg : pos    =     11.8 : 1.0
                    warm = True              pos : neg    =     11.8 : 1.0
       thought-provoking = True              pos : neg    =     11.7 : 1.0
            

In [11]:
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)
print("MNB_classifier accuracy percent:", (nltk.classify.accuracy(MNB_classifier, testing_set)) * 100)

save_classifier = open("pickled_algos/MNB_classifier5k.pickle", "wb")
pickle.dump(MNB_classifier, save_classifier)
save_classifier.close()

MNB_classifier accuracy percent: 73.79518072289156


In [12]:
voted_classifier = VoteClassifier(
    classifier,
    LinearSVC_classifier,
    MNB_classifier,
    BernoulliNB_classifier,
    LogisticRegression_classifier)

print("voted_classifier accuracy percent:", (nltk.classify.accuracy(voted_classifier, testing_set)) * 100)


def sentiment(text):
    feats = find_features(text)

    return voted_classifier.classify(feats)


voted_classifier accuracy percent: 73.94578313253012


In [13]:
import sentiment_mod as s

print(s.sentiment("This movie was awesome! The acting was great, plot was wonderful, and there were pythons...so yea!"))
print(s.sentiment("This movie was utter junk. There were absolutely 0 pythons. I don't see what the point was at all. Horrible movie, 0/10"))


10664
('pos', 1.0)
('neg', 1.0)


In [14]:
import sentiment_mod as s

print(s.sentiment("This movie was awesome! The acting was great, plot was wonderful, and there were pythons...so yea!"))
print(s.sentiment("This movie was utter junk. There were absolutely 0 pythons. I don't see what the point was at all. Horrible movie, 0/10"))


('pos', 1.0)
('neg', 1.0)


In [27]:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sentiment_mod as s

#consumer key, consumer secret, access token, access secret.
ckey="w8wyoAKvlN9MEDR18HFzKyzUA"
csecret="W4R5i6VAkTgs6Tr6Ij3Jfk8x6k1rkqdPzb5R2JkFq2ccTQqdVr"
atoken="4157664553-PmCikHvFMBXmUf71TruTVi4hg8L4s2kdYaZgg5i"
asecret="Y4rxXI6s8TDmJD5YQZLRqHLPb7d9iTBFi40eoO7weE7Sw"

class listener(StreamListener):

    def on_data(self, data):
        try:
            all_data = json.loads(data)

            tweet = all_data["text"]
            sentiment_value, confidence = s.sentiment(tweet)
            print(tweet, sentiment_value, confidence)

            if confidence*100 >= 80:
                output = open("twitter-out.txt","a")
                output.write(sentiment_value)
                output.write('\n')
                output.close()

            return True
        except:
            return True

    def on_error(self, status):
        print(status)

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

twitterStream = Stream(auth, listener())
twitterStream.filter(track=["happy"])

RT @Anirudhchalla_3: 100 days ki ila untey, birthday ki inka mee imagination ke odhilesthunam 💯✊
Jai NTR JAI JAI NTR ✊
Advanced Happy Birth… pos 1.0
@AuthorAvaMRose Thank you so very much. Words can't adequately express how I feel about this. Really got me stoked… https://t.co/VjkcRlwUsN neg 1.0
Happy to share with you my new short drama film focusing on #OCD. https://t.co/A0Zd6GPc75 neg 1.0
自然にラギメイまたは青白を思い浮かべるけど、他のカプなら圧倒的happyにするわよ
幸せになぁれ( *´ω｀)⊃━☆・ﾟ*.+・ﾟ*.+・ﾟ*.+・ﾟ*.+ neg 1.0
RT @EmsInNeverland: Happy Birthday @xChinUpKid 🎉🎉🎉 I hope you have a wonderful day! 🎈🎈 https://t.co/VO6JxlfDSo pos 1.0
RT @ILoveYou: One day you're going to meet someone who makes you feel happy, beautiful, and stands by you even when you feel you don't dese… pos 1.0
@AttardMon @LizAgnes @murpharoo No problem dear, always happy to point out a bit of pretentiousness. neg 0.6
RT @Kirluvjed: @thescript @TheScript_Danny Danny &amp; Glen havin a moment at de end of 2nite's gig 😀 Sums up how happy me &amp; my pal felt 

kookie and yellow makes me so damn happy. reason why i love dna music video so much too hahshhshaha https://t.co/cmFeZbn4zW neg 1.0
Happy Birthday @fwjme!! I'm really glad you took this walk with me! Always remember that I'm always here for you. D… https://t.co/PNHcmfIF3m neg 1.0
あヤか♡happy life♡そこそこ隊 配信中!!
https://t.co/rqi52cccR6 neg 1.0
OTW @ san jose del monte bulacan. Happy fiesta Sitio Karahume :-* :-* :-* pos 1.0
@qldtrials @BrisRacingClub @mgnolan65 Thanks Chris we are super happy hopefully can find a suitable race soon #teambrenda pos 1.0
RT @ANI: Haji Mehboob, in his letter to Sri Sri Ravi Shankar, also mentions 'I am very happy to know that your good self is working to find… pos 1.0
RT @Wildcard_GG: We are happy to announce our newest giveaway, make sure to follow the instructions below. GLHF!

ASUS 144hz #Gaming Monito… pos 1.0
RT @sarkodie: Happy bday Young King @StrongmanBurner My soldier 👊🏾👊🏾👊🏾 God bless you lil Bro !!! Wisdom and more Life ... We celebrate you… neg 1.0
RT

RT @sh6wty: RIP to the boys that lost an amazing girl bc she got fed up of bending backwards going the extra mile for y’all to be happy whi… pos 1.0
RT @justamanda1: Yay just caught a favourite track of mine @Radio_WIGWAM #NowPlaying 🔥🎶Greed by @BlackForest_se happy weekends all 🎶🌘 https… pos 1.0
RT @LarryMac28: Happy Saturday morning from @DISupdates ONLY 8 days until The 2018 Daytona 500 on @NASCARONFOX BUT today on @FS1 cars on tr… neg 1.0
RT @camii_ni: @CiapCarter @nickcarter Girl, you are SO deserving!! I'm very very happy for you!! ❤❤❤ pos 1.0
RT @RosaBrighid: Happy Saturday 😀🙌🏼 https://t.co/VXC08lvKB9 neg 1.0


ProtocolError: ('Connection broken: IncompleteRead(0 bytes read)', IncompleteRead(0 bytes read))