## Importing the required libraries.

In [5]:
import nltk
import random
from nltk.corpus import state_union
from nltk.tokenize import PunktSentenceTokenizer
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.corpus import gutenberg
from nltk.corpus import wordnet
from nltk.corpus import movie_reviews
import pickle
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB,BernoulliNB

from nltk.classify import ClassifierI
from statistics import mode
from sklearn.linear_model import LogisticRegression,SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC

## Sentence Tokenizing

In [6]:
EXAMPLE_TEXT = "Hello Mr. Smith, how are you doing today? The weather is great, and Python is awesome. The sky is pinkish-blue. You shouldn't eat cardboard."
print(sent_tokenize(EXAMPLE_TEXT))

['Hello Mr. Smith, how are you doing today?', 'The weather is great, and Python is awesome.', 'The sky is pinkish-blue.', "You shouldn't eat cardboard."]


## Word Tokenizing 

In [7]:
print(word_tokenize(EXAMPLE_TEXT))

['Hello', 'Mr.', 'Smith', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'pinkish-blue', '.', 'You', 'should', "n't", 'eat', 'cardboard', '.']


## Stop words 

In [8]:
example_sent = "This is a sample sentence, showing off the stop words filtration."

stop_words = set(stopwords.words('english'))

word_tokens = word_tokenize(example_sent)

filtered_sentence = [w for w in word_tokens if not w in stop_words]

filtered_sentence = []

for w in word_tokens:
    if w not in stop_words:
        filtered_sentence.append(w)

print(word_tokens)
print(filtered_sentence)

['This', 'is', 'a', 'sample', 'sentence', ',', 'showing', 'off', 'the', 'stop', 'words', 'filtration', '.']
['This', 'sample', 'sentence', ',', 'showing', 'stop', 'words', 'filtration', '.']


## Stemming 

In [9]:
ps = PorterStemmer()
example_words = ["python","pythoner","pythoning","pythoned","pythonly"]
for w in example_words:
    print(ps.stem(w))

python
python
python
python
pythonli


In [10]:
new_text = "It is important to by very pythonly while you are pythoning with python. All pythoners have pythoned poorly at least once."
words = word_tokenize(new_text)

for w in words:
    print(ps.stem(w))

It
is
import
to
by
veri
pythonli
while
you
are
python
with
python
.
all
python
have
python
poorli
at
least
onc
.


## Getting dataset from imported module.

In [11]:
train_text = state_union.raw("2005-GWBush.txt")
sample_text = state_union.raw("2006-GWBush.txt")
#sample_text = "the little yellow dog barked at the cat"

## Training the PunktSentenceTokenizer

In [12]:
custom_sent_tokenizer = PunktSentenceTokenizer(train_text)

## Tokenize the sample text using trained PunktSentenceTokenizer

In [13]:
tokenized = custom_sent_tokenizer.tokenize(sample_text)

## Function to process tagged words

In [14]:
def tagged_content():
    try:
        for i in tokenized[:5]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            print(tagged)

    except Exception as e:
        print(str(e))

In [15]:
tagged_content()

[('PRESIDENT', 'NNP'), ('GEORGE', 'NNP'), ('W.', 'NNP'), ('BUSH', 'NNP'), ("'S", 'POS'), ('ADDRESS', 'NNP'), ('BEFORE', 'IN'), ('A', 'NNP'), ('JOINT', 'NNP'), ('SESSION', 'NNP'), ('OF', 'IN'), ('THE', 'NNP'), ('CONGRESS', 'NNP'), ('ON', 'NNP'), ('THE', 'NNP'), ('STATE', 'NNP'), ('OF', 'IN'), ('THE', 'NNP'), ('UNION', 'NNP'), ('January', 'NNP'), ('31', 'CD'), (',', ','), ('2006', 'CD'), ('THE', 'NNP'), ('PRESIDENT', 'NNP'), (':', ':'), ('Thank', 'NNP'), ('you', 'PRP'), ('all', 'DT'), ('.', '.')]
[('Mr.', 'NNP'), ('Speaker', 'NNP'), (',', ','), ('Vice', 'NNP'), ('President', 'NNP'), ('Cheney', 'NNP'), (',', ','), ('members', 'NNS'), ('of', 'IN'), ('Congress', 'NNP'), (',', ','), ('members', 'NNS'), ('of', 'IN'), ('the', 'DT'), ('Supreme', 'NNP'), ('Court', 'NNP'), ('and', 'CC'), ('diplomatic', 'JJ'), ('corps', 'NN'), (',', ','), ('distinguished', 'JJ'), ('guests', 'NNS'), (',', ','), ('and', 'CC'), ('fellow', 'JJ'), ('citizens', 'NNS'), (':', ':'), ('Today', 'VB'), ('our', 'PRP$'), ('nat

## Chuking function

In [16]:
def chuck_content():
    try:
        for i in tokenized[:2]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            chunkGram = r"""Chunk: {<RB.?>*<VB.?>*<NNP>+<NN>?}"""
            chunkParser = nltk.RegexpParser(chunkGram)
            chunked = chunkParser.parse(tagged)
            #chunked.draw()
            for subtree in chunked.subtrees():
                print(subtree)

    except Exception as e:
        print(str(e))

In [17]:
chuck_content()

(S
  (Chunk PRESIDENT/NNP GEORGE/NNP W./NNP BUSH/NNP)
  'S/POS
  (Chunk ADDRESS/NNP)
  BEFORE/IN
  (Chunk A/NNP JOINT/NNP SESSION/NNP)
  OF/IN
  (Chunk THE/NNP CONGRESS/NNP ON/NNP THE/NNP STATE/NNP)
  OF/IN
  (Chunk THE/NNP UNION/NNP January/NNP)
  31/CD
  ,/,
  2006/CD
  (Chunk THE/NNP PRESIDENT/NNP)
  :/:
  (Chunk Thank/NNP)
  you/PRP
  all/DT
  ./.)
(Chunk PRESIDENT/NNP GEORGE/NNP W./NNP BUSH/NNP)
(Chunk ADDRESS/NNP)
(Chunk A/NNP JOINT/NNP SESSION/NNP)
(Chunk THE/NNP CONGRESS/NNP ON/NNP THE/NNP STATE/NNP)
(Chunk THE/NNP UNION/NNP January/NNP)
(Chunk THE/NNP PRESIDENT/NNP)
(Chunk Thank/NNP)
(S
  (Chunk Mr./NNP Speaker/NNP)
  ,/,
  (Chunk Vice/NNP President/NNP Cheney/NNP)
  ,/,
  members/NNS
  of/IN
  (Chunk Congress/NNP)
  ,/,
  members/NNS
  of/IN
  the/DT
  (Chunk Supreme/NNP Court/NNP)
  and/CC
  diplomatic/JJ
  corps/NN
  ,/,
  distinguished/JJ
  guests/NNS
  ,/,
  and/CC
  fellow/JJ
  citizens/NNS
  :/:
  Today/VB
  our/PRP$
  nation/NN
  lost/VBD
  a/DT
  beloved/VBN
  ,/,
  g

## Chicking function 

In [18]:
def chicking_content():
    try:
        for i in tokenized[:2]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)

            chunkGram = r"""Chunk: {<.*>+}
                                    }<VB.?|IN|DT|TO>+{"""

            chunkParser = nltk.RegexpParser(chunkGram)
            chunked = chunkParser.parse(tagged)

            chunked.draw()

    except Exception as e:
        print(str(e))

In [19]:
chicking_content()

## Function to process the Name entitity 

In [20]:
def process_content():
    try:
        for i in tokenized[:3]:
            words = nltk.word_tokenize(i)
            tagged = nltk.pos_tag(words)
            #print(tagged)
            nameEnt = nltk.ne_chunk(tagged)
            nameEnt.draw()
    except Exception as e:
        print(str(e))

In [21]:
process_content()

## Lemmatizing

In [22]:
lemmatizer = WordNetLemmatizer()

print(lemmatizer.lemmatize("cats"))
print(lemmatizer.lemmatize("cacti"))
print(lemmatizer.lemmatize("geese"))
print(lemmatizer.lemmatize("rocks"))
print(lemmatizer.lemmatize("python"))
print(lemmatizer.lemmatize("better", pos="a"))
print(lemmatizer.lemmatize("best", pos="a"))
print(lemmatizer.lemmatize("run"))
print(lemmatizer.lemmatize("run",'v'))

cat
cactus
goose
rock
python
good
best
run
run


## Location of nltk files 

In [23]:
print(nltk.__file__)

/home/ns/anaconda3/envs/tf/lib/python3.6/site-packages/nltk/__init__.py


## Accessing a file from corpus 

In [24]:
sample = gutenberg.raw("bible-kjv.txt")

tok = sent_tokenize(sample)

for x in range(5):
    print(tok[x])

[The King James Bible]

The Old Testament of the King James Bible

The First Book of Moses:  Called Genesis


1:1 In the beginning God created the heaven and the earth.
1:2 And the earth was without form, and void; and darkness was upon
the face of the deep.
And the Spirit of God moved upon the face of the
waters.
1:3 And God said, Let there be light: and there was light.
1:4 And God saw the light, that it was good: and God divided the light
from the darkness.


## Wordnet 

In [25]:
syns = wordnet.synsets("program")

In [26]:
print(syns[0].name())

plan.n.01


In [27]:
print(syns[0].lemmas()[0].name())

plan


In [28]:
print(syns[0].examples())

['they drew up a six-step plan', 'they discussed plans for a new bond issue']


In [29]:
synonyms = []
antonyms = []

for syn in wordnet.synsets("good"):
    for l in syn.lemmas():
        synonyms.append(l.name())
        if l.antonyms():
            antonyms.append(l.antonyms()[0].name())

## Synonyms and Antonyms

In [30]:
print(set(synonyms))
print(set(antonyms))

{'respectable', 'practiced', 'skilful', 'unspoilt', 'effective', 'commodity', 'right', 'undecomposed', 'salutary', 'thoroughly', 'soundly', 'good', 'beneficial', 'just', 'proficient', 'dependable', 'unspoiled', 'trade_good', 'honest', 'sound', 'full', 'ripe', 'goodness', 'upright', 'in_force', 'near', 'honorable', 'skillful', 'serious', 'expert', 'secure', 'estimable', 'safe', 'in_effect', 'well', 'adept', 'dear'}
{'ill', 'evil', 'evilness', 'badness', 'bad'}


## Similarity of two words

In [31]:
w1 = wordnet.synset('ship.n.01')
w2 = wordnet.synset('boat.n.01')
print(w1.wup_similarity(w2))

0.9090909090909091


## Text classification 

In [32]:
documents = [(list(movie_reviews.words(fileid)), category)
             for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)]
random.shuffle(documents)

In [33]:
print(documents[1])

(['clint', 'eastwood', ',', 'in', 'his', 'ripe', 'old', 'age', ',', 'is', 'cashing', 'one', 'talent', 'in', 'for', 'another', '.', 'midnight', 'in', 'the', 'garden', 'of', 'good', 'and', 'evil', 'is', 'an', 'eastwood', '-', 'directed', 'film', 'clint', 'isn', "'", 't', 'even', 'in', ',', 'and', 'it', "'", 's', 'damn', 'good', '.', 'adapted', 'from', 'a', 'best', '-', 'selling', 'john', 'berendt', 'novel', 'based', 'on', 'true', 'events', ',', 'this', 'movie', 'is', 'set', 'in', 'the', 'bizarre', 'georgia', 'town', 'of', 'savannah', ',', 'where', 'people', 'walk', 'invisible', 'dogs', 'and', 'attach', 'horseflies', 'to', 'their', 'head', '.', 'and', 'that', "'", 's', 'just', 'the', 'mayor', '.', 'as', 'director', 'and', 'producer', ',', 'eastwood', 'contributes', 'a', 'self', '-', 'indulgent', 'but', 'very', 'competent', '150', 'minutes', ',', 'neatly', 'balancing', 'drama', ',', 'suspense', 'and', 'humor', '.', 'like', 'all', 'great', 'movies', ',', 'midnight', 'in', 'the', 'garden', '

In [34]:
all_words = []
for w in movie_reviews.words():
    all_words.append(w.lower())


## Frequency analysis 

In [35]:
all_words = nltk.FreqDist(all_words)
print(all_words.most_common(15))


[(',', 77717), ('the', 76529), ('.', 65876), ('a', 38106), ('and', 35576), ('of', 34123), ('to', 31937), ("'", 30585), ('is', 25195), ('in', 21822), ('s', 18513), ('"', 17612), ('it', 16107), ('that', 15924), ('-', 15595)]


## Occurence of a word 

In [36]:
print(all_words["boy"])

362


## Word features

In [37]:
word_features = list(all_words.keys())[:3000]
def find_features(document):
    words = set(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)
    return features

In [38]:
print((find_features(movie_reviews.words('neg/cv000_29416.txt'))))



In [39]:
featuresets = [(find_features(rev), category) for (rev, category) in documents]

# Train using Naive Bayes  

In [40]:
training_set = featuresets[:1900]

testing_set = featuresets[1900:]

In [41]:
classifier = nltk.NaiveBayesClassifier.train(training_set)

## Print the accuracy 

In [42]:
print("Classifier accuracy:",(nltk.classify.accuracy(classifier, testing_set))*100,"%")

Classifier accuracy: 86.0 %


## Valuable words 

In [43]:
classifier.show_most_informative_features(15)

Most Informative Features
                   sucks = True              neg : pos    =      9.9 : 1.0
                  annual = True              pos : neg    =      9.5 : 1.0
                     ugh = True              neg : pos    =      9.1 : 1.0
                 frances = True              pos : neg    =      8.9 : 1.0
             silverstone = True              neg : pos    =      7.8 : 1.0
                  suvari = True              neg : pos    =      7.1 : 1.0
                    mena = True              neg : pos    =      7.1 : 1.0
                  shoddy = True              neg : pos    =      7.1 : 1.0
              schumacher = True              neg : pos    =      7.1 : 1.0
                 idiotic = True              neg : pos    =      7.1 : 1.0
                  regard = True              pos : neg    =      6.9 : 1.0
               atrocious = True              neg : pos    =      6.7 : 1.0
                everyday = True              pos : neg    =      6.6 : 1.0

# Save the trained classifier  

In [44]:
save_classifier = open("naivebayes.pickle","wb")
pickle.dump(classifier, save_classifier)
save_classifier.close()

# Use the saved classifier 

In [45]:
classifier_u = open("naivebayes.pickle","rb")
classifier = pickle.load(classifier_u)
classifier_u.close()

# Using SKLearn algo from NLTK 

In [46]:
MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)

<SklearnClassifier(MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))>

In [47]:
print("MNB_classifier ",nltk.classify.accuracy(MNB_classifier, testing_set)*100)

MNB_classifier  86.0


In [48]:
BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(training_set)
print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set))*100)

LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_set)
print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogisticRegression_classifier, testing_set))*100)

SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)
print("SGDClassifier_classifier accuracy percent:", (nltk.classify.accuracy(SGDClassifier_classifier, testing_set))*100)

SVC_classifier = SklearnClassifier(SVC())
SVC_classifier.train(training_set)
print("SVC_classifier accuracy percent:", (nltk.classify.accuracy(SVC_classifier, testing_set))*100)

LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)
print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testing_set))*100)

NuSVC_classifier = SklearnClassifier(NuSVC())
NuSVC_classifier.train(training_set)
print("NuSVC_classifier accuracy percent:", (nltk.classify.accuracy(NuSVC_classifier, testing_set))*100)

BernoulliNB_classifier accuracy percent: 86.0




LogisticRegression_classifier accuracy percent: 84.0




SGDClassifier_classifier accuracy percent: 82.0




SVC_classifier accuracy percent: 80.0
LinearSVC_classifier accuracy percent: 85.0
NuSVC_classifier accuracy percent: 88.0


## Combining the classifiers 

In [49]:
class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers
    
    def classify(self, features):
        votes=[]
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)
    
    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
            
        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf

In [50]:
voted_classifier = VoteClassifier(classifier,
                                 NuSVC_classifier,
                                 LinearSVC_classifier,
                                 SGDClassifier_classifier,
                                 MNB_classifier,
                                 BernoulliNB_classifier,
                                 LogisticRegression_classifier)

print("Voted_classifier accuracy percent:", (nltk.classify.accuracy(voted_classifier, testing_set)*100))

print("Classification: ", voted_classifier.classify(training_set[0][0]),
     "Confidence: ", voted_classifier.confidence(training_set[0][0])*100)

print("Classification: ", voted_classifier.classify(training_set[1][0]),
     "Confidence: ", voted_classifier.confidence(training_set[1][0])*100)

print("Classification: ", voted_classifier.classify(training_set[2][0]),
     "Confidence: ", voted_classifier.confidence(training_set[2][0])*100)

print("Classification: ", voted_classifier.classify(training_set[3][0]),
     "Confidence: ", voted_classifier.confidence(training_set[3][0])*100)

print("Classification: ", voted_classifier.classify(training_set[4][0]),
     "Confidence: ", voted_classifier.confidence(training_set[4][0])*100)

Voted_classifier accuracy percent: 90.0
Classification:  neg Confidence:  100.0
Classification:  pos Confidence:  100.0
Classification:  neg Confidence:  100.0
Classification:  pos Confidence:  100.0
Classification:  neg Confidence:  100.0


## As there are fluxuations in Accuracy percent on live twitter analysis, we are going to train our algirithm in different better dataset 

In [71]:
short_pos = open("positive.txt","r").read()
short_neg = open("negative.txt","r").read()

documents = []
all_words = []
allowed_word_types = ["J"]

for p in short_pos.split('\n'):
    documents.append( (p, "pos") )
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())

    
for p in short_neg.split('\n'):
    documents.append( (p, "neg") )
    words = word_tokenize(p)
    pos = nltk.pos_tag(words)
    for w in pos:
        if w[1][0] in allowed_word_types:
            all_words.append(w[0].lower())
    
save_document = open("documents.pickle", "wb")
pickle.dump(documents, save_document)
save_document.close()


short_pos_words = word_tokenize(short_pos)
short_neg_words = word_tokenize(short_neg)

for w in short_pos_words:
    all_words.append(w.lower())
    
for w in short_neg_words:
    all_words.append(w.lower())
    
all_words = nltk.FreqDist(all_words)

AttributeError: 'FreqDist' object has no attribute 'append'

In [52]:
word_features = list(all_words.keys())[:5000]
def find_features(document):
    words = word_tokenize(document)
    features = {}
    for w in word_features:
        features[w] = (w in words)
    return features

In [53]:
featuresets = [(find_features(rev), category) for (rev, category) in documents]
random.shuffle(featuresets)

In [68]:
save_features = open("features.pickle", "wb")
pickle.dump(word_features, save_features)
save_features.close()

training_set = featuresets[:10000]

testing_set = featuresets[10000:]

#training_set = featuresets[100:]
#testing_set = featuresets[:100]

classifier = nltk.NaiveBayesClassifier.train(training_set)

print("Classifier accuracy:",(nltk.classify.accuracy(classifier, testing_set))*100,"%")

classifier.show_most_informative_features(15)

save_classifier = open("naivebayes.pickle","wb")
pickle.dump(classifier, save_classifier)
save_classifier.close()

classifier_u = open("naivebayes.pickle", "rb")
classifier = pickle.load(classifier_u)
classifier_u.close()

MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)

print("MNB_classifier ",nltk.classify.accuracy(MNB_classifier, testing_set)*100)

BernoulliNB_classifier = SklearnClassifier(BernoulliNB())
BernoulliNB_classifier.train(training_set)
print("BernoulliNB_classifier accuracy percent:", (nltk.classify.accuracy(BernoulliNB_classifier, testing_set))*100)

LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_set)
print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogisticRegression_classifier, testing_set))*100)

SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)
print("SGDClassifier_classifier accuracy percent:", (nltk.classify.accuracy(SGDClassifier_classifier, testing_set))*100)

SVC_classifier = SklearnClassifier(SVC())
SVC_classifier.train(training_set)
print("SVC_classifier accuracy percent:", (nltk.classify.accuracy(SVC_classifier, testing_set))*100)

LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)
print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testing_set))*100)

NuSVC_classifier = SklearnClassifier(NuSVC())
NuSVC_classifier.train(training_set)
print("NuSVC_classifier accuracy percent:", (nltk.classify.accuracy(NuSVC_classifier, testing_set))*100)

class VoteClassifier(ClassifierI):
    def __init__(self, *classifiers):
        self._classifiers = classifiers
    
    def classify(self, features):
        votes=[]
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
        return mode(votes)
    
    def confidence(self, features):
        votes = []
        for c in self._classifiers:
            v = c.classify(features)
            votes.append(v)
            
        choice_votes = votes.count(mode(votes))
        conf = choice_votes / len(votes)
        return conf
    
voted_classifier = VoteClassifier(classifier,
                                 NuSVC_classifier,
                                 LinearSVC_classifier,
                                 SGDClassifier_classifier,
                                 MNB_classifier,
                                 BernoulliNB_classifier,
                                 LogisticRegression_classifier)

print("Voted_classifier accuracy percent:", (nltk.classify.accuracy(voted_classifier, testing_set)*100))

print("Classification: ", voted_classifier.classify(training_set[0][0]),
     "Confidence: ", voted_classifier.confidence(training_set[0][0])*100)

print("Classification: ", voted_classifier.classify(training_set[1][0]),
     "Confidence: ", voted_classifier.confidence(training_set[1][0])*100)

print("Classification: ", voted_classifier.classify(training_set[2][0]),
     "Confidence: ", voted_classifier.confidence(training_set[2][0])*100)

print("Classification: ", voted_classifier.classify(training_set[3][0]),
     "Confidence: ", voted_classifier.confidence(training_set[3][0])*100)

print("Classification: ", voted_classifier.classify(training_set[4][0]),
     "Confidence: ", voted_classifier.confidence(training_set[4][0])*100)

Classifier accuracy: 72.50755287009063 %
Most Informative Features
              engrossing = True              pos : neg    =     19.8 : 1.0
                captures = True              pos : neg    =     18.4 : 1.0
                provides = True              pos : neg    =     17.1 : 1.0
              unexpected = True              pos : neg    =     15.1 : 1.0
               inventive = True              pos : neg    =     15.1 : 1.0
              refreshing = True              pos : neg    =     13.7 : 1.0
            refreshingly = True              pos : neg    =     13.1 : 1.0
                  unique = True              pos : neg    =     13.1 : 1.0
               wonderful = True              pos : neg    =     13.1 : 1.0
                    warm = True              pos : neg    =     12.3 : 1.0
               realistic = True              pos : neg    =     11.7 : 1.0
             mesmerizing = True              pos : neg    =     11.7 : 1.0
                    ages = True  



LogisticRegression_classifier accuracy percent: 74.01812688821752




SGDClassifier_classifier accuracy percent: 68.12688821752266




SVC_classifier accuracy percent: 47.88519637462236
LinearSVC_classifier accuracy percent: 72.65861027190333
NuSVC_classifier accuracy percent: 74.01812688821752
Voted_classifier accuracy percent: 72.9607250755287
Classification:  pos Confidence:  100.0
Classification:  neg Confidence:  100.0
Classification:  pos Confidence:  100.0
Classification:  neg Confidence:  100.0
Classification:  pos Confidence:  100.0


## Pickle MNB_Classifier 

In [57]:
save_classifier = open("MNB.pickle","wb")
pickle.dump(MNB_classifier, save_classifier)
save_classifier.close()

## Pickle  BernoulliNB_classifier

In [58]:
save_classifier = open("Bernouli.pickle","wb")
pickle.dump(BernoulliNB_classifier, save_classifier)
save_classifier.close()

## Pickle LogisticRegression_classifier 

In [59]:
save_classifier = open("LogisticRegression.pickle","wb")
pickle.dump(LogisticRegression_classifier, save_classifier)
save_classifier.close()

## Pickle SGDClassifier_classifier 

In [60]:
save_classifier = open("SGDClassifier.pickle","wb")
pickle.dump(SGDClassifier_classifier, save_classifier)
save_classifier.close()

## Pickle SVC_classifier 

In [62]:
save_classifier = open("SVC.pickle","wb")
pickle.dump(SVC_classifier, save_classifier)
save_classifier.close()

## Pickle LinearSVC_classifier 

In [63]:
save_classifier = open("LinearSVC.pickle","wb")
pickle.dump(LinearSVC_classifier, save_classifier)
save_classifier.close()

## Pickle NuSVC_classifier

In [64]:
save_classifier = open("NuSVC.pickle","wb")
pickle.dump(NuSVC_classifier, save_classifier)
save_classifier.close()

## Pickle Voted_classifier  

In [65]:
save_classifier = open("Voted.pickle","wb")
pickle.dump(voted_classifier, save_classifier)
save_classifier.close()