In [98]:
# Feedback: the raw input text. It is summarised with TextRank and the summary is
# then cleaned and passed to the sentiment classifier in the cells below.

custom_tweet = "People are sad."

In [99]:
# Textrank: load the small English spaCy model and append a pytextrank component
# as the last stage of its pipeline.

import spacy
import pytextrank
nlp = spacy.load("en_core_web_sm")

# NOTE(review): `tr.PipelineComponent` with `add_pipe(<callable>, name=...)` looks like
# the spaCy 2.x / pytextrank 2.x API - confirm the pinned versions before upgrading.
tr = pytextrank.TextRank(logger=None)
nlp.add_pipe(tr.PipelineComponent, name="textrank", last=True)
# "stop.json" is a relative path, so it is resolved against the working directory.
tr.load_stopwords(path="stop.json")

In [100]:
# Summarize

def sentence_selection(paragraph):
    """Summarise ``paragraph`` with TextRank and return the selected sentence texts.

    The text is run through the module-level ``nlp`` pipeline. As a side effect
    the pipeline component names are printed, followed by the rank, count, text
    and chunks of every ranked phrase.

    Args:
        paragraph: Text to summarise.

    Returns:
        A list with the ``.text`` of each summary sentence (at most 5 are
        requested, built from up to 20 phrases) whose ``len()`` is greater than 3.
    """
    doc = nlp(paragraph)

    print("pipeline", nlp.pipe_names)

    for ranked in doc._.phrases:
        print("{:.4f} {:5d}  {}".format(ranked.rank, ranked.count, ranked.text))
        print(ranked.chunks)

    summary = doc._.textrank.summary(limit_phrases=20, limit_sentences=5)
    # Very short sentences (length 3 or less) are not kept.
    return [span.text for span in summary if len(span) > 3]

In [101]:
# Summarise the input text; the call also prints the pipeline names and ranked phrases.
imp_sentences = sentence_selection(custom_tweet)

pipeline ['tagger', 'parser', 'ner', 'textrank']
0.2722     1  people
[People]


In [102]:
# Rebuild the tweet from the selected sentences; every sentence (including the
# first) is prefixed with a single space.
custom_tweet = "".join(" " + sentence for sentence in imp_sentences)

print(custom_tweet)

 People are sad.


In [103]:
# load model

import pickle

filename = 'finalized_model.sav'
# SECURITY: unpickling can execute arbitrary code - only load a model file you trust.
# The context manager guarantees the file handle is closed after loading; the
# previous bare open() call left it open.
with open(filename, 'rb') as model_file:
    classifier = pickle.load(model_file)

In [104]:
# Cleaning

import re, string
from nltk.tag import pos_tag
from nltk.stem.wordnet import WordNetLemmatizer

# Patterns are compiled once rather than on every token. Raw strings are used so
# that "\(" and "\)" are not treated as (invalid) string escape sequences, which
# newer Python versions warn about. The resulting regexes are unchanged.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+#]|[!*\(\),]|'
    r'(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)
_MENTION_PATTERN = re.compile(r"(@[A-Za-z0-9_]+)")


def remove_noise(tweet_tokens, stop_words = ()):
    """Strip URLs and @mentions from tokens, lemmatize them and drop noise.

    Each token is POS-tagged, has URL and @mention substrings removed, is
    lemmatized with a WordNet part of speech derived from its tag, and is kept
    (lower-cased) only if it is non-empty, passes the punctuation check and is
    not a stop word.

    Args:
        tweet_tokens: Sequence of token strings for a single text.
        stop_words: Container of lower-case words to discard. Defaults to an
            empty tuple, i.e. no stop-word filtering.

    Returns:
        A list of cleaned, lower-cased tokens in their original order.
    """
    # One lemmatizer for the whole call instead of a new instance per token.
    lemmatizer = WordNetLemmatizer()
    cleaned_tokens = []

    for token, tag in pos_tag(tweet_tokens):
        token = _URL_PATTERN.sub('', token)
        token = _MENTION_PATTERN.sub('', token)

        # Map the tagger's tag prefix to a WordNet part of speech; anything
        # that is neither a noun nor a verb is lemmatized as an adjective.
        if tag.startswith("NN"):
            pos = 'n'
        elif tag.startswith('VB'):
            pos = 'v'
        else:
            pos = 'a'

        token = lemmatizer.lemmatize(token, pos)
        lowered = token.lower()

        # `in string.punctuation` is a substring test: it rejects single
        # punctuation characters (and any run that occurs contiguously in
        # string.punctuation). Kept as-is so the produced features stay
        # consistent with whatever the classifier was trained on.
        if len(token) > 0 and token not in string.punctuation and lowered not in stop_words:
            cleaned_tokens.append(lowered)
    return cleaned_tokens

In [107]:
# Predict

from nltk.tokenize import word_tokenize

# Tokenise and clean the summarised tweet.
custom_tokens = remove_noise(word_tokenize(custom_tweet))

# The classifier is given a feature dict mapping every cleaned token to True.
result = classifier.classify(dict.fromkeys(custom_tokens, True))

In [108]:
# Display the label the classifier returned for the cleaned tweet.
result

'Negative'