In [1]:
import emoji
import contractions
import string
from nltk.corpus import stopwords
import spacy
import re
from joblib import load
import numpy as np

In [2]:
def replace_retweet(tweet, default_replace=""):
    """Replace the retweet marker ``RT`` and the whitespace that follows it.

    Args:
        tweet: Tweet text to clean.
        default_replace: Text substituted for each marker (default: removed).

    Returns:
        The tweet with every standalone ``RT<whitespace>`` marker replaced.
    """
    # The leading \b stops the pattern from eating the tail of upper-case
    # words such as "START now" or "ALERT level"; the raw string avoids the
    # invalid-escape warning that '\s' triggers in a normal literal.
    tweet = re.sub(r'\bRT\s+', default_replace, tweet)
    return tweet

def replace_user(tweet, default_replace="twitteruser"):
    """Replace every ``@mention`` in a tweet with a placeholder token.

    Args:
        tweet: Tweet text to clean.
        default_replace: Token written in place of each mention.

    Returns:
        The tweet with mentions replaced. An ``@`` that directly follows a
        word character (e.g. in an e-mail address) is left untouched because
        of the leading ``\\B``.
    """
    # Raw string: '\B' and '\w' are invalid escapes in a normal literal and
    # raise a SyntaxWarning on recent Python versions.
    tweet = re.sub(r'\B@\w+', default_replace, tweet)
    return tweet

def demojize(tweet):
    """Return ``tweet`` after passing it through ``emoji.demojize``."""
    return emoji.demojize(tweet)

def replace_url(tweet, default_replace=""):
    """Replace every ``http://`` / ``https://`` URL in a tweet.

    Args:
        tweet: Tweet text to clean.
        default_replace: Text substituted for each URL (default: removed).

    Returns:
        The tweet with each URL (scheme up to the next whitespace) replaced.
    """
    # Raw string avoids the invalid '\/' and '\S' escapes of the old literal.
    # IGNORECASE is needed because this step runs on text that has not been
    # lower-cased yet, so "HTTP://..." links would otherwise slip through.
    # The capture group is kept so a replacement using \1 still works.
    tweet = re.sub(r'(https?)://\S+', default_replace, tweet, flags=re.IGNORECASE)
    return tweet

def replace_hashtag(tweet, default_replace=""):
    """Replace each run of ``#`` characters, keeping the tag text itself.

    Args:
        tweet: Tweet text to clean.
        default_replace: Text substituted for each run of ``#``.

    Returns:
        The tweet with the hash symbols replaced.
    """
    return re.sub('#+', default_replace, tweet)

def to_lowercase(tweet):
    """Return a lower-cased copy of ``tweet``."""
    return tweet.lower()

def word_repetition(tweet):
    """Cap every run of one repeated character at two occurrences.

    A character followed by one or more copies of itself is rewritten as
    exactly two copies, so "soooo" becomes "soo" and "ll" stays "ll".
    """
    squeezed = re.sub(r'(.)\1+', r'\1\1', tweet)
    return squeezed

def punct_repetition(tweet, default_replace=""):
    """Collapse a run of ``?``, ``.`` and ``!`` down to its last character.

    Args:
        tweet: Tweet text to clean.
        default_replace: Text written in place of the dropped leading part of
            each run.

    Returns:
        The tweet where every run of two or more of these punctuation marks
        is reduced to ``default_replace`` followed by the run's final mark.
    """
    # The look-ahead keeps the final mark of the run out of the match.
    collapsed = re.sub(r'[\?\.\!]+(?=[\?\.\!])', default_replace, tweet)
    return collapsed

def fix_contractions(tweet):
    """Return ``tweet`` after passing it through ``contractions.fix``."""
    return contractions.fix(tweet)

In [3]:
# English stop words from NLTK, minus 'not' so that negation is not filtered
# out by process() below.
stop_words = set(stopwords.words('english')) - {'not'}

# spaCy pipeline used by process() to obtain token lemmas.
nlp = spacy.load('en_core_web_sm')

def process(sent):
    """Lemmatize ``sent`` and drop pronoun placeholders, stop words and punctuation.

    Args:
        sent: Text to run through the module-level ``nlp`` pipeline.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    lemmas = (token.lemma_ for token in nlp(sent))
    kept = [
        lemma
        for lemma in lemmas
        # '-PRON-' is the pronoun placeholder lemma — presumably emitted by
        # the spaCy version this was written for; confirm against the
        # installed release.
        if lemma != '-PRON-'
        and lemma not in stop_words
        # Substring test against the punctuation string, so multi-character
        # slices of it such as "()" are dropped as well.
        and lemma not in string.punctuation
    ]
    return ' '.join(kept)

# Collection of words that remove_words() strips from the processed text.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code — only load 'words.joblib' from a trusted source.
useless_words = load('words.joblib')
def remove_words(d):
    """Drop every whitespace-separated token of ``d`` found in ``useless_words``.

    Args:
        d: Text whose tokens are separated by whitespace.

    Returns:
        The remaining tokens joined by single spaces.
    """
    return ' '.join(word for word in d.split() if word not in useless_words)

In [4]:
def preprocess_text(sent):
    """Run the full cleaning pipeline on one raw tweet.

    The steps run in a fixed order: retweet marker, user mentions, emoji,
    URLs, hashtags, lower-casing, repeated characters, repeated punctuation,
    contractions, lemmatization/stop-word filtering, and finally removal of
    the words listed in ``useless_words``.

    Args:
        sent: Raw tweet text.

    Returns:
        The cleaned text produced by the last pipeline step.
    """
    # (step, keyword arguments) pairs, applied top to bottom.
    pipeline = (
        (replace_retweet, {"default_replace": ""}),
        (replace_user, {"default_replace": "twitteruser"}),
        (demojize, {}),
        (replace_url, {"default_replace": ""}),
        (replace_hashtag, {"default_replace": ""}),
        (to_lowercase, {}),
        (word_repetition, {}),
        (punct_repetition, {"default_replace": ""}),
        (fix_contractions, {}),
        (process, {}),
        (remove_words, {}),
    )

    tweet = sent
    for step, options in pipeline:
        tweet = step(tweet, **options)
    return tweet

In [5]:
# Artifacts already loaded by predict(), keyed by file path.
_ARTIFACT_CACHE = {}


def _load_artifact(path):
    """Load a joblib artifact once and reuse it on later calls."""
    if path not in _ARTIFACT_CACHE:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code — only load artifacts from a trusted source.
        _ARTIFACT_CACHE[path] = load(path)
    return _ARTIFACT_CACHE[path]


def predict(arr):
    """Predict the label of one raw text.

    The text is cleaned with ``preprocess_text``, transformed by the object
    stored in 'vec.joblib', and classified by the object stored in
    'logreg.joblib'. Both artifacts are read from disk on the first call
    only; re-run this cell to pick up changed files.

    Args:
        arr: Raw input text.

    Returns:
        Whatever the stored model's ``predict`` returns for the single
        transformed sample.
    """
    text = preprocess_text(arr)
    vectorizer = _load_artifact('vec.joblib')
    model = _load_artifact('logreg.joblib')

    # transform() is handed a one-element array, as in the original code.
    features = vectorizer.transform(np.array([text]))

    return model.predict(features)

In [6]:
# Manual check: prompt for one text, run it through predict() and print the
# returned label.
s = input('Enter the sentiment :')
prediction = predict(s)

print(prediction)

Enter the sentiment :@ginidietrich Weather reports of a week of upper 60s make me happy  Perfect running weather!
[1]


In [7]:
# Second manual check: prompt for another text, run it through predict() and
# print the returned label.
s = input('Enter the sentiment :')
prediction = predict(s)

print(prediction)

Enter the sentiment :stupid playstation! stupid controllers don't work so I can't play kingdom hearts
[0]


#### Testing with pywebio

In [6]:
from pywebio.input import input,TEXT
from pywebio.output import put_text

In [7]:
def predict_review():
    """Ask for a review, classify it and display the sentiment.

    Reads one text via ``input`` (imported from pywebio in the cell above),
    passes it to ``predict`` and shows the result with ``put_text``. Label 0
    is shown as 'Negative', label 1 as 'Positive' and any other label as
    'Unknown'.
    """
    s = input("Enter the review：", type = TEXT)
    prediction = predict(s)

    if prediction == 0:
        review = 'Negative'
    elif prediction == 1:
        review = 'Positive'
    else:
        # Without this branch an unexpected label left `review` unbound and
        # the put_text call below raised UnboundLocalError.
        review = 'Unknown'

    put_text('Sentiment is : ',review)

In [None]:
# Start the interactive pywebio prompt defined in predict_review().
predict_review()