In [679]:
# functions from 02 (needed by the vectorizer)
def simple_tokenization(review):
    """Split a review into tokens and drop punctuation tokens.

    The text is tokenized with ``nltk.tokenize.word_tokenize`` and every token
    that is found in ``string.punctuation`` is discarded.

    Args:
        review: Text to tokenize.

    Returns:
        list: The remaining tokens, in their original order.
    """
    # NOTE(review): `nltk` and `string` are not imported in the visible cells;
    # confirm they are imported before this function is called.
    kept = []
    for tok in nltk.tokenize.word_tokenize(review):
        # `string.punctuation` is a str, so this is a substring test: a
        # multi-character token such as ",-" is dropped as well.
        if tok in string.punctuation:
            continue
        kept.append(tok)
    return kept

def short_form_transform(text):
    """Expand English contractions in ``text`` (e.g. "isn't" -> "is not").

    Each contraction is replaced everywhere it occurs, including inside longer
    words, because the patterns carry no word boundaries. The patterns are
    written in lowercase only.

    Args:
        text: Text to transform.

    Returns:
        str: The text with the listed contractions expanded.
    """
    # (contraction, expansion) pairs, applied top to bottom.
    # Repeated entries for "he's", "you've", "we've" and "it's" were no-ops
    # (no expansion contains an apostrophe, so a second pass finds nothing)
    # and are listed only once here.
    expansions = (
        ("isn't", 'is not'),
        ("aren't", 'are not'),
        ("he's", 'he is'),  # also turns "she's" into "she is"
        ("wasn't", 'was not'),
        ("there's", 'there is'),
        ("couldn't", 'could not'),
        ("can't", 'can not'),
        ("won't", 'will not'),
        ("they're", 'they are'),
        ("she's", 'she is'),
        ("wouldn't", 'would not'),
        ("haven't", 'have not'),
        ("that's", 'that is'),
        ("you've", 'you have'),
        ("what's", 'what is'),
        ("weren't", 'were not'),
        ("we're", 'we are'),
        ("hasn't", 'has not'),
        # NOTE(review): this is the only pattern written with a typographic
        # apostrophe (U+2019), so a plain "i'd" is left untouched. Changing it
        # would alter the tokens seen by an already-fitted vectorizer - confirm
        # before fixing.
        ('i’d', 'i would'),
        ("you'd", 'you would'),
        ("shouldn't", 'should not'),
        ("let's", 'let us'),
        ("i've", 'i have'),
        ("we've", 'we have'),
        ("they've", 'they have'),
        ("you'll", 'you will'),
        ("i'm", 'i am'),
        ("it's", 'it is'),
        ("don't", 'do not'),
        ("doesn't", 'does not'),
        ("didn't", 'did not'),
        ("hadn't", 'had not'),
        ("mightn't", 'might not'),
        ("mustn't", 'must not'),
    )
    for short_form, long_form in expansions:
        text = re.sub(short_form, long_form, text)
    return text

# removal of HTML tags
def strip_html(review):
    """Delete HTML-like tags from ``review``.

    A tag is a "<", followed by one or more characters other than "<"
    (matched lazily), followed by ">".

    Args:
        review: Text to clean.

    Returns:
        str: The text with every such tag removed.
    """
    tag_re = re.compile('<[^<]+?>')
    return tag_re.sub('', review)

# removal of URLs
def strip_url(review):
    """Delete every "http" followed by at least one non-whitespace character.

    The match runs up to the next whitespace, so both http:// and https://
    links are removed; surrounding whitespace is left in place.

    Args:
        review: Text to clean.

    Returns:
        str: The text with the matched spans removed.
    """
    url_re = re.compile(r'http\S+')
    return url_re.sub('', review)

# c.g.i -> cgi, u.s.a -> usa
def full_stop_abbrev_elim(review):
    """Remove the dots from dotted abbreviations.

    A run of two or more "<letter>." pairs that starts at a word boundary
    (matched case-insensitively) is rewritten without its dots. A single
    "<letter>." pair is left alone.

    Args:
        review: Text to clean.

    Returns:
        str: The text with the dots of such abbreviations removed.
    """
    def _drop_dots(match):
        return match.group().replace('.', '')

    return re.sub(r'\b(?:[a-z]\.){2,}', _drop_dots, review, flags=re.I)

def remove_stop_words(tokens):
    """Strip whitespace from each token and drop English stop words.

    Args:
        tokens: Iterable of string tokens.

    Returns:
        list: The stripped tokens that are not in
        ``stopwords.words('english')``, in their original order.
    """
    # NOTE(review): `stopwords` is not imported in the visible cells
    # (presumably nltk.corpus.stopwords) - confirm it is imported earlier.
    # A frozenset gives O(1) membership tests instead of scanning the list
    # for every token; the result is unchanged.
    stop_set = frozenset(stopwords.words('english'))
    stripped_tokens = (token.strip() for token in tokens)
    return [token for token in stripped_tokens if token not in stop_set]

def review_preprocessor(text):
    """Normalize raw review text before tokenization.

    The text is lowercased and then passed, in order, through contraction
    expansion, HTML-tag removal, URL removal and dotted-abbreviation cleanup.

    Args:
        text: Raw review text.

    Returns:
        str: The normalized text.
    """
    # Lowercasing comes first because the contraction patterns are lowercase.
    cleaned = text.lower()
    pipeline = (
        short_form_transform,
        strip_html,
        strip_url,
        full_stop_abbrev_elim,
    )
    for step in pipeline:
        cleaned = step(cleaned)
    return cleaned

def review_tokenizer(stemming, text):
    """Tokenize ``text``, filter the tokens and return their stems.

    Tokens come from ``simple_tokenization`` followed by
    ``remove_stop_words``. A token is kept only if it is longer than two
    characters and starts with a run of letters (no digits or underscores)
    delimited by word boundaries; each kept token is stemmed.

    Args:
        stemming: Object exposing a ``stem(token)`` method.
        text: Text to tokenize.

    Returns:
        list: Stems of the kept tokens, in their original order.
    """
    # Compiled once per call instead of once per token.
    token_pattern = re.compile(r'\b[^\W\d_]+\b')

    tokens = remove_stop_words(simple_tokenization(text))
    return [
        stemming.stem(token)
        for token in tokens
        # match() anchors at the start, so only the token's beginning is
        # checked (e.g. "good-looking" passes because of "good").
        if len(token) > 2 and token_pattern.match(token)
    ]

In [680]:
import pickle
import tkinter as tk
from tkinter import ttk

In [681]:
def _load_pickle(path):
    """Return the object unpickled from the binary file at ``path``."""
    # SECURITY: pickle.load can execute arbitrary code while loading; only
    # use it on artifact files that come from a trusted source.
    with open(path, 'rb') as pickle_file:
        return pickle.load(pickle_file)


# Fitted vectorizers.
# NOTE(review): the comment on the helper functions says they are needed by
# the vectorizer, so presumably these pickles reference them by name and they
# must already be defined when the files are loaded - confirm.
count_vectorizer_filename = 'vectorizers/count.vect'
count_vectorizer = _load_pickle(count_vectorizer_filename)

tf_idf_vectorizer_filename = 'vectorizers/tf_idf.vect'
tf_idf_vectorizer = _load_pickle(tf_idf_vectorizer_filename)

# Trained classifiers.
lr_filename = 'models/log_reg.model'
lr_model = _load_pickle(lr_filename)

svm_filename = 'models/svm.model'
svm_model = _load_pickle(svm_filename)

knn_filename = 'models/knn.model'
knn_model = _load_pickle(knn_filename)

# Lookup tables keyed by the labels offered in the GUI drop-downs.
vect_dict = {'count': count_vectorizer, 'tf-idf': tf_idf_vectorizer}
model_dict = {'lr': lr_model, 'svm': svm_model, 'knn': knn_model}

In [682]:
# Main application window: title, black background and initial geometry.
root = tk.Tk()
root.title('Sentiment analyzer')
root.configure(background='black')
# NOTE(review): 250x170 pixels looks smaller than the packed widgets need
# (the input box alone is 50 columns by 10 rows) - confirm the intended size.
root.geometry('250x170')

In [683]:
def take_input():
    """Classify the review in the input box and show the result in the GUI.

    Reads the review text and the selected vectorizer/model, vectorizes the
    review, writes the predicted label into ``output`` and, when the model
    supports it, the class probabilities into ``output_proba``.

    Shows a warning dialog and returns without predicting when no
    vectorizer/model is selected or the review is blank.
    """
    # `import tkinter` alone does not load the messagebox submodule, so
    # `tk.messagebox` can raise AttributeError; import it explicitly.
    from tkinter import messagebox

    review = inputtxt.get("1.0", "end-1c")

    # selected vectorizer and model; current() returns -1 when the box does
    # not hold one of its values, which would otherwise index the last option
    vectorizer_index = chosen_vectorizer.current()
    model_index = chosen_model.current()
    if vectorizer_index < 0 or model_index < 0:
        messagebox.showwarning(title='Warning', message='Please select a vectorizer and a model first!')
        return
    if not review.strip():
        messagebox.showwarning(title='Warning', message='Please insert a review first!')
        return

    selected_vectorizer = chosen_vectorizer['values'][vectorizer_index]
    selected_model = chosen_model['values'][model_index]
    messagebox.showinfo(title='Information', message=f'Using {selected_vectorizer} for tokenization and {selected_model} model for prediction!')

    model = model_dict[selected_model]
    review_vec = vect_dict[selected_vectorizer].transform([review])
    predicted_label = model.predict(review_vec)[0]

    output.delete('1.0', tk.END)
    if predicted_label == 0:
        output.insert('1.0', 'Negative review')
    else:
        output.insert('1.0', 'Positive review')

    output_proba.delete('1.0', tk.END)
    # Not every classifier exposes predict_proba; keep the label visible
    # instead of failing inside the Tk callback when it is missing.
    if hasattr(model, 'predict_proba'):
        predicted_proba = model.predict_proba(review_vec)[0]
        output_proba.insert('1.0', f'Negative prob: {predicted_proba[0]:.5f}\nPositive prob: {predicted_proba[1]:.5f}')
    else:
        output_proba.insert('1.0', 'Probabilities not available for this model')

In [684]:
def clear_command():
    """Empty the review input box and both result boxes."""
    for text_box in (inputtxt, output, output_proba):
        text_box.delete('1.0', tk.END)

In [685]:
# Label and drop-down for choosing the vectorizer.
vectorizer_l = tk.Label(root, text='Select vectorizer')
# The variable gets a dedicated name so it is not garbage-collected when
# another cell rebinds a shared name; tkinter unsets the underlying Tcl
# variable when its Python wrapper object is collected.
vectorizer_var = tk.StringVar()
chosen_vectorizer = ttk.Combobox(root, width=20, textvariable=vectorizer_var)
chosen_vectorizer['values'] = tuple(vect_dict.keys())

In [686]:
# Label and drop-down for choosing the classifier.
model_l = tk.Label(root, text='Select model')
# The variable gets a dedicated name so that creating it does not rebind a
# name used by another widget's variable; tkinter unsets the underlying Tcl
# variable when its Python wrapper object is garbage-collected.
model_var = tk.StringVar()
chosen_model = ttk.Combobox(root, width=20, textvariable=model_var)
chosen_model['values'] = tuple(model_dict.keys())

In [687]:
# Caption above the review input box, in 14-point Courier.
l = tk.Label(root, text='Insert review:', font=('Courier', 14))

In [688]:
# Box where the user types the review (10 rows by 50 columns).
inputtxt = tk.Text(
    root,
    height=10,
    width=50,
)

In [689]:
# Box that receives the predicted label (2 rows by 20 columns).
output = tk.Text(
    root,
    height=2,
    width=20,
)

In [690]:
# Box that receives the class probabilities (2 rows by 40 columns).
output_proba = tk.Text(
    root,
    height=2,
    width=40,
)

In [691]:
# Action buttons. The callbacks take no arguments, so the functions are
# passed directly instead of being wrapped in a lambda.
predict_button = tk.Button(root, text='Predict', command=take_input)
clear_button = tk.Button(root, text='Clear', command=clear_command)

In [692]:
# Widgets in top-to-bottom display order; the flag says whether the widget
# is packed with 10 pixels of padding on each side.
layout = (
    (vectorizer_l, True),
    (chosen_vectorizer, False),
    (model_l, True),
    (chosen_model, False),
    (l, True),
    (inputtxt, False),
    (predict_button, True),
    (output, True),
    (output_proba, False),
    (clear_button, False),
)
for widget, padded in layout:
    if padded:
        widget.pack(padx=10, pady=10)
    else:
        widget.pack()

# Start the Tk event loop; this call blocks until the window is closed.
tk.mainloop()