## Importing Necessary Libraries 

In [1]:
import pandas as pd 
import pickle 
import re 

## Loading the Model, Classifier and Vectorizers

In [2]:
# loading the saved models and vectorizers
# NOTE: pickle.load can execute arbitrary code while unpickling, so only load
# .sav files that come from a trusted source.
# Each file is opened in a `with` block so its handle is closed as soon as the
# object has been loaded instead of staying open for the life of the kernel.
with open('model_data.sav', 'rb') as model_file:
    classifier = pickle.load(model_file)
with open('tfidf_data.sav', 'rb') as tfidf_file:
    tfidf_vectorizer = pickle.load(tfidf_file)
with open('bow_data.sav', 'rb') as bow_file:
    count_vectorizer = pickle.load(bow_file)

## Functions for Cleaning & Stemming of User Input Question 

In [3]:
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer

In [4]:
# function for text cleaning

def clean(text):
    """Normalise raw question text into a list of cleaned tokens.

    The input is converted to a string, lower-cased, stripped of apostrophes,
    and every HTML tag is replaced by a space. The result is split on
    whitespace; each token has all non-letter characters removed, except the
    exact token 'c++', which is kept unchanged.

    Note: a token made up only of non-letter characters (e.g. '123') yields
    an empty string in the returned list.

    Parameters
    ----------
    text : any
        Value to clean; it is passed through str() first.

    Returns
    -------
    list of str
        The cleaned tokens, in their original order.
    """
    lowered = str(text).lower()
    # drop apostrophes so that e.g. "what's" becomes "whats"
    without_apostrophes = re.sub("\'", "", lowered)
    # swap every html tag for a space so neighbouring words stay separated
    without_tags = re.sub('<.*?>', ' ', without_apostrophes)
    # keep 'c++' verbatim; strip everything except letters from other tokens
    return [
        token if token == 'c++' else re.sub('[^A-Za-z]', '', token)
        for token in without_tags.split()
    ]

In [5]:
# snowball stemmer used to reduce english words to their root form
stemmer = SnowballStemmer(language='english')
# english stopwords collected into a set for fast membership checks
stop_words = {word for word in stopwords.words('english')}

In [6]:
# function for removing stop words and stemming data

def stem(text):
    """Drop stopwords from a token sequence and stem what remains.

    Parameters
    ----------
    text : iterable of str
        Tokens to process (for example the list returned by clean()).

    Returns
    -------
    bytes
        The stemmed tokens, each encoded as UTF-8 and joined with a single
        space into one byte string.
    """
    stemmed_tokens = [
        stemmer.stem(token).encode('utf8')   # root form of the word, as utf-8 bytes
        for token in text
        if token not in stop_words           # skip english stopwords
    ]
    return b' '.join(stemmed_tokens)

## Creating GUI 

In [7]:
# importing necessary modules for creating a GUI using Tkinter
# Tkinter - standard GUI library of Python, used to create dialogs and buttons
from tkinter import *
# PIL - Python Imaging Library, provides tools for working with images
from PIL import ImageTk, Image
import os

In [8]:
# Create the Tk instance that represents the main window of the GUI application;
# the widgets created later in this notebook are attached to it.
root = Tk()

In [9]:
# Set the title of the main window
root.title("Tag Prediction")

''

In [10]:
# Heading label that prompts the user, placed on the window's grid (row 2, column 0)
label1 = Label(root, text="Type a Question", font="Arial 20 bold", fg="black")
label1.grid(column=0, row=2)

# Tk string variable holding whatever the user types into the question box
ques = StringVar()
# Text box bound to `ques`, placed directly below the heading label
quesEntered = Entry(root, textvariable=ques, width=40, font="Arial 18 bold")
quesEntered.grid(row=3, column=0, padx=3, pady=3)

In [11]:
# function that takes the question from user & predicts the tags to the question
ans = StringVar()
# Entry widget that displays the prediction; it is created on the first call to
# pred() and reused afterwards so repeated clicks do not stack new widgets.
_result_entry = None

def pred():
    """Predict tags for the question typed by the user and display them.

    Reads the question from `ques`, cleans and stems it, vectorises it with
    `tfidf_vectorizer`, runs `classifier.predict`, and maps the prediction
    back to tag terms with `count_vectorizer.inverse_transform`. The tags are
    written to `ans` as a space-separated string and shown in an Entry placed
    at row 5, column 0 of the main window.

    Returns
    -------
    None
    """
    global _result_entry

    # retrieving value of ques variable
    question = ques.get()
    # pre processing the question - CLEANING and STEMMING
    processed = [stem(clean(question))]
    # transform the question to a vector so it can be used by the classifier
    features = tfidf_vectorizer.transform(processed)
    predicted = classifier.predict(features)
    # converting the predicted tags to human readable form
    # NOTE(review): assumes inverse_transform returns one sequence of tag terms
    # per sample (as scikit-learn's CountVectorizer does) - confirm for the
    # object stored in bow_data.sav.
    tags = count_vectorizer.inverse_transform(predicted)
    # Join the terms directly rather than regex-scrubbing the array's repr:
    # the old pattern '[^A-Za-z#+-]+' also removed digits and dots that can be
    # part of a tag (e.g. 'html5').
    ans.set(' '.join(str(tag) for tag in tags[0]))

    # Build the result box only once; later calls just update `ans`.
    if _result_entry is None:
        _result_entry = Entry(root, textvariable=ans, font="Arial 20 bold")
        _result_entry.grid(column=0, row=5, padx=3, pady=3)

In [12]:
# Button that predicts the tags by calling the function pred when clicked
button = Button(root, command=pred, text="Predict Tag", font="Arial 18 bold")
# placing the button in row 4 and column 0 with padding
button.grid(row=4, column=0, padx=3, pady=3)

In [None]:
# Start the Tk event loop so the window responds to user actions; this call
# blocks until the window is closed, so the cell keeps running while the GUI is open.
root.mainloop()