In [1]:
# Load data preprocessing libs
import pandas as pd
import numpy as np

import re
from bs4 import BeautifulSoup

# Load vectorizer and similarity measure
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Load the two CSV files from the working directory:
# `df` is read from Questions.csv and `an` from Answers.csv.
df = pd.read_csv(filepath_or_buffer="Questions.csv")
an = pd.read_csv(filepath_or_buffer="Answers.csv")

In [3]:
# Remove three columns from the answers frame. Note that `Id` is dropped here;
# a later cell renames `ParentId` to `Id`, which is then used as the merge key.
an = an.drop(columns=["Id", "OwnerUserId", "CreationDate"])

In [4]:
# Preview the first five rows of the answers frame.
an.head(n=5)

Unnamed: 0,ParentId,Score,Body
0,469,4,<p>open up a terminal (Applications-&gt;Utilit...
1,469,2,<p>I haven't been able to find anything that d...
2,502,9,<p>You can use ImageMagick's convert utility f...
3,535,23,<p>One possibility is Hudson. It's written in...
4,535,20,"<p>We run <a href=""http://buildbot.net/trac"">B..."


In [5]:
# Give `ParentId` the name `Id` so the answers frame shares a key column name
# with the questions frame (the merge further down joins on `Id`).
an = an.rename(columns={"ParentId": "Id"})
an.head(n=5)

Unnamed: 0,Id,Score,Body
0,469,4,<p>open up a terminal (Applications-&gt;Utilit...
1,469,2,<p>I haven't been able to find anything that d...
2,502,9,<p>You can use ImageMagick's convert utility f...
3,535,23,<p>One possibility is Hudson. It's written in...
4,535,20,"<p>We run <a href=""http://buildbot.net/trac"">B..."


In [6]:
# Keep only the answers whose Score is strictly greater than 5.
score_mask = an["Score"].gt(5)
an = an.loc[score_mask]
an.head(n=5)

Unnamed: 0,Id,Score,Body
2,502,9,<p>You can use ImageMagick's convert utility f...
3,535,23,<p>One possibility is Hudson. It's written in...
4,535,20,"<p>We run <a href=""http://buildbot.net/trac"">B..."
5,594,25,<p>The canonical way is to use the built-in cu...
6,535,14,<p>Second the Buildbot - Trac integration. You...


In [7]:
# Inner-join questions with the retained answers on `Id`.
# Both frames carry `Score` and `Body`; the (default) suffixes mark the
# question-side columns with `_x` and the answer-side columns with `_y`,
# which the following cells rely on.
df = pd.merge(df, an, how="inner", on="Id", suffixes=("_x", "_y"))
df.head(n=5)

Unnamed: 0,Id,OwnerUserId,CreationDate,Score_x,Title,Body_x,Score_y,Body_y
0,469,147.0,2008-08-02T15:11:16Z,21,How can I find the full path to a font from it...,<p>I am using the Photoshop's javascript API t...,12,<p>Unfortunately the only API that isn't depre...
1,502,147.0,2008-08-02T17:01:58Z,27,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,9,<p>You can use ImageMagick's convert utility f...
2,502,147.0,2008-08-02T17:01:58Z,27,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,25,<p>ImageMagick delegates the PDF->bitmap conve...
3,535,154.0,2008-08-02T18:43:54Z,40,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,23,<p>One possibility is Hudson. It's written in...
4,535,154.0,2008-08-02T18:43:54Z,40,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,20,"<p>We run <a href=""http://buildbot.net/trac"">B..."


In [8]:
# Discard the key, author, date and both score columns; only the title and the
# two body columns remain afterwards.
unused_columns = ["Id", "OwnerUserId", "CreationDate", "Score_x", "Score_y"]
df = df.drop(columns=unused_columns)
df.head(n=5)

Unnamed: 0,Title,Body_x,Body_y
0,How can I find the full path to a font from it...,<p>I am using the Photoshop's javascript API t...,<p>Unfortunately the only API that isn't depre...
1,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,<p>You can use ImageMagick's convert utility f...
2,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,<p>ImageMagick delegates the PDF->bitmap conve...
3,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,<p>One possibility is Hudson. It's written in...
4,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,"<p>We run <a href=""http://buildbot.net/trac"">B..."


In [9]:
# Rename the two body columns: the question-side body becomes `Question`,
# the answer-side body becomes `Asswer`.
# NOTE: the spelling `Asswer` is used verbatim by later cells, so it must not
# be changed here in isolation.
df = df.rename(columns={"Body_x": "Question", "Body_y": "Asswer"})

In [10]:
# Preview the first five rows with the renamed columns.
df.head(n=5)

Unnamed: 0,Title,Question,Asswer
0,How can I find the full path to a font from it...,<p>I am using the Photoshop's javascript API t...,<p>Unfortunately the only API that isn't depre...
1,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,<p>You can use ImageMagick's convert utility f...
2,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,<p>ImageMagick delegates the PDF->bitmap conve...
3,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,<p>One possibility is Hudson. It's written in...
4,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,"<p>We run <a href=""http://buildbot.net/trac"">B..."


In [11]:
# Replace every answer's HTML with its plain text.
# The parser is named explicitly: when none is given, BeautifulSoup picks
# whichever parser happens to be installed and emits GuessedAtParserWarning,
# so the extracted text could differ from one environment to another.
# NOTE(review): only `Asswer` is cleaned here; `Question` keeps its raw HTML —
# confirm that this asymmetry is intended.
df['Asswer'] = df['Asswer'].apply(
    lambda html: BeautifulSoup(html, "html.parser").get_text()
)

In [12]:
# Preview the first five rows after the answers were converted to plain text.
df.head(n=5)

Unnamed: 0,Title,Question,Asswer
0,How can I find the full path to a font from it...,<p>I am using the Photoshop's javascript API t...,Unfortunately the only API that isn't deprecat...
1,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,You can use ImageMagick's convert utility for ...
2,Get a preview JPEG of a PDF on Windows?,<p>I have a cross-platform (Python) applicatio...,ImageMagick delegates the PDF->bitmap conversi...
3,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,One possibility is Hudson. It's written in Ja...
4,Continuous Integration System for a Python Cod...,<p>I'm starting work on a hobby project with a...,"We run Buildbot - Trac at work, I haven't used..."


In [13]:
# Learn the TF-IDF vocabulary and IDF weights from the question texts and the
# answer texts together (questions first, then answers, as one flat array).
vectorizer = TfidfVectorizer()
vectorizer.fit(np.concatenate([df["Question"], df["Asswer"]]))

TfidfVectorizer()

In [14]:
# Project every stored question into the fitted TF-IDF space once, so that
# chatbot_response only has to vectorize the incoming message.
Question_vectors = vectorizer.transform(df["Question"])

In [15]:
def chatbot_response(msg):
    """Return the stored answer whose question is most similar to ``msg``.

    The message is stripped of HTML markup, transformed with the fitted
    ``vectorizer`` and compared by cosine similarity against every row of
    ``Question_vectors``. The ``Asswer`` value of the best-scoring row of
    ``df`` is returned.

    Parameters
    ----------
    msg : str
        The user's message; any HTML markup in it is removed first.

    Returns
    -------
    str
        The answer paired with the closest question, or a short fallback
        sentence when no stored question has a similarity above zero.
    """
    # Name the parser explicitly; otherwise BeautifulSoup guesses one from
    # what is installed and emits GuessedAtParserWarning.
    input_question = BeautifulSoup(msg, "html.parser").get_text()

    # Vectorize the message with the already-fitted vocabulary.
    input_question_vector = vectorizer.transform([input_question])

    # One row (the message) by one column per stored question.
    similarities = cosine_similarity(input_question_vector, Question_vectors)

    # If nothing matches, every similarity is 0 and argmax would silently
    # pick row 0, i.e. an unrelated answer. Report the miss instead.
    if similarities.size == 0 or similarities.max() <= 0:
        return "Sorry, I could not find a related question. Please try rephrasing."

    # `similarities` has a single row, so the flat argmax is the column index,
    # which is the positional index of the closest question.
    closest = int(np.argmax(similarities))
    return df.Asswer.iloc[closest]

In [19]:
import tkinter
from tkinter import *
def send():
    """Handle a press of the Send button.

    Reads the text typed into ``EntryBox`` and clears the box. When the text
    is non-empty after stripping, the user's line and the reply from
    ``chatbot_response`` are appended to ``ChatLog``, which is then scrolled
    to the end.
    """
    # "1.0" is the first character of a Tk text widget; 'end-1c' leaves out
    # the trailing newline Tk always keeps at the end.
    msg = EntryBox.get("1.0", 'end-1c').strip()
    EntryBox.delete("1.0", END)
    if msg == '':
        return

    # The log is kept DISABLED (read-only); it has to be enabled to insert.
    ChatLog.config(state=NORMAL)
    try:
        ChatLog.insert(END, "You: " + msg + '\n\n')
        ChatLog.config(foreground="#442265", font=("Verdana", 12 ))
        res = chatbot_response(msg)
        ChatLog.insert(END, "Bot: " + res + '\n\n')
    finally:
        # Restore the read-only state even if building the reply failed;
        # otherwise the log would stay editable by the user.
        ChatLog.config(state=DISABLED)
        ChatLog.yview(END)

# Main window: fixed 400x500 and not resizable.
base = Tk()
base.title("PyBOT")
base.geometry("400x500")
base.resizable(width=FALSE, height=FALSE)

# Conversation log. It starts DISABLED (read-only); send() enables it only
# while inserting text.
ChatLog = Text(base, bd=0, bg="white", height="8", width="50", font="Arial")
ChatLog.configure(state=DISABLED)

# Vertical scrollbar wired to the log in both directions.
scrollbar = Scrollbar(base, command=ChatLog.yview, cursor="heart")
ChatLog.configure(yscrollcommand=scrollbar.set)

# Button that submits the typed message through send().
SendButton = Button(
    base,
    text="Send",
    font=("Verdana", 12, 'bold'),
    width="12",
    height=5,
    bd=0,
    bg="#32de97",
    activebackground="#3c9d9b",
    fg='#ffffff',
    command=send,
)

# Box in which the user types the message.
EntryBox = Text(base, bd=0, bg="white", width="29", height="5", font="Arial")

# Absolute placement: log and scrollbar on top, button and entry box below.
ChatLog.place(x=6, y=6, height=386, width=370)
scrollbar.place(x=376, y=6, height=386)
SendButton.place(x=6, y=401, height=90)
EntryBox.place(x=128, y=401, height=90, width=265)

# Hand control to Tk; this call blocks until the window is closed.
base.mainloop()