In [6]:
import numpy as np 
import string
from nltk.corpus import stopwords
import pandas as pd 
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfTransformer,TfidfVectorizer
from sklearn.pipeline import Pipeline
import nltk
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')

In [2]:
open('/kaggle/input/simple-dialogs-for-chatbot/dialogs.txt', 'r').readlines()[:4]

["hi, how are you doing?\ti'm fine. how about yourself?\n",
 "i'm fine. how about yourself?\ti'm pretty good. thanks for asking.\n",
 "i'm pretty good. thanks for asking.\tno problem. so how have you been?\n",
 "no problem. so how have you been?\ti've been great. what about you?\n"]

In [3]:
data = pd.read_csv('/kaggle/input/simple-dialogs-for-chatbot/dialogs.txt', sep='\t')
data.head()

Unnamed: 0,"hi, how are you doing?",i'm fine. how about yourself?
0,i'm fine. how about yourself?,i'm pretty good. thanks for asking.
1,i'm pretty good. thanks for asking.,no problem. so how have you been?
2,no problem. so how have you been?,i've been great. what about you?
3,i've been great. what about you?,i've been good. i'm in school right now.
4,i've been good. i'm in school right now.,what school do you go to?


In [4]:
data.shape

(3724, 2)

In [5]:
data.isnull().sum()

hi, how are you doing?           0
i'm fine. how about yourself?    0
dtype: int64

In [6]:
data.columns = ['question', 'answer']
data.head()

Unnamed: 0,question,answer
0,i'm fine. how about yourself?,i'm pretty good. thanks for asking.
1,i'm pretty good. thanks for asking.,no problem. so how have you been?
2,no problem. so how have you been?,i've been great. what about you?
3,i've been great. what about you?,i've been good. i'm in school right now.
4,i've been good. i'm in school right now.,what school do you go to?


In [7]:
def clean_txt(txt):
    txt = ''.join([t for t in txt.lower() if t not in string.punctuation])
    return txt.split()

In [9]:
clean_txt('how about you? I am fine now')

['how', 'about', 'you', 'i', 'am', 'fine', 'now']

## First Way

In [11]:
pipe = Pipeline([
    ('vectorize', CountVectorizer(analyzer=clean_txt)),
    ('transform', TfidfTransformer()),
    ('classifier', DecisionTreeClassifier())
])

In [12]:
pipe.fit(data['question'], data['answer'])

Pipeline(steps=[('vectorize',
                 CountVectorizer(analyzer=<function clean_txt at 0x7fc9f40494d0>)),
                ('transform', TfidfTransformer()),
                ('classifier', DecisionTreeClassifier())])

In [13]:
pipe.predict(['how are you?'])

array(["i'm doing great. what about you?"], dtype=object)

In [14]:
pipe.predict(['How are you doing'])[0]

"i'm doing great. what about you?"

In [15]:
pipe.predict(['How are you doing now during the christmas holidays'])[0]

"you'd better buy your ticket now."

In [17]:
exit = ['bye', 'thanks', 'break', 'quit']
flag = True
print('BOT : Hi, I am Marvine. if you want to exit, type bye. How can I help you?')

while(flag):
    user_input = input()
    if user_input.lower() in exit:
        print('See you later...Take care <3')
        break
    else:
        out = pipe.predict([user_input])[0]
        print(f'BOT : {out}')

BOT : Hi, I am Marvine. if you want to exit, type bye. How can I help you?


 How are you


BOT : i'm doing great. what about you?


 I am fine 


BOT : you should go to bed.


 not now


BOT : then why are you watching tv?


 to watch my favorite movie


BOT : well, it's good that you're so talented.


 thanks 


BOT : if only i was talented.


 what do you mean


BOT : a team of doctors gave her a whole new face.


 bye for now


BOT : i was crossing the street.


 bye


See you later...Take care <3


## Second Way

In [7]:
file = open('../input/simple-dialogs-for-chatbot/dialogs.txt','r').read()

In [8]:
file[:100]

"hi, how are you doing?\ti'm fine. how about yourself?\ni'm fine. how about yourself?\ti'm pretty good. "

In [10]:
sentence_list = nltk.sent_tokenize(file)
sentence_list[:3]

['hi, how are you doing?', "i'm fine.", 'how about yourself?']

In [11]:
def bot_response(user_input):
    user_input = user_input.lower()
    sentence_list.append(user_input)

    bot_response = ''

    cm = CountVectorizer().fit_transform(sentence_list)
    similarity_scores = cosine_similarity(cm[-1], cm)
    similarity_scores_list = similarity_scores.flatten()

    index = np.argsort(similarity_scores_list)[::-1]
    index = index[1:]
    response_flag = 0

    j = 0
    for i in range(len(index)):
        if similarity_scores_list[index[i]] > 0.0:
            bot_response += sentence_list[index[i]]
            response_flag = 1
            j+=1

        if j>2:
            break

    if response_flag == 0:
        bot_response += "I apologize, I don't understand"

    sentence_list.remove(user_input)
    return bot_response

In [12]:
print('BOT : Hi, I am Marvine. if you want to exit, type bye. How can I help you?')
exit = ['bye', 'thanks', 'break', 'quit']

while(True):
    user_input = input()
    if user_input.lower() in exit:
        print('See you later...Take care <3')
        break
  
    else:
        print('Doc Bot : '+bot_response(user_input))

BOT : Hi, I am Marvine. if you want to exit, type bye. How can I help you?


 how are you?


Doc Bot : hi, how are you.how smart are you?how are you doing that?


 I didn't do anything yet


Doc Bot : i didn't do anything.i didn't do anything.we don't do anything.


 ohhhh


Doc Bot : I apologize, I don't understand


 bye


See you later...Take care <3
