In [39]:
import numpy as np
import pandas as pd
import json
from sklearn.feature_extraction.text import CountVectorizer
from scipy.spatial import distance
import random

# Importing Dataset

In [18]:
import warnings

# Blanket-silence warnings for the rest of the notebook.
# NOTE(review): this also hides the RuntimeWarning emitted when a cosine
# similarity is undefined (0/0) — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Read the intents file. The encoding is given explicitly so the result does
# not depend on the platform's default locale (JSON text is UTF-8).
# NOTE(review): absolute path — adjust when running outside the environment
# this notebook was authored in.
with open('/content/input.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One row per entry of the "intents" list; later cells read the
# "tag", "patterns" and "responses" columns from this frame.
df = pd.DataFrame(data['intents'])

# Mapping Dataset

In [19]:
def map_tag_pattern(df, tag_col, text_col, res_col):
    """Flatten an intents table so that every pattern gets its own row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table in which ``text_col`` holds a sequence of patterns per row.
    tag_col, text_col, res_col : str
        Column names for the tag, the pattern sequence and the responses.

    Returns
    -------
    pandas.DataFrame
        Columns ``tag_col``, ``text_col``, ``res_col`` (in that order) with a
        fresh 0..n-1 index. Each output row carries one pattern together with
        the tag and the *whole* responses value of the row it came from.
        Rows whose pattern sequence is empty contribute nothing.
    """
    tags, texts, replies = [], [], []

    for _, row in df.iterrows():
        row_patterns = row[text_col]
        row_replies = row[res_col]
        row_tag = row[tag_col]
        count = len(row_patterns)

        # Repeat the tag / responses once per pattern of this row.
        tags.extend([row_tag] * count)
        texts.extend(row_patterns[k] for k in range(count))
        replies.extend([row_replies] * count)

    return pd.DataFrame({tag_col: tags, text_col: texts, res_col: replies})

# Pattern-level lookup table used by respond(): one row per (tag, pattern).
dataset = map_tag_pattern(
    df,
    tag_col="tag",
    text_col="patterns",
    res_col="responses",
)

# Cosine Similarity of Texts (1 − Cosine Distance)

In [20]:
def cosine_distance_countvectorizer_method(s1, s2):
    """Return the cosine *similarity* of two sentences, rounded to 2 decimals.

    Both sentences are turned into bag-of-words count vectors over their
    shared vocabulary (``CountVectorizer`` with default settings) and the
    result is ``1 - cosine_distance``.

    Parameters
    ----------
    s1, s2 : str
        The sentences to compare.

    Returns
    -------
    float
        Similarity rounded to two decimals. ``0.0`` is returned when the
        similarity is undefined, i.e. when at least one sentence yields no
        tokens (previously this produced NaN, or a ``ValueError`` when
        neither sentence produced a token).

    Raises
    ------
    ValueError
        Re-raised from the vectorizer for any reason other than an empty
        vocabulary.
    """
    vectorizer = CountVectorizer()
    try:
        # Densify once; row 0 belongs to s1, row 1 to s2.
        counts = vectorizer.fit_transform([s1, s2]).toarray().astype(float)
    except ValueError as exc:
        # The vectorizer refuses to build an empty vocabulary; that simply
        # means neither sentence has a usable token, so nothing is shared.
        if "empty vocabulary" in str(exc):
            return 0.0
        raise

    text_to_vector_v1, text_to_vector_v2 = counts[0], counts[1]

    # A zero vector makes the cosine 0/0; report "no similarity" instead of NaN.
    if not text_to_vector_v1.any() or not text_to_vector_v2.any():
        return 0.0

    cosine = distance.cosine(text_to_vector_v1, text_to_vector_v2)
    return round((1 - cosine), 2)

# Finding Response

In [36]:
# def respond(text):
#     maximum = float('-inf')
#     responses = []
#     closest = ""
#     closest_response = ""
#     for i in dataset.iterrows():
#         sim = cosine_distance_countvectorizer_method(text, i[1]['patterns'])
#         if sim > maximum:
#             maximum = sim
#             response_maximum = float('-inf')
#             responses = i[1]['responses']
#             for response in responses:
#                 response_sim = cosine_distance_countvectorizer_method(text, response)
#                 if response_sim > response_maximum:
#                   response_maximum = response_sim
#                   closest_response = response
#             closest = i[1]['patterns']
#     return closest_response

In [40]:
def respond(text, fallback="Sorry, I didn't understand you."):
    """Pick a reply for ``text`` from the most similar known pattern.

    Every row of the module-level ``dataset`` frame is scored with
    ``cosine_distance_countvectorizer_method(text, pattern)``; the responses
    of the first row reaching the highest score are kept and one of them is
    drawn at random.

    Parameters
    ----------
    text : str
        The user's message.
    fallback : str, optional
        Returned when no row can be scored (empty dataset, or every
        similarity is NaN) or when the winning row has no responses.
        Previously these cases raised ``UnboundLocalError`` / ``IndexError``.

    Returns
    -------
    str
        One response of the best-matching row, or ``fallback``.
    """
    best_score = float('-inf')
    best_responses = None

    for pattern, candidates in zip(dataset['patterns'], dataset['responses']):
        score = cosine_distance_countvectorizer_method(text, pattern)
        # Strict ">" keeps the first row on ties; NaN never compares greater,
        # so undefined similarities are skipped.
        if score > best_score:
            best_score = score
            best_responses = candidates

    if best_responses is None or len(best_responses) == 0:
        return fallback

    # Draw once, after the winner is known, instead of on every new maximum.
    return random.choice(best_responses)

# Chat

In [45]:
# Interactive loop: read a line, answer it with respond(), stop on "q".
while True:
    # input() already returns str; strip so that "q " or " q" still quits.
    text = input("Input: (press 'q' to quit) ").strip()
    if text.lower() == "q":
        print("Response: Exiting.....")
        break
    if not text:
        # Blank line: there is nothing to match, so just prompt again.
        continue
    print("Response:",respond(text))

Input: (press 'q' to quit) hi
Response: Hi there. How are you feeling today?
Input: (press 'q' to quit) good morning
Response: Good morning. I hope you had a good night's sleep. How are you feeling today? 
Input: (press 'q' to quit) who are you?
Response: Please don't hesitate to talk to me.
Input: (press 'q' to quit) tell me your name
Response: Not sure I understand that.
Input: (press 'q' to quit) i'm feeling so sad lately
Response: Sorry, I didn't understand you.
Input: (press 'q' to quit) shut up
Response: Sorry, I didn't understand you.
Input: (press 'q' to quit) shhhhh
Response: Sorry, I didn't understand you.
Input: (press 'q' to quit) q
Response: Exiting.....
