# ChatBot using the Given Data

In [1]:
import pandas as pd
import json
from sklearn.feature_extraction.text import CountVectorizer
from scipy.spatial import distance
import pickle

# Creating Dataset

In [2]:
# Load the dataset
# Decode the intents file explicitly as UTF-8 so the result does not depend on
# the platform's default locale encoding.
with open('../data/chat/chat_18-28.json', encoding='utf-8') as json_file:
    data = json.load(json_file)
# Keep only the value stored under the top-level 'intents' key; the next cell
# iterates over it.
data = data['intents']

# Cosine Distance for Similarity of Texts

In [3]:
# Flatten the intents into one row per (text, response) pair and build a DataFrame.
rows = []
for entry in data:
    label = entry['intent']
    # zip() pairs texts with responses positionally and stops at the shorter of
    # the two lists, so any surplus texts or responses of an intent are dropped.
    rows.extend(
        {'intent': label, 'text': pattern, 'response': reply}
        for pattern, reply in zip(entry['text'], entry['responses'])
    )
dataset = pd.DataFrame(rows)

# Finding Response

The functions below find the most suitable response: the cosine similarity between the input and each entry in the `text` field is computed, and the response paired with the closest match is returned.

In [4]:
# Define cosine distance function
def cosine_distance_countvectorizer_method(s1, s2):
    """Return the cosine similarity of two sentences, rounded to 2 decimals.

    Both sentences are turned into token-count vectors with a fresh
    ``CountVectorizer`` fitted on just these two sentences. Despite the
    function's name, the returned value is ``1 - cosine distance``, i.e. a
    similarity: higher means more alike.

    Args:
        s1: First sentence.
        s2: Second sentence.

    Returns:
        The similarity rounded to two decimals, or ``0.0`` when the similarity
        is undefined because at least one sentence produced no tokens.
    """
    vectorizer = CountVectorizer()
    try:
        # Densify once and reuse the matrix for both rows.
        counts = vectorizer.fit_transform([s1, s2]).toarray()
    except ValueError:
        # CountVectorizer raises ValueError ("empty vocabulary") when neither
        # sentence yields a token; treat that as "nothing in common".
        return 0.0

    first, second = counts[0], counts[1]
    # An all-zero vector has no direction, so the cosine would be a 0/0
    # division (NaN); report zero similarity instead.
    if not first.any() or not second.any():
        return 0.0

    cosine = distance.cosine(first, second)
    return round((1 - cosine), 2)

# Chat

In [5]:
# Function to respond to user input
def respond(text):
    """Return the response paired with the dataset text most similar to ``text``.

    Every row of the module-level ``dataset`` is scored against ``text`` with
    ``cosine_distance_countvectorizer_method``; the ``response`` of the
    best-scoring row is returned.

    Args:
        text: The user's input sentence.

    Returns:
        The ``response`` value of the best-matching row. On ties the earliest
        row wins. An empty string is returned when ``dataset`` has no rows or
        no score compares greater than negative infinity (e.g. all NaN).
    """
    maximum = float('-inf')
    response = ""
    # itertuples avoids building a Series per row, as iterrows does.
    for row in dataset.itertuples(index=False):
        sim = cosine_distance_countvectorizer_method(text, row.text)
        # Strict '>' keeps the first row among equally good matches.
        if sim > maximum:
            maximum = sim
            response = row.response
    return response

In [6]:
# Persist the dataset together with the similarity function as one pickled tuple.
# NOTE: pickle stores functions by qualified name, not by value, so the process
# that loads this file must already define cosine_distance_countvectorizer_method.
with open("../models/cosine_model.pkl", 'wb') as model_file:
    pickle.dump(
        (dataset, cosine_distance_countvectorizer_method),
        model_file,
    )

In [7]:
# Example usage:
# Read the pickled object back from disk.
# Only unpickle files from a trusted source: pickle.load can run arbitrary code.
with open("../models/cosine_model.pkl", 'rb') as model_file:
    loaded_model = pickle.load(model_file)


In [9]:

# Use the loaded model
# Unpack the loaded pair back into the module-level names that respond() reads.
dataset, cosine_distance_countvectorizer_method = loaded_model

# Example usage: ask for a reply to a sample sentence and show it.
text = "I am tired today"
reply = respond(text)
print("Response:", reply)

Response: That's geat to hear. I'm glad you're feeling this way.
