In [2]:
import tensorflow as tf
import numpy as np
import pandas as pd
import json
import string
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
import random

In [4]:
# Load the intents definition file. The encoding is stated explicitly so the
# file is decoded the same way on every platform instead of depending on the
# locale's default encoding.
with open('/content/input.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# One row per entry of the top-level "intents" list.
df = pd.DataFrame(data['intents'])

In [5]:
def map_tag_pattern(df, tag_col, text_col, res_col):
    """Flatten an intents table into parallel tag / pattern lists.

    Args:
        df: DataFrame with one row per intent.
        tag_col: Name of the column holding the intent tag.
        text_col: Name of the column holding each intent's sequence of
            example patterns.
        res_col: Name of the column holding each intent's responses.

    Returns:
        A ``(tags, inputs, responses)`` tuple. ``inputs`` lists every pattern
        in row order, ``tags`` holds the tag of the pattern at the same
        index, and ``responses`` maps each tag to that row's ``res_col``
        value (a later row with the same tag replaces an earlier one).
    """
    tags, inputs, responses = [], [], {}

    for _, row in df.iterrows():
        patterns = row[text_col]
        tag = row[tag_col]
        responses[tag] = row[res_col]

        # Repeat the tag once per pattern so both lists stay index-aligned.
        tags.extend([tag] * len(patterns))
        inputs.extend(patterns)

    return tags, inputs, responses

# Flatten the intents frame loaded above into training lists plus a
# tag -> responses lookup used when answering.
tags, inputs, responses = map_tag_pattern(
    df,
    tag_col="tag",
    text_col="patterns",
    res_col="responses",
)

# Show what was extracted
for label, values in (("Tags:", tags), ("Patterns:", inputs), ("Responses:", responses)):
    print(label, values)

Tags: ['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'courtesy-greeting', 'courtesy-greeting', 'courtesy-greeting', 'courtesy-greeting', 'courtesy-greeting', 'courtesy-greeting', 'courtesy-greeting', 'morning', 'afternoon', 'evening', 'night', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'counter-goodbye', 'counter-goodbye', 'counter-goodbye', 'counter-goodbye', 'counter-goodbye', 'counter-goodbye', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'thanks', 'no-response', 'neutral-response', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'about', 'skill', 'creation', 'creation', 'creation', 'name', 'name', 'name', 'help', 'help', 'help', 'help', 'help', 'help', 'help', 'sad', 'sad', 'sad', 'sad', 'sad',

In [6]:
# Training table: one row per example pattern, paired with its intent tag.
data = pd.DataFrame(dict(inputs=inputs, tags=tags))
data

Unnamed: 0,inputs,tags
0,Hi,greeting
1,Hey,greeting
2,Is anyone there?,greeting
3,Hi there,greeting
4,Hello,greeting
...,...,...
261,I want to learn about mental health.,learn-mental-health
262,I want to learn more about mental health.,learn-mental-health
263,I'm interested in learning about mental health.,learn-mental-health
264,Tell me a fact about mental health,mental-health-fact


In [7]:
# Normalise every pattern: drop ASCII punctuation characters and lower-case
# whatever is left, character by character.
data["inputs"] = data["inputs"].apply(
    lambda text: ''.join(
        ch.lower() for ch in text if ch not in string.punctuation
    )
)

In [8]:
# Encoder for the intent tags; it is fitted in the next cell.
le = LabelEncoder()

# Learn the vocabulary from the cleaned patterns, then encode each pattern
# as a list of integer word ids.
tokenizer = Tokenizer(num_words=2000)
tokenizer.fit_on_texts(texts=data["inputs"])
train = tokenizer.texts_to_sequences(texts=data["inputs"])

In [9]:
# Features: the encoded patterns padded into one rectangular array.
x_train = pad_sequences(sequences=train)

# Targets: each tag string replaced by an integer class id.
y_train = le.fit_transform(y=data["tags"])

In [10]:
# hyperparameters

# chosen by hand
epochs = 500
embed_dim = 10

# derived from the prepared data
input_shape = x_train.shape[1]               # padded sequence length
vocab_size = len(tokenizer.word_index) + 1   # one more than the number of known words
output_length = len(le.classes_)             # number of distinct tags

In [17]:
# build RNN Model with tensorflow
#
# Pipeline: word ids -> embeddings -> LSTM (one output per time step) ->
# flatten -> two small dense layers -> softmax over the intent tags.
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(input_shape,)),
    # The sequence length is already fixed by the Input layer above, so the
    # Embedding layer no longer receives `input_length`: the argument was
    # redundant here and is deprecated in Keras 3.
    tf.keras.layers.Embedding(vocab_size, embed_dim),
    # return_sequences=True keeps every time step so Flatten sees them all.
    tf.keras.layers.LSTM(units=10, return_sequences=True),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=10, activation='relu'),
    tf.keras.layers.Dense(units=5, activation='relu'),
    # One probability per intent tag.
    tf.keras.layers.Dense(output_length, activation='softmax')
])

optimizer = tf.keras.optimizers.Adam()
# Sparse loss: y_train holds integer class ids rather than one-hot vectors.
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 18, 10)            2890      
                                                                 
 lstm_4 (LSTM)               (None, 18, 10)            840       
                                                                 
 flatten_3 (Flatten)         (None, 180)               0         
                                                                 
 dense_12 (Dense)            (None, 10)                1810      
                                                                 
 dense_13 (Dense)            (None, 5)                 55        
                                                                 
 dense_14 (Dense)            (None, 58)                348       
                                                                 
Total params: 5943 (23.21 KB)
Trainable params: 5943 (

In [18]:
# Disabled alternative: stop early once the training loss has not improved
# for 4 consecutive epochs.
# early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=4)
# model.fit(x_train, y_train, epochs=epochs, callbacks=[early_stop])

# train the model for the full number of epochs
model.fit(x=x_train, y=y_train, epochs=epochs)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.src.callbacks.History at 0x7f0640f833a0>

In [19]:
# Interactive chat: read a line, classify it into an intent tag, and answer
# with a random response registered for that tag. Typing 'q' ends the loop.
while True:
    user_input = input("Input (press 'q' to quit): ")
    if user_input.lower() == 'q':
        break

    # Same normalisation as the training patterns: strip punctuation and
    # lower-case character by character.
    cleaned = ''.join(
        ch.lower() for ch in user_input if ch not in string.punctuation
    )

    # Encode to word ids, flatten to 1-D, then pad to the training length.
    token_ids = np.array(tokenizer.texts_to_sequences([cleaned])).reshape(-1)
    prediction_input = pad_sequences([token_ids], input_shape)

    # Index of the highest-scoring class for this single input.
    output = model.predict(prediction_input).argmax()

    response_tag = le.inverse_transform([output])[0]
    print("AI: ", random.choice(responses[response_tag]))

Input (press 'q' to quit): hi
AI:  Hello there. Glad to see you're back. What's going on in your world right now?
Input (press 'q' to quit): how are you
AI:  Hello, I am great, how are you? Please tell me your GeniSys user
Input (press 'q' to quit): good morning
AI:  Good morning. I hope you had a good night's sleep. How are you feeling today? 
Input (press 'q' to quit): q
