In [20]:
import pandas as pd
import json
import numpy as np
import random

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

from googletrans import Translator
# Shared googletrans client. chatbot() uses it to translate user input
# from Indonesian ("id") to English ("en") and the bot reply back again.
translator = Translator()

In [8]:
# Read the intents file. It is opened in binary mode and the raw bytes are
# handed to the json module, which decodes them itself.
path = './datasets/medical-related.json'
with open(path, "rb") as file:
    raw_bytes = file.read()
data = json.loads(raw_bytes)

In [9]:
# Flatten the intents into two parallel lists: one entry per pattern, each
# paired with the full response list of the intent it belongs to.
patterns = []
responses = []
for intent in data['intents']:
    intent_patterns = intent['patterns']
    patterns.extend(intent_patterns)
    # Repeat the same response list once per pattern so both lists stay aligned.
    responses.extend([intent['responses']] * len(intent_patterns))

# Quick sanity check: sizes first, then a preview of the first nine rows.
print(len(patterns), len(responses), sep="\n")
print(patterns[0:9], responses[0:9], sep="\n")

246
246
['What to do if Cuts?', 'How to cure Cuts?', 'Which medicine to apply for Cuts?', 'what to apply on cuts?', 'Cuts', 'how do you treat abrasions?', 'Do Abrasions cause scars?', 'Abrasions', 'what to do if abrasions?']
[['Wash the cut properly to prevent infection and stop the bleeding by applying pressure for 1-2minutes until bleeding stops. Apply Petroleum Jelly to make sure that the wound is moist for quick healing. Finally cover the cut with a sterile bandage. Pain relievers such as acetaminophen can be applied.'], ['Wash the cut properly to prevent infection and stop the bleeding by applying pressure for 1-2minutes until bleeding stops. Apply Petroleum Jelly to make sure that the wound is moist for quick healing. Finally cover the cut with a sterile bandage. Pain relievers such as acetaminophen can be applied.'], ['Wash the cut properly to prevent infection and stop the bleeding by applying pressure for 1-2minutes until bleeding stops. Apply Petroleum Jelly to make sure that

In [10]:
# Tabular view of the pattern/response pairs, for inspection only.
dataDf = pd.DataFrame(dict(patterns=patterns, responses=responses))
dataDf.head()

Unnamed: 0,patterns,responses
0,What to do if Cuts?,[Wash the cut properly to prevent infection an...
1,How to cure Cuts?,[Wash the cut properly to prevent infection an...
2,Which medicine to apply for Cuts?,[Wash the cut properly to prevent infection an...
3,what to apply on cuts?,[Wash the cut properly to prevent infection an...
4,Cuts,[Wash the cut properly to prevent infection an...


In [11]:

def token_data(data):
    """Fit a fresh Tokenizer on ``data`` and encode it as padded id sequences.

    Args:
        data: The texts to tokenize. The same collection is used both to
            build the vocabulary and to produce the encoded sequences.

    Returns:
        A tuple ``(sequences, word_index, max_length)`` where ``sequences`` is
        the output of ``pad_sequences`` (zero-padded at the end up to the
        longest encoded entry), ``word_index`` is the tokenizer's token-to-id
        mapping, and ``max_length`` is the length of the longest encoded entry.

    Raises:
        ValueError: If ``data`` is empty, because ``max`` has nothing to reduce.
    """
    text_tokenizer = Tokenizer()
    text_tokenizer.fit_on_texts(data)
    encoded = text_tokenizer.texts_to_sequences(data)

    # Every row is padded to the longest sequence seen in this data set.
    longest = max(len(ids) for ids in encoded)
    padded = pad_sequences(encoded, maxlen=longest, padding='post')

    return padded, text_tokenizer.word_index, longest

In [12]:
# Encode the user patterns (model inputs) with their own vocabulary.
inputToken, wordIndexInput, maxInput = token_data(patterns)
# Each entry of `responses` is a list rather than a string. The printed result
# shows a single id per row, so these ids act as class labels for the
# classifier rather than as word sequences.
outputToken, wordIndexOutput, maxOutput = token_data(responses)

# Number of encoded rows, then a preview of the first nine rows of each array.
print(len(inputToken), len(outputToken), sep="\n")
print(inputToken[0:9], outputToken[0:9], sep="\n")

# Vocabulary sizes, followed by the vocabularies themselves.
print(len(wordIndexInput), len(wordIndexOutput), sep="\n")
print(wordIndexInput, wordIndexOutput, sep="\n")

# Padded sequence lengths for inputs and outputs.
print(maxInput, maxOutput, sep="\n")

246
246
[[  8   1   3   5  36   0   0   0   0   0   0]
 [  2   1  13  36   0   0   0   0   0   0   0]
 [ 12  10   1  17  16  36   0   0   0   0   0]
 [  8   1  17  82  36   0   0   0   0   0   0]
 [ 36   0   0   0   0   0   0   0   0   0   0]
 [  2   3  11   9  31   0   0   0   0   0   0]
 [  3  31 115 116   0   0   0   0   0   0   0]
 [ 31   0   0   0   0   0   0   0   0   0   0]
 [  8   1   3   5  31   0   0   0   0   0   0]]
[[8]
 [8]
 [8]
 [8]
 [8]
 [2]
 [2]
 [2]
 [2]]
161
55
{'to': 1, 'how': 2, 'do': 3, 'a': 4, 'if': 5, 'i': 6, 'get': 7, 'what': 8, 'treat': 9, 'medicine': 10, 'you': 11, 'which': 12, 'cure': 13, 'take': 14, 'bite': 15, 'for': 16, 'apply': 17, 'pain': 18, 'bleeding': 19, 'have': 20, 'my': 21, 'burn': 22, 'is': 23, 'mild': 24, 'broken': 25, 'eyes': 26, 'heat': 27, 'skin': 28, 'eye': 29, 'care': 30, 'abrasions': 31, 'the': 32, 'nose': 33, 'poisoning': 34, 'itchy': 35, 'cuts': 36, 'strain': 37, 'pulled': 38, 'muscle': 39, 'cough': 40, 'injury': 41, 'in': 42, 'snake': 4

In [13]:
inputShape = inputToken.shape[1]
outputShape = outputToken.shape[1]

# Intent classifier: padded token ids -> embedding -> stacked LSTMs -> softmax
# over the response label ids.
model = tf.keras.Sequential([
        tf.keras.Input(shape=(inputShape,)),
        # Id 0 is only ever produced by the post-padding, never by a real word
        # (hence the +1 on the vocabulary size). mask_zero=True lets the
        # recurrent layers skip those padded timesteps instead of treating
        # them as input tokens.
        tf.keras.layers.Embedding(len(wordIndexInput)+1, 64, mask_zero=True),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),
        # Without return_sequences this LSTM already emits one vector per
        # sample, so no Flatten layer is needed before the dense head.
        tf.keras.layers.LSTM(32),
        tf.keras.layers.Dense(16, activation='relu'),
        # One output unit per label id, plus one for the unused id 0.
        tf.keras.layers.Dense(len(wordIndexOutput) + 1, activation='softmax')
    ])
model.summary()

In [16]:
# Training arrays: int32 copies of the padded inputs and of the label ids.
x_train = inputToken.astype(np.int32)
y_train = np.array(outputToken, dtype=np.int32)

# The targets are integer label ids (not one-hot vectors), so the sparse
# variant of categorical cross-entropy is used.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=adam,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# `train` holds the value returned by fit() for later inspection.
train = model.fit(x_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - accuracy: 0.0128 - loss: 4.0264   
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0247 - loss: 4.0228
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0323 - loss: 4.0196
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0318 - loss: 4.0154
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0458 - loss: 4.0001
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0740 - loss: 3.9635
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0605 - loss: 3.8461
Epoch 8/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.0321 - loss: 3.7562   
Epoch 9/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [None]:
def predict_class(text, model, wordIndexInput, maxInput):
    """Encode ``text`` with the training vocabulary and return the top class.

    Args:
        text: A single user utterance.
        model: Object exposing ``predict``; called with one padded sequence.
        wordIndexInput: Token-to-id mapping installed on a fresh Tokenizer.
        maxInput: Length every encoded sequence is padded to.

    Returns:
        The ``np.argmax`` of the model's prediction for ``text``.
    """
    # A new Tokenizer is given the existing vocabulary instead of being
    # re-fitted, so ids are looked up in the mapping that was passed in.
    # NOTE(review): presumably words missing from the vocabulary are dropped,
    # since no OOV token is configured - confirm against the Tokenizer docs.
    vocab_tokenizer = Tokenizer()
    vocab_tokenizer.word_index = wordIndexInput

    encoded = vocab_tokenizer.texts_to_sequences([text])
    model_input = pad_sequences(encoded, maxlen=maxInput, padding='post')

    scores = model.predict(model_input)
    return np.argmax(scores)


def get_response(predicted_class, responses):
    """Pick one reply at random from the candidates stored for a class.

    Args:
        predicted_class: Index or key used to look up ``responses``.
        responses: Indexable collection whose entry at ``predicted_class`` is
            a non-empty sequence of candidate replies.

    Returns:
        One randomly chosen element of ``responses[predicted_class]``.
    """
    candidates = responses[predicted_class]
    return random.choice(candidates)


def chatbot():
    """Run an interactive console chat loop until the user asks to stop.

    Each turn reads a line from the user, translates it from Indonesian to
    English, classifies it with the notebook-level ``model`` and prints a
    reply translated back to Indonesian. The loop ends when the input, either
    as typed or after translation, is 'quit', 'exit' or 'bye'.

    Relies on the notebook globals ``translator``, ``model``,
    ``wordIndexInput``, ``maxInput``, ``outputToken`` and ``responses``.
    """
    exit_words = ['quit', 'exit', 'bye']

    # The classifier predicts label ids taken from `outputToken`, not row
    # numbers of `responses` (which has one row per training pattern).
    # Build an explicit label id -> candidate replies lookup so a prediction
    # is answered with the replies of the class it actually denotes.
    class_to_responses = {}
    for label_row, candidates in zip(outputToken, responses):
        label = int(label_row[0])
        if label != 0:  # 0 is the padding id, never a real class
            class_to_responses.setdefault(label, candidates)

    while True:
        raw_text = input("User: ").strip()
        if not raw_text:
            # Nothing to translate or classify; prompt again.
            continue
        # Check the typed text first so quitting needs no translation call
        # and cannot be altered by it.
        if raw_text.lower() in exit_words:
            break

        inputUser = translator.translate(raw_text, src="id", dest="en")
        user_input = inputUser.text
        # Still honour exit words that only appear after translation.
        if user_input.strip().lower() in exit_words:
            break

        predicted_class = predict_class(user_input, model, wordIndexInput, maxInput)

        try:
            bot_response = get_response(int(predicted_class), class_to_responses)
            transRensponse = translator.translate(bot_response, src="en", dest="id")
            print("Bot:", transRensponse.text)
        except (KeyError, IndexError):
            # Unknown label id (e.g. the padding class) or an empty reply list.
            print("Bot: I'm sorry, I don't understand.")

# Start the interactive session. This call blocks on input() and only returns
# once the user's input resolves to 'quit', 'exit' or 'bye'.
chatbot()