In [108]:
! pip install laser_encoders



In [109]:
json_data = {
  "intents": [
    {
      "tag": "greeting",
      "patterns": [
        "Hi",
        "Hey",
        "How are you",
        "Is anyone there?",
        "Hello",
        "Good day",
        "What is your name?"
      ],
      "responses": [
        "Hi there, how can Nimble help?\nYou can ask me about bank information like"
      ]
    },
    {
      "tag": "quit",
      "patterns": ["Bye", "See you later", "Goodbye"],
      "responses": [
        "See you later, thanks for visiting",
        "Have a nice day",
        "Bye! Come back again soon."
      ]
    },
    {
      "tag": "thanks",
      "patterns": ["Thanks", "Thank you", "That's helpful", "Thank's a lot!"],
      "responses": ["Happy to help!", "Any time!", "My pleasure"]
    },
    {
      "tag": "balance_enquiry",
      "patterns": ["can you tell me my account balance", "what is my account balance", "what is my account balance", "what is my account status"],
      "responses":["balance_enquiry"]
    },
    {
      "tag": "balance_graph",
      "patterns": ["can you tell me the graph of balance", "show me the balance graph", "balance graph", "show me my account balance graph"],
      "responses":["balance_graph"]
    },
    {
      "tag": "latest_updates",
      "patterns": ["what are the latest updates about the bank", "what are the updates in policy", "what is the latest news regarding the bank"],
      "responses":["latest_updates"]
    },
    {
      "tag": "interest_rates",
      "patterns": ["what are the interest rates provided by your bank", "tell me about the interest rates of your bank", "interest rates?", "what are the changes in the interest"],
      "responses":["interest_rates"]
    },
    {
      "tag": "UnderstandQuery",
      "patterns": ["Do you understand what I am saying","Do you understand me","Do you know what I am saying","Do you get me","Comprendo","Know what I mean"],
      "responses": ["Well I would not be a very clever AI if I did not would I?","I read you loud and clear!","I do in deed!"]
    },
    {
      "tag": "Shutup",
      "patterns": ["Be quiet","Shut up","Stop talking","Enough talking","Please be quiet","Quiet","Shhh"],
      "responses": ["I am sorry to disturb you","Fine, sorry to disturb you","OK, sorry to disturb you"]
    },
    {
      "tag": "Swearing",
      "patterns": ["fuck off","fuck","twat","shit"],
      "responses": ["Please do not swear","How rude","That is not very nice"]
    },
    {
      "tag": "Clever",
      "patterns": ["You are very clever","You are a very clever girl","You are very intelligent", "You are a very intelligent girl","You are a genious","Genious"],
      "responses": ["Thank you, I was trained that way","I was trained well","Thanks, I was trained that way"]
    },
    # {
    #   "tag": "Jokes",
    #   "patterns": ["Tell me a joke", "Do you know any jokes","How about a joke","Give me a joke","Make me laugh","I need cheering up"],
    #   "responses": [
    #                     "I met a Dutch girl with inflatable shoes last week, phoned her up to arrange a date but unfortunately she'd popped her clogs.  ",
    #                     "So I said 'Do you want a game of Darts?' He said, 'OK then', I said nearest to bull starts'. He said, 'Baa', I said, 'Moo', he said, You're closest'.  ",
    #                     "The other day I sent my girlfriend a huge pile of snow. I rang her up; I said 'Did you get my drift?'  ",
    #                     "So I went down the local supermarket, I said, 'I want to make a complaint, this vinegar's got lumps in it', he said, 'Those are pickled onions'.  "
    #                 ]
    # },
    {
      "tag": "TimeQuery",
      "patterns": ["What is the time?","What's the time?","Do you know what time it is?","Do you know the time?","Can you tell me the time?","Tell me what time it is?","Time"],
      "responses": ["TimeQuery"]
    }
  ]
}

In [110]:
response = [
    "greeting",
    "quit",
    "thanks",
    "balance_enquiry",
    "balance_graph",
    "latest_updates",
    "interest_rates",
    "UnderstandQuery",
    "Shutup",
    "Swearing",
    "Clever",
    "Jokes",
    "TimeQuery"
]

In [111]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from collections import Counter
from tqdm import tqdm
import numpy as np
from laser_encoders import LaserEncoderPipeline
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, Reshape, Dropout
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [112]:
# Extract intent tags
intent_tags = [intent["tag"] for intent in json_data["intents"]]

In [113]:
# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit and transform the intent tags to integer labels
encoded_tags = label_encoder.fit_transform(intent_tags)

In [123]:
# Create the data using tuples (pattern, encoded_tag)
data = [(pattern, encoded_tag) for intent in json_data["intents"] for pattern, encoded_tag in zip(intent["patterns"], encoded_tags)]

response = intent_tags  # Update response list

print(encoded_tags)
# print()

counter = Counter(encoded_tags)

[ 7 10 11  5  6  9  8  4  1  2  0  3]


In [115]:
print("Class distribution:")
for cls, count in counter.items():
    print(f"Class {cls}: {count}")

Class distribution:
Class 7: 1
Class 10: 1
Class 11: 1
Class 5: 1
Class 6: 1
Class 9: 1
Class 8: 1
Class 4: 1
Class 1: 1
Class 2: 1
Class 0: 1
Class 3: 1


In [116]:
# Initialize the LaserEncoder
encoder = LaserEncoderPipeline(lang="eng_Latn")

# Initialize empty arrays to store embeddings
X_embeddings = []

In [117]:
# Encode sentences line-wise using tqdm for progress visualization
print("Encoding sentences:")
for sentence in tqdm([row[0] for row in data]):
    embeddings = encoder.encode_sentences([sentence])[0]
    X_embeddings.append(embeddings)

Encoding sentences:


100%|██████████| 59/59 [00:02<00:00, 20.45it/s]


In [118]:
# Convert list to numpy array
X_embeddings = np.array(X_embeddings)

# Convert tags to a NumPy array
y_train = np.array([row[1] for row in data])

In [119]:
# Build a neural network model with RNN
model = Sequential()
model.add(Dense(256, input_shape=(1024,), activation='tanh'))
model.add(Reshape((1, 256)))
model.add(SimpleRNN(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))  # Adding dropout for regularization
model.add(Dense(len(set(encoded_tags)), activation='softmax'))

# Use a learning rate scheduler
def lr_schedule(epoch):
    return 0.0001 * 0.9 ** epoch

opt = Adam(learning_rate=0.0001)
lr_scheduler = LearningRateScheduler(lr_schedule)

# Compile the model
model.compile(optimizer=opt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print model summary to check the architecture
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 256)               262400    
                                                                 
 reshape_8 (Reshape)         (None, 1, 256)            0         
                                                                 
 simple_rnn_8 (SimpleRNN)    (None, 128)               49280     
                                                                 
 dense_25 (Dense)            (None, 64)                8256      
                                                                 
 dropout_8 (Dropout)         (None, 64)                0         
                                                                 
 dense_26 (Dense)            (None, 12)                780       
                                                                 
Total params: 320716 (1.22 MB)
Trainable params: 32071

In [120]:
# Train the model without validation split
model.fit(X_embeddings, y_train, epochs=30, batch_size=32, callbacks=[lr_scheduler])

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x79e9e677e1d0>

In [121]:
texts_to_predict_how_are_you = {
    'hindi': "क्या हाल है आपका",
    'portuguese': "Como você está",
    'romanian': "Ce mai faci",
    'slovenian': "Kako se počutiš",
    'chinese': "你好吗",
    'french': "Comment ça va",
    'dutch': "Hoe gaat het met je",
    'russian': "Как вы",
    'italian': "Come stai",
    'bosnian': "Kako si"
}

In [124]:
# Iterate through the dictionary and extract values
for language, user_text in texts_to_predict_how_are_you.items():
    print(f"{language.capitalize()}: {user_text}")

    # Encode the user text
    user_text_embedding = encoder.encode_sentences([user_text])[0]
    user_text_embedding = np.reshape(user_text_embedding, (1, -1))

    # Predict intent using the trained model
    predicted_intent_index = np.argmax(model.predict(user_text_embedding))
    predicted_intent_tag = response[predicted_intent_index]

    print(f"Predicted Intent (Class Number): {predicted_intent_index}")
    print(f"Predicted Intent (Tag Name): {predicted_intent_tag}")
    print()


Hindi: क्या हाल है आपका
Predicted Intent (Class Number): 7
Predicted Intent (Tag Name): UnderstandQuery

Portuguese: Como você está
Predicted Intent (Class Number): 7
Predicted Intent (Tag Name): UnderstandQuery

Romanian: Ce mai faci
Predicted Intent (Class Number): 11
Predicted Intent (Tag Name): TimeQuery

Slovenian: Kako se počutiš
Predicted Intent (Class Number): 7
Predicted Intent (Tag Name): UnderstandQuery

Chinese: 你好吗
Predicted Intent (Class Number): 11
Predicted Intent (Tag Name): TimeQuery

French: Comment ça va
Predicted Intent (Class Number): 11
Predicted Intent (Tag Name): TimeQuery

Dutch: Hoe gaat het met je
Predicted Intent (Class Number): 11
Predicted Intent (Tag Name): TimeQuery

Russian: Как вы
Predicted Intent (Class Number): 11
Predicted Intent (Tag Name): TimeQuery

Italian: Come stai
Predicted Intent (Class Number): 7
Predicted Intent (Tag Name): UnderstandQuery

Bosnian: Kako si
Predicted Intent (Class Number): 11
Predicted Intent (Tag Name): TimeQuery

