# 2. Preparing the tools

In [None]:
!pip install tensorflowjs
!pip install googletrans==4.0.0-rc1



In [None]:
#importing the libraries
import tensorflow as tf
import numpy as np
import pandas as pd
import json
import nltk
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Input, Embedding, LSTM , Dense,GlobalMaxPooling1D,Flatten
from tensorflow.keras.models import Model
import matplotlib.pyplot as plt
from IPython.display import display
import string
from googletrans import Translator, constants
import tensorflowjs as tfjs

# 3. Load data

In [None]:
# Importing the dataset: read the UTF-8 JSON file inside a context manager so
# the file handle is closed as soon as the content has been read.
with open('intents.json', encoding="utf8") as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

# Flatten the intents into two parallel lists (one entry per example pattern)
# and a lookup table from tag to its candidate responses.
tags = []
patterns = []
responses = {}  # dictionary: tag -> list of possible responses

for intent in intents['intents']:
    responses[intent['tag']] = intent['responses']
    for pattern in intent['patterns']:
        patterns.append(pattern)
        tags.append(intent['tag'])

# Converting to dataframe (built inline so the builtin `dict` is not shadowed)
data = pd.DataFrame({'patterns': patterns,
                     'tags': tags})

display(data)


Unnamed: 0,patterns,tags
0,Hi,greeting
1,Hey,greeting
2,How are you,greeting
3,Is anyone there?,greeting
4,Hello,greeting
...,...,...
134,What are the types of ratios?,types of ratios
135,What are the types of ratios?,types of ratios
136,What are the asset structure ratios?,the asset structure ratios
137,What are the Solvency Ratios?,Solvency Ratios


In [None]:
# Print a summary of the DataFrame: index range, columns, non-null counts,
# dtypes and memory usage.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 139 entries, 0 to 138
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   patterns  139 non-null    object
 1   tags      139 non-null    object
dtypes: object(2)
memory usage: 2.3+ KB


The data is stored in a JSON file, which is loaded and converted into a pandas DataFrame. This data was manually created by me.

# Pre-Processing the data

TensorFlow’s tokenizer assigns a unique integer token to each distinct word, and padding brings all sequences to the same length so they can be fed to an RNN layer. The target tags are also encoded as integer labels.

> Tokenization is the process of splitting the text into smaller units such as sentences, words or subwords.

> LabelEncoder can be used to normalize labels. It can also be used to transform non-numerical labels (as long as they are hashable and comparable) to numerical labels.

In [None]:
# Clean the patterns: drop punctuation characters and lowercase what remains
def _strip_punctuation_and_lower(text):
    """Return `text` lowercased, without any `string.punctuation` characters."""
    return ''.join(ch.lower() for ch in text if ch not in string.punctuation)

data['patterns'] = data['patterns'].apply(_strip_punctuation_and_lower)

# Build the word index from the cleaned patterns and map each pattern
# to its sequence of integer tokens
tokenizer = Tokenizer(num_words=2000)
tokenizer.fit_on_texts(data['patterns'])
train = tokenizer.texts_to_sequences(data['patterns'])

# Pad the sequences so that every row has the same length
x_train = pad_sequences(train)


# Turn the tag names into integer class labels
Lencoder = LabelEncoder()
y_train = Lencoder.fit_transform(data['tags'])

# 4. Input Length, Output Length and Vocabulary

Input length and output length define the input and output shapes of the neural network. Vocabulary size is needed by the embedding layer, which learns a unique vector representation for each word.

In [None]:
# Derive the three sizes the network needs from the fitted preprocessing objects
input_shape = x_train.shape[-1]          # padded sequence length
vocabulary = len(tokenizer.word_index)   # distinct words seen by the tokenizer
output_length = len(Lencoder.classes_)   # number of distinct tags

# Report them in the same order: input length, vocabulary, output length
print("input length : ", input_shape)
print("number of unique words: ", vocabulary)
print("output length: ", output_length)

input length :  12
number of unique words:  198
output length:  38


# 5. Neural Network

In [None]:
# Hyper-parameters of this cell, named so they can be tuned in one place
SEED = 42
EMBEDDING_DIM = 200
LSTM_UNITS = 200
EPOCHS = 200

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable on "Restart & Run All".
tf.keras.utils.set_random_seed(SEED)

#creating the model

i = Input(shape=(input_shape,))
# vocabulary + 1 rows: tokenizer indices start at 1, index 0 is the padding value
x = Embedding(vocabulary+1, EMBEDDING_DIM)(i)
# return_sequences=True keeps one output vector per time step, flattened below
x = LSTM(LSTM_UNITS, return_sequences=True)(x)
x = Flatten()(x)
# one softmax probability per tag
x = Dense(output_length, activation="softmax")(x)
model = Model(i, x)

#compiling the model
# sparse_categorical_crossentropy because y_train holds integer labels, not one-hot vectors
model.compile(loss="sparse_categorical_crossentropy", optimizer='adam', metrics=['accuracy'])


#training the model
# NOTE: from here on `train` holds the value returned by `fit`, no longer the
# token sequences assigned to the same name in the preprocessing cell.
train = model.fit(x_train, y_train, epochs=EPOCHS)

Epoch 1/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 61ms/step - accuracy: 0.0652 - loss: 3.6076
Epoch 2/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.1132 - loss: 3.3619
Epoch 3/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 69ms/step - accuracy: 0.1019 - loss: 3.3111
Epoch 4/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 79ms/step - accuracy: 0.1730 - loss: 3.2252
Epoch 5/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 77ms/step - accuracy: 0.2243 - loss: 3.1505
Epoch 6/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step - accuracy: 0.1261 - loss: 3.1070
Epoch 7/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 93ms/step - accuracy: 0.1819 - loss: 2.9952
Epoch 8/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step - accuracy: 0.2152 - loss: 2.8106
Epoch 9/200
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

In [None]:
#chatting
import random

# A single translator client is reused for the whole conversation
translator = Translator()

while True:
    user_text = input('You : ')
    if not user_text.strip():
        # Nothing to translate or classify: ask again
        continue

    # Translate the message to English and remember the detected source
    # language so that the answer can be translated back into it
    translation = translator.translate(user_text, dest='en')
    Language_code = translation.src

    #removing punctuation and converting to lowercase (same cleaning as for training)
    cleaned_text = ''.join(
        ch.lower() for ch in translation.text if ch not in string.punctuation
    )

    #tokenizing and padding to the sequence length the model was trained with
    sequences = tokenizer.texts_to_sequences([cleaned_text])
    prediction_input = pad_sequences(sequences, maxlen=input_shape)

    #getting output from model (verbose=0 keeps the progress bar out of the chat)
    output = model.predict(prediction_input, verbose=0)
    output = output.argmax()

    #finding the right tag and answering with one of its responses
    response_tag = Lencoder.inverse_transform([output])[0]
    reply = random.choice(responses[response_tag])
    translation = translator.translate(reply, dest=Language_code)
    print("Going Compta : ", translation.text)

    # The conversation ends once the predicted intent is "goodbye"
    if response_tag == "goodbye":
        break

In [None]:
# Print the model's layers with their output shapes and parameter counts
model.summary()

In [None]:
# Save the trained model in the native Keras format. The bare name `keras` is
# never imported in this notebook (only `tensorflow as tf`), so the model's own
# `save` method is used instead.
model.save('model.keras')

NameError: name 'keras' is not defined

In [None]:
# Export the trained model for TensorFlow.js into the 'models' directory.
# NOTE(review): the recorded run of this cell failed with
# "Error dumping weights, duplicate weight name kernel". This looks like an
# incompatibility between the tensorflowjs Keras converter and the installed
# Keras version — confirm, then pin compatible versions or convert from a
# saved file instead.
tfjs.converters.save_keras_model(model, 'models')



failed to lookup keras version from the file,
    this is likely a weight only file


Exception: Error dumping weights, duplicate weight name kernel