**Import required libraries**

In [None]:
import pickle
import re
import string
from collections import Counter

string.punctuation  # no-op leftover expression: evaluates the punctuation set without using it

import nltk
import numpy as np
import pandas as pd
import tensorflow as tf
from nltk.corpus import stopwords
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras import initializers, regularizers, optimizers, constraints, layers
from keras.layers import Input, Dense, LSTM, Embedding, Bidirectional
from keras.layers import Dropout, Activation, GlobalMaxPooling1D
from keras.models import Sequential
from keras.preprocessing import text, sequence
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences

**Import the previously generated dataset**

How to: change the path below to the location where your CSV file was saved.

In [1]:



# Read the training set into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='final_train.csv')

Remove punctuation, URLs and articles, and convert the text to lower case

In [4]:
articles=['a','an','the']
def clean_text(text):
  """Normalise one command string before tokenisation.

  Steps, in order:
    1. remove URLs (anything matching ``http\\S+``),
    2. remove every character in ``string.punctuation``,
    3. lower-case the text and split it on whitespace,
    4. drop the words listed in ``articles``,
    5. re-join the remaining words with single spaces.

  Args:
    text: the raw command. Anything that is not a ``str`` (e.g. ``None`` or
      the float NaN pandas uses for missing cells) is treated as empty.

  Returns:
    The cleaned, lower-cased string; ``""`` when nothing is left.
  """
  if not isinstance(text, str):
    # Missing values have nothing to clean; iterating over them would raise.
    return ""
  # Strip URLs first, while "://", "." and "/" are still present in the text.
  without_urls=re.sub(r"http\S+", "", text)
  without_punct=without_urls.translate(str.maketrans("", "", string.punctuation))
  # Lower-case BEFORE the article check so "The"/"A"/"An" are removed as well.
  kept=[word for word in without_punct.lower().split() if word not in articles]
  return " ".join(kept)

# Overwrite the raw commands with their cleaned form (clean_text is applied to every row).
df['text'] = df['text'].apply(clean_text)


Count the unique words in the corpus. The result (`num_words`) is the vocabulary size passed to the tokenizer and to the embedding layer.

In [None]:
def counter_words(text):
  """Tally word frequencies over a collection of sentences.

  Args:
    text: an object exposing ``.values`` (here a pandas Series) whose
      elements are strings; each one is split on whitespace.

  Returns:
    A ``Counter`` mapping every word to its number of occurrences.
  """
  tally=Counter()
  for sentence in text.values:
    # Counter.update adds one per occurrence of each word in the iterable.
    tally.update(sentence.split())
  return tally

# Keep the Series under a name other than `text`: rebinding `text` here would
# shadow the `keras.preprocessing.text` module imported at the top of the notebook.
corpus=df.text
counter=counter_words(corpus)
# Number of distinct words in the cleaned corpus.
num_words=len(counter)

Create the feature array `X` (the cleaned text) and the one-hot encoded label array `y` (the intents).

In [None]:
# Features: the cleaned command strings.
X=df['text']

# Labels: one indicator column per distinct intent. get_dummies orders the
# columns by sorted label, so column i corresponds to the i-th label in sorted order.
# dtype is pinned because pandas >= 2.0 returns bool columns by default, while the
# loss expects numeric targets.
y = pd.get_dummies(df['intent'], dtype='float32').values

**Split data into training and testing (validation) data**

In this case, we have chosen 70 percent of the dataset as training data and the remaining 30 percent as test data.

In [None]:

# 70/30 split. The shuffle seed is fixed so that re-running the notebook yields
# the same partition and therefore comparable validation scores.
X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.30, random_state=42)

Convert words in the feature to tokens.

Convert each command (sentence) of feature to a sequence of tokens.

Padding makes every command sequence the same length (50 in our case), which is required to feed the data into the neural network.


In [None]:
# Tokenizer and pad_sequences are imported by name in the import cell, so this
# cell does not depend on the module-level name `text` still being a module.
tokenizer=Tokenizer(num_words=num_words)
# The vocabulary is learned from the training split only; words that occur
# only in the test split are not given an index.
tokenizer.fit_on_texts(X_train)

# Map each command to a list of word indices, then pad/truncate to 50 tokens.
train_sequence=tokenizer.texts_to_sequences(X_train)
train_paded=pad_sequences(train_sequence, maxlen=50)

test_sequence=tokenizer.texts_to_sequences(X_test)
test_paded=pad_sequences(test_sequence, maxlen=50)

Build the LSTM model with appropriate input, hidden and output layers.

In [None]:
# Width of each word vector produced by the embedding layer.
embedding_size=128

# Layer stack, in order: embedding -> LSTM (one output per time step) ->
# max over time -> two dropout/dense(ReLU) stages -> dropout -> 6-way softmax.
model=Sequential([
    Embedding(num_words, embedding_size, input_length=50),
    LSTM(64, return_sequences=True),
    GlobalMaxPooling1D(),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dropout(0.3),
    Dense(50, activation='relu'),
    Dropout(0.3),
    Dense(6, activation='softmax'),
])

Compile the model

In [None]:
# Adam optimizer with an explicit learning rate of 3e-4.
opt = keras.optimizers.Adam(learning_rate=3e-4)
# Categorical cross-entropy matches the one-hot labels; accuracy is reported per epoch.
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

Fit training data and test the accuracy of the model

In [None]:
# Train for 5 epochs in batches of 128; the held-out split is evaluated after every epoch.
model.fit(
    x=train_paded,
    y=y_train,
    batch_size=128,
    epochs=5,
    validation_data=(test_paded, y_test),
)

Epoch 1/2
Epoch 2/2


<keras.callbacks.History at 0x7f75f5436fd0>

Save the model and tokenizer.

In [None]:
# Write the trained network to an HDF5 file in the working directory.
model.save('intent_LSTM.h5')

# Serialise the fitted tokenizer next to it so the same word index can be reused at prediction time.
with open('tokenizer.pickle', 'wb') as tokenizer_file:
    tokenizer_file.write(pickle.dumps(tokenizer, protocol=pickle.HIGHEST_PROTOCOL))

'/content/drive/MyDrive/intent_LSTM.h5'

Load the model and tokenizer.
Once the model has been saved, it can be loaded directly to make predictions (there is no need to process the corpus and train the model every time).

In [None]:
# Restore the trained network from the HDF5 file in the working directory.
loaded_model = tf.keras.models.load_model('intent_LSTM.h5')

# Read the tokenizer from the same relative path it is written to when saved
# ('tokenizer.pickle'); the previous absolute Drive path did not match the save location.
# NOTE: pickle.load can execute arbitrary code — only load a file you created yourself.
with open('tokenizer.pickle', 'rb') as handle:
    loaded_tokenizer = pickle.load(handle)

Predict new text by taking input from the user

In [None]:
# The one-hot training targets come from pd.get_dummies, which orders its
# columns by sorted label, so output index i is the i-th label in sorted order.
# Sorting the list keeps the index -> label mapping aligned with the model.
classes=sorted(['action', 'query', 'monitor', 'query+action', 'triger+action',
       'trigger+query'])

user_text=input()
# NOTE(review): the training text was passed through clean_text before tokenising;
# the raw input here relies on the tokenizer's own filtering — confirm this is intended.
seq=loaded_tokenizer.texts_to_sequences([user_text])
padded=pad_sequences(seq, maxlen=50)
# predict returns one row of class scores per input; there is exactly one input here.
scores=loaded_model.predict(padded)
pred=int(np.argmax(scores, axis=-1)[0])
print(classes[pred])