<a href="https://colab.research.google.com/github/Tbharvesh/DeepLearning/blob/main/Intend_Detection_LSTM_Bank_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Problem Statement: Perform intent detection using **LSTM** on a bank dataset

### Workflow :

1. Understand the problem
2. Read the data and understand it
3. Preprocess the data
4. Build the LSTM model
5. Train and evaluate the model




In [1]:
import numpy as np
import pandas as pd
import nltk

from sklearn.preprocessing import OneHotEncoder as oneHot
from nltk.corpus import stopwords
from nltk import word_tokenize
from string import punctuation
from nltk.stem import PorterStemmer
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.layers import BatchNormalization, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy as cce
from tensorflow.keras.activations import relu, softmax
from tensorflow.keras.initializers import he_uniform, glorot_uniform
from tensorflow.keras.metrics import AUC
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
from sklearn.metrics import classification_report

# Fetch the NLTK resources the preprocessing cells rely on:
#   - 'punkt' / 'punkt_tab': tokenizer models used by word_tokenize. Recent NLTK
#     releases (3.9+) load 'punkt_tab' instead of the pickled 'punkt' models, so
#     both are requested to keep the notebook runnable across NLTK versions.
#   - 'stopwords': the stop-word lists read via nltk.corpus.stopwords.
# The 'stopwords' call stays last so the cell still displays its return value.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [51]:
# Load the bank intent dataset; the column names are supplied explicitly.
train = pd.read_csv(
    '/content/Bank_dataset.csv',
    names=["text", "target"],
)

# Disabled: would load a separate test split (the file name suggests the ATIS
# intents dataset rather than the bank data) -- TODO confirm and remove.
# test= pd.read_csv('atis_intents_test.csv',
#                        names= ["target", "text"])

In [52]:
# Preview the first rows of the freshly loaded frame.
train.head()

Unnamed: 0,text,target
0,I APPLIED FOR A CREDIT CARD LAST MONTH BUT I D...,0
1,I OPENED A NEW ACCOUNT IN YOUR BANK BUT WHEN I...,1
2,I APPLIED FOR DEBIT OR CREDIT CARD WHEN I OPEN...,0
3,TILL NOW I DID NOT GET ANY DEBIT OR CREDIT CAR...,0
4,I GAVE AN APPLICATION FOR LINKING UNIQUE NUMBE...,1


In [53]:
# (rows, columns) of the dataset.
train.shape

(100, 2)

Number of Intents :

In [54]:
# Count how many rows carry each target label to check class balance.
train['target'].value_counts()

target
2    38
0    32
1    30
Name: count, dtype: int64

Preprocessing steps:
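1. Target variable --> convert into one-hot encoded values
2. Text --> lowercase, tokenize, remove stop words and punctuation, stem, then convert to integer sequences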

In [55]:
# Fit the one-hot encoder on the target labels, shaped as a single-column 2-D array.
target_column = np.array(train["target"]).reshape(-1, 1)
encode_target = oneHot().fit(target_column)

In [56]:
# Transform the labels with the fitted encoder and densify the result into a NumPy array.
target_as_column = np.array(train["target"]).reshape(-1, 1)
train_target_encoded = encode_target.transform(target_as_column).toarray()

In [57]:
# Display the one-hot encoded target matrix (one row per sample).
train_target_encoded

array([[1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0

In [58]:
# Lowercase every utterance so later token matching is case-insensitive.
train["text"] = train["text"].map(str.lower)

In [59]:
# Preview the frame after lowercasing the text column.
train.head()

Unnamed: 0,text,target
0,i applied for a credit card last month but i d...,0
1,i opened a new account in your bank but when i...,1
2,i applied for debit or credit card when i open...,0
3,till now i did not get any debit or credit car...,0
4,i gave an application for linking unique numbe...,1


In [60]:
# Split each utterance into a list of word tokens with NLTK's word_tokenize.
train["text"] = train["text"].apply(word_tokenize)

In [61]:
# Inspect the tokenized text column.
train.text

0     [i, applied, for, a, credit, card, last, month...
1     [i, opened, a, new, account, in, your, bank, b...
2     [i, applied, for, debit, or, credit, card, whe...
3     [till, now, i, did, not, get, any, debit, or, ...
4     [i, gave, an, application, for, linking, uniqu...
                            ...                        
95                       [sorry, your, payment, failed]
96    [what, is, the, cause, of, my, failed, transac...
97               [why, my, refund, transaction, failed]
98    [how, do, i, find, out, why, a, payment, or, r...
99    [payment, status, is, approved, by, it, has, n...
Name: text, Length: 100, dtype: object

### Remove stop words and punctuation

In [62]:
def removeStopWords(strings,stop_word_list):
  """Drop stop words and punctuation-only tokens from a token list.

  Args:
    strings: iterable of string tokens (e.g. the output of word_tokenize).
    stop_word_list: iterable of words to discard; matched exactly (case-sensitive).

  Returns:
    A new list with the surviving tokens in their original order.
  """
  # Set membership is O(1); a list lookup rescans every stop word for each token.
  stop_set = set(stop_word_list)
  # Discard a token when every character is punctuation. A plain
  # `token not in punctuation` is a substring test against the punctuation
  # string, so multi-character tokens such as "..." or "``" would slip through.
  return [
      token for token in strings
      if token not in stop_set
      and not all(ch in punctuation for ch in token)
  ]
# English stop-word list shipped with NLTK.
stop_words = stopwords.words('english')

# Filter every row's token list through removeStopWords.
train['text'] = train['text'].map(
    lambda tokens: removeStopWords(tokens, stop_words)
)


In [63]:
# Inspect the token lists after stop-word and punctuation removal.
train['text']

0     [applied, credit, card, last, month, get, one,...
1     [opened, new, account, bank, got, passbook, fo...
2     [applied, debit, credit, card, opened, account...
3     [till, get, debit, credit, card, applied, last...
4     [gave, application, linking, unique, number, b...
                            ...                        
95                             [sorry, payment, failed]
96                         [cause, failed, transaction]
97                        [refund, transaction, failed]
98         [find, payment, refund, transaction, failed]
99                [payment, status, approved, received]
Name: text, Length: 100, dtype: object

In [64]:
def normalize(text):
    """Join an iterable of string tokens into one space-separated string."""
    separator = " "
    return separator.join(text)

# PorterStemmer is a class from nltk.stem, so create one instance and reuse
# its stem() method for every token.
Stemmer = PorterStemmer()

# Reduce each token to its stem, then join the stems of every row back into a
# single space-separated string.
stemmed_tokens = train["text"].map(lambda tokens: [Stemmer.stem(tok) for tok in tokens])
train["text"] = stemmed_tokens.apply(normalize)


In [66]:
# Preview the frame after stemming and re-joining the tokens.
train.head()


Unnamed: 0,text,target
0,appli credit card last month get one till alth...,0
1,open new account bank got passbook found middl...,1
2,appli debit credit card open account bank last...,0
3,till get debit credit card appli last month bank,0
4,gave applic link uniqu number bank account las...,1



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.



In [68]:
# Tokenizer comes from tensorflow.keras.preprocessing.text.
num_words = 1000  # Setting it initially to 1000

tokenizer = Tokenizer(num_words=num_words)
# fit_on_texts creates the vocabulary index based on word frequency.
tokenizer.fit_on_texts(train["text"])

# Convert each cleaned utterance into its sequence of integer word indices.
tokenized_train = tokenizer.texts_to_sequences(train["text"])

In [69]:
# Inspect the integer sequences produced by the tokenizer.
tokenized_train

[[6, 15, 2, 9, 13, 17, 52, 8, 34, 130, 131, 35, 6, 15, 2],
 [41, 30, 1, 10, 36, 53, 42, 93, 37, 62, 63],
 [6, 3, 15, 2, 41, 1, 10, 9, 13, 17, 8],
 [8, 17, 3, 15, 2, 6, 9, 13, 10],
 [25, 14, 31, 38, 12, 10, 1, 9, 13, 8, 39],
 [20, 43, 44, 12, 132, 31, 1, 20, 94, 30, 12],
 [95, 3, 15, 2, 64, 20, 96, 3, 15, 2, 76, 26, 97],
 [98, 96, 3, 15, 2, 95, 3, 15, 2, 64, 76, 26, 97],
 [25, 14, 31, 44, 12, 1, 9, 45, 8, 17, 26, 77],
 [25, 14, 31, 44, 12, 1, 9, 13, 17, 133, 76, 1, 99, 12],
 [6, 30, 1, 10, 36, 53, 42, 37, 78, 62, 63],
 [21, 27, 5, 3, 15, 2, 65, 54, 46, 2, 100],
 [5, 3, 2, 66, 5, 22, 65, 54, 46, 2, 100],
 [6, 30, 1, 10, 36, 53, 42, 134, 135, 62, 63, 53],
 [20, 94, 67, 2, 12, 1, 27, 136, 4, 16],
 [27, 5, 3, 2, 55, 101, 5, 28, 66, 22, 5, 47, 23, 1],
 [5, 68, 47, 23, 1, 27, 5, 55, 101],
 [25, 14, 39, 69, 56, 1, 79, 8, 40, 57, 137, 70, 14],
 [6, 41, 80, 1, 138, 9, 13, 36, 1, 56, 42, 69, 1],
 [6, 41, 69, 1, 9, 13, 36, 1, 56, 42, 80, 1, 78],
 [25, 14, 102, 1, 10, 9, 45, 40, 103, 139, 104, 102,