In [None]:
import pandas as pd
import numpy as np
import string
from string import punctuation
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras import layers
from sklearn.model_selection import train_test_split

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
# Read the labelled intent dataset from the mounted Google Drive folder.
df_train = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/intentdataset.csv"
)

In [None]:
# Give the two columns fixed names (intent label first, query text second),
# then preview the first five rows.
df_train.columns = pd.Index(["label", "query"])
df_train.head(n=5)

Unnamed: 0,label,query
0,inquiry,What is the status of my return request?
1,inquiry,I received a damaged item. How can I get a rep...
2,inquiry,Can you provide more information about the pro...
3,inquiry,I forgot my password. How can I reset it?
4,inquiry,Is there a discount code available for my next...


In [None]:
# Distinct intent classes, in order of first appearance in the data.
pd.unique(df_train["label"])

array(['inquiry', 'refund', 'complaint', 'feedback'], dtype=object)

In [None]:
# One-hot encode the intent label: one indicator column per class, named after
# the class. `dtype` is pinned because pandas >= 2.0 would otherwise emit
# booleans instead of the 0/1 integers the rest of the notebook displays/uses.
one_hot = pd.get_dummies(df_train['label'], dtype="uint8")

# Build a new frame rather than dropping `label` from df_train in place: the
# in-place drop made this cell fail with KeyError when run a second time and
# broke any earlier cell that still reads df_train['label'].
df = pd.concat([df_train.drop(columns=['label']), one_hot], axis=1)
df.head()

Unnamed: 0,query,complaint,feedback,inquiry,refund
0,What is the status of my return request?,0,0,1,0
1,I received a damaged item. How can I get a rep...,0,0,1,0
2,Can you provide more information about the pro...,0,0,1,0
3,I forgot my password. How can I reset it?,0,0,1,0
4,Is there a discount code available for my next...,0,0,1,0


In [None]:
class TransformerBlock(layers.Layer):
    """Transformer encoder block: self-attention followed by a feed-forward net.

    Each of the two sub-layers is followed by dropout, a residual addition and
    layer normalisation.

    Args:
        embed_dim: Output size of the feed-forward net. Its output is added
            back to the block input, so this has to match the last axis of
            the input. Also passed as `key_dim` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Units of the hidden (ReLU) Dense layer in the feed-forward net.
        rate: Dropout rate used after attention and after the feed-forward net.
        **kwargs: Forwarded to `layers.Layer` (e.g. `name`, `dtype`,
            `trainable`). Needed so the layer can be rebuilt from its config.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = keras.Sequential(
            [
                layers.Dense(ff_dim, activation="relu"),
                layers.Dense(embed_dim),
            ]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to `inputs`.

        Args:
            inputs: Tensor used as both query and value of the attention layer.
            training: Passed through to the two dropout layers. Defaults to
                None so the layer can also be called without the argument.

        Returns:
            The layer-normalised output of the second residual connection.
        """
        # Self-attention: the same tensor is query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

In [None]:
class TokenAndPositionEmbedding(layers.Layer):
    """Sum of a learned token embedding and a learned position embedding.

    Args:
        maxlen: Number of rows in the position-embedding table; inputs whose
            last axis is longer than this would index past the table.
        vocab_size: Number of rows in the token-embedding table; token ids
            must be smaller than this.
        embed_dim: Size of each embedding vector.
        **kwargs: Forwarded to `layers.Layer` (e.g. `name`, `dtype`,
            `trainable`). Needed so the layer can be rebuilt from its config.
    """

    def __init__(self, maxlen, vocab_size, embed_dim, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim

        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        """Embed token ids `x` and add the embedding of each position.

        Args:
            x: Tensor of token ids; its last axis is treated as the sequence axis.

        Returns:
            Token embeddings plus position embeddings (the position term is
            broadcast over the leading axes by the addition).
        """
        # Length of the sequence axis of this particular input.
        seq_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=seq_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

    def get_config(self):
        """Return the constructor arguments so a saved model can rebuild the layer."""
        config = super().get_config()
        config.update(
            {
                "maxlen": self.maxlen,
                "vocab_size": self.vocab_size,
                "embed_dim": self.embed_dim,
            }
        )
        return config

In [None]:
# Sizes shared by the embedding layers and the model input.
maxlen = 63        # sequence length of the model input / rows of the position table
vocab_size = 2000  # rows of the token-embedding table

In [None]:
# stpword = stopwords.words('english')
# def get_text_processing(text):
#     no_punctuation = [char for char in text if char not in string.punctuation]
#     no_punctuation = ''.join(no_punctuation)
#     return ' '.join([word for word in no_punctuation.split() if word.lower() not in stpword])


In [None]:
# df['query'] = df['query'].apply(get_text_processing)
# df.head()

Unnamed: 0,query,complaint,feedback,inquiry,refund
0,status return request,0,0,1,0
1,received damaged item get replacement,0,0,1,0
2,provide information products specifications,0,0,1,0
3,forgot password reset,0,0,1,0
4,discount code available next purchase,0,0,1,0


In [None]:
# Features are the raw query strings; targets are the one-hot label columns
# (everything in df except `query`).
X = df["query"].values
y = df.drop("query", axis=1).values

# The tokenizer's id range has to fit the token-embedding table, which is built
# with `vocab_size` rows. It was previously capped at 20000, so any word ranked
# beyond vocab_size would have produced an id the embedding cannot look up.
max_features = vocab_size
tokenizer = Tokenizer(num_words=max_features, split=' ')
tokenizer.fit_on_texts(X)
X = tokenizer.texts_to_sequences(X)
# Pad/truncate to the same length the model's Input layer is declared with.
X = pad_sequences(X, maxlen=maxlen)

# 70/30 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

print(X_train.shape,y_train.shape)
print(X_test.shape,y_test.shape)

(2104, 63) (2104, 4)
(903, 63) (903, 4)


In [None]:
# Model hyper-parameters.
embed_dim, num_heads, ff_dim = 100, 4, 200

# Token ids -> token + position embeddings.
inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(
    maxlen=maxlen, vocab_size=vocab_size, embed_dim=embed_dim
)
x = embedding_layer(inputs)

# One Transformer encoder block over the embedded sequence.
transformer_block = TransformerBlock(
    embed_dim=embed_dim, num_heads=num_heads, ff_dim=ff_dim
)
x = transformer_block(x)

# Average over the sequence axis, then a small dense classification head.
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(rate=0.1)(x)
x = layers.Dense(units=20, activation="relu")(x)
x = layers.Dropout(rate=0.1)(x)
outputs = layers.Dense(units=4, activation="softmax")(x)  # one unit per intent class

model = keras.Model(inputs, outputs)

In [None]:
# Multi-class setup: one-hot targets with a softmax output.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Train for 10 epochs; the held-out split is evaluated after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=128,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# Persist the trained model to Drive in HDF5 format (chosen by the .h5 suffix).
model.save(filepath="/content/drive/MyDrive/sentu2.h5")

In [None]:
text = ["The website was reliable."]

# for i in text:
#   text = [get_text_processing(i)]

# print(text)

# Encode the query with the tokenizer fitted on the training data and pad it
# to the length the model's Input layer expects.
text = tokenizer.texts_to_sequences(text)
text = pad_sequences(text, maxlen=maxlen, dtype='int32', value=0)
intent = model.predict(text,batch_size=1,verbose = 2)[0]

print(intent)

# The targets were built from the columns of df other than `query`, so output
# unit i corresponds to the i-th of those columns. Reading the names from df
# keeps the mapping correct if the label set changes, instead of repeating it
# in a hard-coded if/elif chain.
intent_labels = df.drop("query", axis=1).columns
print(intent_labels[np.argmax(intent)])

1/1 - 0s - 18ms/epoch - 18ms/step
[1.9136231e-03 9.9808276e-01 3.4598979e-06 2.6640814e-07]
feedback
