In [1]:
import tensorflow as tf
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from keras.utils import np_utils


def _read_lines(path):
    """Return the lines of the text file at *path* without line terminators.

    ``readlines()`` keeps the trailing newline on every entry. For label
    files that newline would become part of the class name, so a final line
    without a terminator would be treated as a different class, and printed
    labels would be broken across lines.
    """
    with open(path, "r", encoding="utf-8") as f:
        return [line.rstrip("\r\n") for line in f]


# --- Load the training corpus: one sentence / one label per line. ---
sentences = _read_lines("train_sentence.txt")
labels = _read_lines("train_label.txt")

# --- Turn sentences into fixed-length integer sequences. ---
tokenizer = tf.keras.preprocessing.text.Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)

max_sequence_length = 50
data = tf.keras.preprocessing.sequence.pad_sequences(
    sequences, maxlen=max_sequence_length
)

# --- Encode string labels as integers, then as one-hot vectors. ---
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)
num_classes = len(label_encoder.classes_)
dummy_labels = np_utils.to_categorical(encoded_labels, num_classes)

# Hold out 20% of the training corpus; it is used as validation data in fit().
train_data, test_data, train_labels, test_labels = train_test_split(
    data, dummy_labels, test_size=0.2, random_state=42
)

# --- 1D-CNN classifier over learned word embeddings. ---
model = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(
        # +1 because Tokenizer word indices start at 1 (0 is the padding id).
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=64,
        input_length=max_sequence_length,
    ),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Conv1D(128, 5, activation='relu'),
    tf.keras.layers.GlobalMaxPooling1D(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(num_classes, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']
)

model.fit(
    train_data,
    train_labels,
    epochs=20,
    batch_size=32,
    validation_data=(test_data, test_labels),
)

# --- Evaluate on the separate test files. ---
# `td` keeps the raw test sentences for the report below; `test_data` is
# rebound to their padded integer sequences (it previously held the
# validation split, which is no longer needed after fit()).
td = _read_lines("test_sentences.txt")
y_true = _read_lines("test_label.txt")

test_sequences = tokenizer.texts_to_sequences(td)
test_data = tf.keras.preprocessing.sequence.pad_sequences(
    test_sequences, maxlen=max_sequence_length
)
# Raises ValueError if the test set contains a label never seen in training.
encoded_y_true = label_encoder.transform(y_true)
dummy_y_true = np_utils.to_categorical(encoded_y_true, num_classes)

predictions = model.predict(test_data)
# Most probable class index per sample, mapped back to its label string.
res = np.argmax(predictions, axis=1)
y_pred = label_encoder.inverse_transform(res)

acc = accuracy_score(y_true, y_pred)
for text, true_label, predicted_label in zip(td, y_true, y_pred):
    print(f"Text: {text}")
    print(f"Label true: {true_label}, Label predict: {predicted_label}\n")

print("\nThe accuracy :", acc * 100, "%")


Streaming output truncated to the last 5000 lines.


Text: i get paid in gbp. should i configure this and if so, where?

Label true: receiving_money
, Label predict: receiving_money


Text: is it possible for me to get money out in a different currency?

Label true: receiving_money
, Label predict: exchange_via_app


Text: can i use this to receive my salary?

Label true: receiving_money
, Label predict: receiving_money


Text: my salary is received in the form of gbp. do i need to do anything specific to configure this?

Label true: receiving_money
, Label predict: receiving_money


Text: how do people send me money?

Label true: receiving_money
, Label predict: receiving_money


Text: is gbp a supported currency?

Label true: receiving_money
, Label predict: exchange_via_app


Text: do i need to establish i am paid in gbp before a transfer?

Label true: receiving_money
, Label predict: receiving_money


Text: salary in gbp has been received, does it need to be configure