In [70]:
import numpy as np
import pandas as pd

In [71]:
df = pd.read_csv("NLUdata.csv")

In [72]:
df.head()

Unnamed: 0,Text,Intent
0,Hi,Greet
1,How are you?,Greet
2,Hello,Greet
3,Hey there!,Greet
4,Good morning,Greet


In [73]:
df["Intent"].value_counts()

Intent
Recommendation    24
Greet             20
Advice            16
Inquiry           12
Farewell          10
Name: count, dtype: int64

In [74]:
df.shape

(82, 2)

In [75]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df["Intent_encoded"] = le.fit_transform(df["Intent"])

In [83]:
df.head()

Unnamed: 0,Text,Intent,Intent_encoded
0,Hi,Greet,2
1,How are you?,Greet,2
2,Hello,Greet,2
3,Hey there!,Greet,2
4,Good morning,Greet,2


In [None]:
# A dependency of the preprocessing for BERT inputs
!pip install -U "tensorflow-text==2.13.*"
!pip install "tf-models-official==2.13.*"

In [77]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text

### **Text Embedding**

In [78]:
bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

In [79]:
def get_sentence_embeding(sentences):
    preprocessed_text = bert_preprocess(sentences)
    return bert_encoder(preprocessed_text)['pooled_output']

In [80]:
def batch_generator(sentences, batch_size):
    for i in range(0, len(sentences), batch_size):
        yield sentences[i:i+batch_size]

batch_size = 50
embeddings = []
i = 0

for batch in batch_generator(df["Text"], batch_size):
    print(i)
    embeddings.append(get_sentence_embeding(batch))
    i = i + 1

embededtext = tf.concat(embeddings, axis=0)

0
1


In [81]:
embededtext = embededtext.numpy()

In [86]:
X = df.drop(["Intent","Intent_encoded"], axis=1)
Y = df["Intent"]

In [87]:
df_E = embededtext
df_E.shape

(82, 768)

In [88]:
from sklearn.model_selection import train_test_split

X_train,X_test,Y_train,Y_test = train_test_split(df_E,Y,train_size=0.95)
X_train.shape

(77, 768)

In [89]:
X_train

array([[-0.9103812 , -0.5113187 , -0.94031984, ..., -0.84036183,
        -0.74691874,  0.85486543],
       [-0.975483  , -0.60691714, -0.9920019 , ..., -0.91500324,
        -0.82740754,  0.97546566],
       [-0.85630196, -0.45192748, -0.847988  , ..., -0.63242424,
        -0.7473691 ,  0.8741666 ],
       ...,
       [-0.9243624 , -0.4758008 , -0.8952336 , ..., -0.7635424 ,
        -0.75172174,  0.92970246],
       [-0.7723576 , -0.16994861,  0.32714078, ...,  0.44123703,
        -0.5151343 ,  0.84427124],
       [-0.79174066, -0.36748108, -0.10762394, ..., -0.1396516 ,
        -0.59330964,  0.8521762 ]], dtype=float32)

### **Model Building**

In [90]:
from sklearn.ensemble import RandomForestClassifier


clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, Y_train)

In [102]:
import joblib

joblib.dump(clf,"Intent_classifier.joblib")

['Intent_classifier.joblib']

In [91]:
y_pred = clf.predict(X_test)
y_pred

array(['Recommendation', 'Farewell', 'Recommendation', 'Farewell',
       'Recommendation'], dtype=object)

In [92]:
accuracy = clf.score(X_test, Y_test)
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.60


**Try for New**

In [None]:
sentence = "Hi"
embedded_text = get_sentence_embeding([sentence])

In [94]:
new_prediction = clf.predict_proba(embedded_text)
max_index = np.argmax(new_prediction)
max_probability = new_prediction[0][max_index]
predicted_class = clf.classes_[max_index]

print("Predicted class:", predicted_class)
print("Max probability:", max_probability)

Predicted class: Greet
Max probability: 0.99


### **FallBack Mechanism on Threshold**

In [115]:
def Intent_prediction(sentence):
  embedded_text = get_sentence_embeding([sentence])
  new_prediction = clf.predict_proba(embedded_text)
  max_index = np.argmax(new_prediction)
  max_probability = new_prediction[0][max_index]
  predicted_class = clf.classes_[max_index]

  if(max_probability >= 0.7):
    print("Predicted class:", predicted_class)
    print("Max probability:", max_probability)
  else:
    print("NLU fallback: Intent could not be confidently determined")

In [120]:
sentenceX = "Hello"
Intent_prediction(sentenceX)

Predicted class: Greet
Max probability: 0.93


In [113]:
sentenceZ = "Take care"
Intent_prediction(sentenceZ)

Predicted class: Farewell
Max probability: 0.72


In [121]:
sentenceQ = "Listen"
Intent_prediction(sentenceQ)

NLU fallback: Intent could not be confidently determined
