## Importing Necessary Library

In [1]:
import pandas as pd
import re
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.porter import PorterStemmer
from bertopic import BERTopic
from nltk.stem import WordNetLemmatizer
from nltk import ngrams
import gensim.corpora as corpora
import tensorflow_hub as hub
import time
import gensim
import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
from keybert import KeyBERT
import tensorflow as tf

#### Loading training dataset

In [2]:
train_data = pd.read_csv('train_data.csv')

In [3]:
# ==== printng head of data
train_data.head(10)

Unnamed: 0,path,transcription,action,object,location
0,wavs/speakers/xRQE5VD7rRHVdyvM/7372ca00-45c4-1...,Turn on the kitchen lights,activate,lights,kitchen
1,wavs/speakers/R3mexpM2YAtdPbL7/dae28110-44fe-1...,Turn up the temperature,increase,heat,none
2,wavs/speakers/ZebMRl5Z7dhrPKRD/b55dcfd0-455d-1...,OK now switch the main language to Chinese,change language,Chinese,none
3,wavs/speakers/ppzZqYxGkESMdA5Az/61c54a20-4476-...,Turn down the bathroom temperature,decrease,heat,washroom
4,wavs/speakers/zaEBPeMY4NUbDnZy/8ef57ec0-44df-1...,Change the language,change language,none,none
5,wavs/speakers/W4XOzzNEbrtZz4dW/8a1fedc0-44d2-1...,Turn sound down,decrease,volume,none
6,wavs/speakers/8e5qRjN7dGuovkRY/92366b20-459e-1...,Decrease the heating in the washroom,decrease,heat,washroom
7,wavs/speakers/W4XOzzNEbrtZz4dW/d40180c0-44d2-1...,"It’s too loud, turn it down",decrease,volume,none
8,wavs/speakers/g2dnA9Wpvzi2WAmZ/694d7090-454a-1...,Change language,change language,none,none
9,wavs/speakers/5BEzPgPKe8taG9OB/b9b56e80-457b-1...,Turn the lights on in the kitchen,activate,lights,kitchen


In [4]:
train_data = train_data.drop(['path'], axis = 1)  # dropping path from dataset

In [5]:
# ==== merging action, object and location column
l = []
def merge(data):
    for i in range(len(data)):
        l.append(data["action"] + " " + data["object"] + " " + data["location"])
    return l


In [6]:
# ==== calling merge function
merge(train_data)

[0             activate lights kitchen
 1                  increase heat none
 2        change language Chinese none
 3              decrease heat washroom
 4           change language none none
                      ...             
 11561           increase heat kitchen
 11562              increase heat none
 11563                bring shoes none
 11564            decrease volume none
 11565       deactivate lights kitchen
 Length: 11566, dtype: object,
 0             activate lights kitchen
 1                  increase heat none
 2        change language Chinese none
 3              decrease heat washroom
 4           change language none none
                      ...             
 11561           increase heat kitchen
 11562              increase heat none
 11563                bring shoes none
 11564            decrease volume none
 11565       deactivate lights kitchen
 Length: 11566, dtype: object,
 0             activate lights kitchen
 1                  increase heat none
 2

In [7]:
train_data['merge'] = l[0]

In [8]:
len(train_data['merge'].unique()) # getting number of unique values in merge column

31

In [9]:
train_data.groupby('merge').describe() # showing description of merge column

Unnamed: 0_level_0,transcription,transcription,transcription,transcription,action,action,action,action,object,object,object,object,location,location,location,location
Unnamed: 0_level_1,count,unique,top,freq,count,unique,top,freq,count,unique,top,freq,count,unique,top,freq
merge,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
activate lamp none,187,4,Switch on the lamp,49,187,1,activate,187,187,1,lamp,187,187,1,none,187
activate lights bedroom,275,6,Switch the bedroom lights on,49,275,1,activate,275,275,1,lights,275,275,1,bedroom,275
activate lights kitchen,386,8,Kitchen lights on,57,386,1,activate,386,386,1,lights,386,386,1,kitchen,386
activate lights none,248,5,Turn the lights on,55,248,1,activate,248,248,1,lights,248,248,1,none,248
activate lights washroom,446,10,Turn the bathroom lights on,51,446,1,activate,446,446,1,lights,446,446,1,washroom,446
activate music none,359,7,Start the music,60,359,1,activate,359,359,1,music,359,359,1,none,359
bring juice none,243,5,Get me some juice,52,243,1,bring,243,243,1,juice,243,243,1,none,243
bring newspaper none,274,6,Bring me the newspaper,52,274,1,bring,274,274,1,newspaper,274,274,1,none,274
bring shoes none,266,6,Go get me my shoes,49,266,1,bring,266,266,1,shoes,266,266,1,none,266
bring socks none,265,6,Fetch my socks,52,265,1,bring,265,265,1,socks,265,265,1,none,265


In [10]:
# label encoding the merge column
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
r = le.fit_transform(train_data['merge'])

In [11]:
train_data['merge_vector'] = r

In [12]:
train_data.head(10)   # prining head of the updated data

Unnamed: 0,transcription,action,object,location,merge,merge_vector
0,Turn on the kitchen lights,activate,lights,kitchen,activate lights kitchen,2
1,Turn up the temperature,increase,heat,none,increase heat none,28
2,OK now switch the main language to Chinese,change language,Chinese,none,change language Chinese none,10
3,Turn down the bathroom temperature,decrease,heat,washroom,decrease heat washroom,24
4,Change the language,change language,none,none,change language none none,14
5,Turn sound down,decrease,volume,none,decrease volume none,25
6,Decrease the heating in the washroom,decrease,heat,washroom,decrease heat washroom,24
7,"It’s too loud, turn it down",decrease,volume,none,decrease volume none,25
8,Change language,change language,none,none,change language none none,14
9,Turn the lights on in the kitchen,activate,lights,kitchen,activate lights kitchen,2


In [13]:
train_data['location'].unique()

array(['kitchen', 'none', 'washroom', 'bedroom'], dtype=object)

In [14]:
train_data.shape          # Shape of train data

(11566, 6)

In [15]:
train_data.columns           # columns present in train data

Index(['transcription', 'action', 'object', 'location', 'merge',
       'merge_vector'],
      dtype='object')

In [16]:
train_data.isnull().sum()        # checking fot the null value 

transcription    0
action           0
object           0
location         0
merge            0
merge_vector     0
dtype: int64

In [17]:
# === Spliting the dataset into train and test
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(train_data['transcription'], train_data['merge_vector'], stratify= train_data['merge_vector'])

In [18]:
X_train.head(5)   # showing head of train data

7892              Lights on in the kitchen
4496                      Set the language
5504           Turn the washroom lights on
6817    Turn the heat down in the washroom
5610                               Lamp on
Name: transcription, dtype: object

In [19]:
import tensorflow_text as text

In [20]:
# calling bert preprocessor and bert encoder 

bert_preprocess = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")
bert_encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4")

INFO:absl:Using C:\Users\soni1\AppData\Local\Temp\tfhub_modules to cache modules.


In [21]:
# ==== sample test 
def gets(s):
    pt = bert_preprocess(s)
    return bert_encoder(pt)['pooled_output']

gets([
    "Turn on the kitchen lights",
    "Decrease the heating in the washroom"])

<tf.Tensor: shape=(2, 768), dtype=float32, numpy=
array([[-0.8874407 , -0.1622439 , -0.16199206, ...,  0.068757  ,
        -0.6292149 ,  0.9247728 ],
       [-0.92010224, -0.40742946, -0.9761191 , ..., -0.9196166 ,
        -0.6752491 ,  0.8806683 ]], dtype=float32)>

In [22]:
## creating layers for model

# Bert layers
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# Neural network layers
l = tf.keras.layers.Dropout(0.1, name="dropout")(outputs['pooled_output'])
l = tf.keras.layers.Dense(8, activation='relu')(l)
l = tf.keras.layers.Dense(6, activation='relu')(l)
l = tf.keras.layers.Dense(4, activation='relu')(l)
l = tf.keras.layers.Dense(2, activation='relu')(l)
l = tf.keras.layers.Dense(1, activation='relu', name="output")(l)

# Use inputs and outputs to construct a final model
model = tf.keras.Model(inputs=[text_input], outputs = [l])



In [23]:
# showing model summary
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               [(None,)]            0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        {'input_type_ids': ( 0           text[0][0]                       
__________________________________________________________________________________________________
keras_layer_1 (KerasLayer)      {'encoder_outputs':  109482241   keras_layer[0][0]                
                                                                 keras_layer[0][1]                
                                                                 keras_layer[0][2]                
______________________________________________________________________________________________

In [24]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
start = time.time()
model.fit(X_train,
          Y_train, 
          epochs=10)
end = time.time()
total_time = round(end-start,2)
print("\n\nModel took {} seconds to train".format(total_time))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

### Model training will take more 3 - 4 hrs time it's not yet completed, my system is not having graphics card thta's why it's is taking more and more time. Hence, I'm submitting it beacuse of deadline. Please consider this.