In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import f1_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.feature_extraction.text import CountVectorizer

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.layers import Dense, Softmax, Concatenate

2022-08-03 18:44:11.247474: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-03 18:44:11.247534: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Directory that holds train_data.csv and valid_data.csv.
# Set the SAARTHI_DATA_DIR environment variable to point somewhere else; when it is
# unset the original hard-coded location is used, so existing runs behave as before.
data_path = os.environ.get("SAARTHI_DATA_DIR", "/home/elemento/Downloads/Saarthi/task_data")

# Later cells open the CSV files by bare file name, so the working directory must be the data directory.
os.chdir(data_path)
os.getcwd()

'/home/elemento/Downloads/Saarthi/task_data'

### 1. Loading Data

In [3]:
# Read both splits (file names are resolved against the current working directory)
# and show their (rows, columns) shapes as a quick sanity check.
df_train, df_valid = (
    pd.read_csv(csv_name) for csv_name in ("train_data.csv", "valid_data.csv")
)
print(df_train.shape, df_valid.shape)

(11566, 5) (3118, 5)


### 2. Pre-Processing Training Data

In [4]:
# Preview the first five rows of the raw training frame.
df_train.head(n = 5)

Unnamed: 0,path,transcription,action,object,location
0,wavs/speakers/xRQE5VD7rRHVdyvM/7372ca00-45c4-1...,Turn on the kitchen lights,activate,lights,kitchen
1,wavs/speakers/R3mexpM2YAtdPbL7/dae28110-44fe-1...,Turn up the temperature,increase,heat,none
2,wavs/speakers/ZebMRl5Z7dhrPKRD/b55dcfd0-455d-1...,OK now switch the main language to Chinese,change language,Chinese,none
3,wavs/speakers/ppzZqYxGkESMdA5Az/61c54a20-4476-...,Turn down the bathroom temperature,decrease,heat,washroom
4,wavs/speakers/zaEBPeMY4NUbDnZy/8ef57ec0-44df-1...,Change the language,change language,none,none


In [5]:
# The transcription column already carries the text, so the audio file path is not used.
pre_train = df_train.drop(columns = ["path"])
pre_train.head(n = 5)

Unnamed: 0,transcription,action,object,location
0,Turn on the kitchen lights,activate,lights,kitchen
1,Turn up the temperature,increase,heat,none
2,OK now switch the main language to Chinese,change language,Chinese,none
3,Turn down the bathroom temperature,decrease,heat,washroom
4,Change the language,change language,none,none


In [6]:
# The three label columns that the model has to predict.
pre_train_to_encode = pre_train.loc[:, ["action", "object", "location"]]
print(pre_train_to_encode.shape)

# One-hot encode each label column and lay the groups side by side in a single dense target matrix.
# The encoder is fitted here and reused for the validation labels.
enc = OneHotEncoder(handle_unknown = 'ignore')
Y_train = enc.fit_transform(pre_train_to_encode).toarray()
print(Y_train.shape)

(11566, 3)
(11566, 24)


In [7]:
# Binary bag-of-words: each feature records only whether a term occurs in the transcription.
# The vectorizer is fitted here and reused for the validation transcriptions.
vectorizer = CountVectorizer(binary = True)
X_train = vectorizer.fit_transform(pre_train["transcription"]).toarray()
print(X_train.shape)

(11566, 92)


### 3. Pre-Processing Validation Data

In [8]:
# Same treatment as the training frame: the audio path is dropped because the transcript is used instead.
pre_val = df_valid.drop(columns = ["path"])
pre_val.head(n = 5)

Unnamed: 0,transcription,action,object,location
0,Turn on the lights,activate,lights,none
1,Turn off the lights,deactivate,lights,none
2,Change language,change language,none,none
3,Pause the music,deactivate,music,none
4,Resume,activate,music,none


In [9]:
# Label columns of the validation split.
pre_val_to_encode = pre_val.loc[:, ["action", "object", "location"]]
print(pre_val_to_encode.shape)

# Only transform here: the encoder was fitted on the training labels, which keeps the
# column layout identical to Y_train.
Y_val = enc.transform(pre_val_to_encode).toarray()
print(Y_val.shape)

(3118, 3)
(3118, 24)


In [10]:
# Reuse the vocabulary learned from the training transcriptions (transform only, no refit).
X_val = vectorizer.transform(pre_val["transcription"]).toarray()
print(X_val.shape)

(3118, 92)


### 4. Defining a Custom Loss Function, Accuracy Metric, and F1 Score

In [11]:
class MyLoss(tf.keras.losses.Loss):
    """Sum of the categorical cross-entropies of the action, object and location heads.

    `y_true` and `y_pred` are expected to hold the three one-hot / softmax groups
    side by side along axis 1: columns `[0, act_ind)` for the action,
    `[act_ind, obj_ind)` for the object and `[obj_ind, loc_ind)` for the location.

    Args:
        act_ind: Column index where the action group ends.
        obj_ind: Column index where the object group ends.
        loc_ind: Column index where the location group ends.
    """

    def __init__(self, act_ind = 6, obj_ind = 20, loc_ind = 24):
        super().__init__()
        self.act_ind = act_ind
        self.obj_ind = obj_ind
        self.loc_ind = loc_ind

    def call(self, y_true, y_pred):
        """Return one loss value per sample (shape `(batch,)`).

        The batch reduction is deliberately left to the `Loss` base class. Returning
        an already-averaged scalar (as a nested `CategoricalCrossentropy()` object
        would) prevents `sample_weight` and the configured `reduction` from being
        applied per sample. Without sample weights the reduced value is unchanged:
        the mean of the per-sample sums equals the sum of the per-head means.
        """
        act_ind = self.act_ind
        obj_ind = self.obj_ind
        loc_ind = self.loc_ind

        # Functional form: returns the un-reduced, per-sample cross-entropy.
        cce = tf.keras.losses.categorical_crossentropy

        act_loss = cce(y_true[ : , :act_ind], y_pred[ : , :act_ind])
        obj_loss = cce(y_true[ : , act_ind:obj_ind], y_pred[ : , act_ind:obj_ind])
        loc_loss = cce(y_true[ : , obj_ind:loc_ind], y_pred[ : , obj_ind:loc_ind])

        return act_loss + obj_loss + loc_loss

In [12]:
class MyAcc(tf.keras.metrics.Metric):
    """Exact-match accuracy over the action, object and location heads.

    A sample counts as correct only when the arg-max of all three label groups
    matches the ground truth. Counts are accumulated in metric variables across
    batches, so `result()` is the running accuracy since the last reset rather
    than the accuracy of the most recent batch only.

    Args:
        name: Optional metric name; `None` keeps the Keras default derived from the class name.
        **kwargs: Forwarded to `tf.keras.metrics.Metric`.
    """

    def __init__(self, name = None, **kwargs):
        super().__init__(name = name, **kwargs)
        # State lives in variables created by add_weight so that the base class
        # can reset it between epochs / evaluations.
        self.correct = self.add_weight(name = "correct", initializer = "zeros")
        self.total = self.add_weight(name = "total", initializer = "zeros")

    @staticmethod
    def _head_correct(y_true, y_pred, start, stop):
        """Boolean vector: does the arg-max of columns [start, stop) agree per sample?"""
        true_labels = tf.math.argmax(y_true[ : , start:stop], axis = 1)
        pred_labels = tf.math.argmax(y_pred[ : , start:stop], axis = 1)
        return tf.math.equal(true_labels, pred_labels)

    def update_state(self, y_true, y_pred, sample_weight = None):
        """Add one batch to the running counts.

        Args:
            y_true: Targets with the three one-hot groups concatenated along axis 1.
            y_pred: Predictions laid out like `y_true`.
            sample_weight: Optional per-sample weights; when omitted every sample weighs 1.
        """
        # Column boundaries of the action / object / location groups.
        act_ind = 6
        obj_ind = 20
        loc_ind = 24

        act_correct = self._head_correct(y_true, y_pred, 0, act_ind)
        obj_correct = self._head_correct(y_true, y_pred, act_ind, obj_ind)
        loc_correct = self._head_correct(y_true, y_pred, obj_ind, loc_ind)

        # 1.0 where all three heads are right, 0.0 otherwise.
        all_correct = tf.math.logical_and(tf.math.logical_and(act_correct, obj_correct), loc_correct)
        exact_match = tf.cast(all_correct, self.dtype)

        if sample_weight is None:
            weights = tf.ones_like(exact_match)
        else:
            weights = tf.reshape(tf.cast(sample_weight, self.dtype), [-1])

        self.correct.assign_add(tf.math.reduce_sum(exact_match * weights))
        self.total.assign_add(tf.math.reduce_sum(weights))

    def result(self):
        """Running exact-match accuracy; 0 when no samples have been seen yet."""
        return tf.math.divide_no_nan(self.correct, self.total)

In [13]:
def MyF1Score(y_true, y_pred):
    """Weighted F1 for each label group plus the mean of the three.

    Both arrays hold the action, object and location groups side by side along
    axis 1 (columns 0-5, 6-19 and 20-23). Each group is reduced to a class index
    with arg-max before scoring.

    Returns:
        A list `[action_f1, object_f1, location_f1, mean_f1]`.
    """
    # Consecutive pairs of these edges delimit the action, object and location groups.
    group_edges = [0, 6, 20, 24]

    group_scores = []
    for first_col, end_col in zip(group_edges[:-1], group_edges[1:]):
        true_ids = np.argmax(y_true[ : , first_col:end_col], axis = 1)
        pred_ids = np.argmax(y_pred[ : , first_col:end_col], axis = 1)
        group_scores.append(f1_score(true_ids, pred_ids, average = 'weighted'))

    return group_scores + [np.mean(group_scores)]

### 5. Training a TensorFlow Functional Model

In [14]:
# Input Layer
# One feature per vocabulary term of the bag-of-words matrix. The width is read from
# X_train so the model keeps matching the vectorizer if the vocabulary changes.
# The shape is written as a real 1-tuple: `(92)` is just the integer 92.
model_input = keras.Input(shape=(X_train.shape[1],), name="input")

# Common Architecture: a trunk shared by all three prediction heads
x = Dense(units = 64, activation = "relu")(model_input)
x = Dense(units = 64, activation = "relu")(x)
x = Dense(units = 32, activation = "relu")(x)
x = Dense(units = 32, activation = "relu")(x)

# Differing Architectures: one hidden layer plus a softmax per label group.
# The head widths (6 actions, 14 objects, 4 locations) must agree with the
# 6 / 20 / 24 column boundaries used by the loss, accuracy and F1 helpers.
p1 = Dense(units = 32, activation = "relu")(x)
out1 = Dense(units = 6, activation = "softmax")(p1)

p2 = Dense(units = 32, activation = "relu")(x)
out2 = Dense(units = 14, activation = "softmax")(p2)

p3 = Dense(units = 32, activation = "relu")(x)
out3 = Dense(units = 4, activation = "softmax")(p3)

# Concatenate the Outputs in the same order as the one-hot target columns
# (action, object, location).
out = Concatenate(axis = 1)([out1, out2, out3])

label_model = tf.keras.Model(model_input, out, name = 'label_model')
label_model.summary()

Model: "label_model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input (InputLayer)             [(None, 92)]         0           []                               
                                                                                                  
 dense (Dense)                  (None, 64)           5952        ['input[0][0]']                  
                                                                                                  
 dense_1 (Dense)                (None, 64)           4160        ['dense[0][0]']                  
                                                                                                  
 dense_2 (Dense)                (None, 32)           2080        ['dense_1[0][0]']                
                                                                                        

2022-08-03 18:44:13.568109: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-08-03 18:44:13.568154: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-08-03 18:44:13.568192: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (lenovo): /proc/driver/nvidia/version does not exist
2022-08-03 18:44:13.568608: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [15]:
# Train with Adam on the combined three-head loss, tracking exact-match accuracy.
label_model.compile(optimizer = 'adam', loss = MyLoss(), metrics = [MyAcc()])

# Five passes over the training matrix in mini-batches of 16 samples.
label_model.fit(x = X_train, y = Y_train, epochs = 5, batch_size = 16)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f3b61d2be50>

### 6. Predicting and Evaluating

In [16]:
# Run the trained model over both splits (training first, then validation).
# Each row holds the three softmax groups concatenated.
preds_train, preds_val = (
    label_model.predict(features) for features in (X_train, X_val)
)
print(preds_train.shape, preds_val.shape)

(11566, 24) (3118, 24)


In [17]:
# MyF1Score returns [action, object, location, combined]; the combined value is printed
# under the existing "Sum" column heading.
train_f1 = MyF1Score(Y_train, preds_train)
train_act_f1, train_obj_f1, train_loc_f1, train_all_f1 = train_f1
print("Training F1 Score: Action | Object | Location | Sum")
print(f"                 : {train_act_f1} | {train_obj_f1} | {train_loc_f1} | {train_all_f1}")

val_f1 = MyF1Score(Y_val, preds_val)
val_act_f1, val_obj_f1, val_loc_f1, val_all_f1 = val_f1
print("\nValidation F1 Score: Action | Object | Location | Sum")
print(f"                   : {val_act_f1} | {val_obj_f1} | {val_loc_f1} | {val_all_f1}")

Training F1 Score: Action | Object | Location | Sum
                 : 1.0 | 1.0 | 1.0 | 1.0

Validation F1 Score: Action | Object | Location | Sum
                   : 1.0 | 1.0 | 1.0 | 1.0
