In [1]:
# Import 
import gensim.downloader

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

from sklearn import preprocessing
from sklearn.pipeline import make_pipeline

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd

import _helpers as hp

# Load Data

In [2]:
# Ground-truth transcripts.
GT_data = pd.read_csv("snips/merged_GT_data.csv")

# ASR transcripts: improved speech recognition with 15555 framerate,
# with autocorrection applied.
ASR_data = pd.read_csv("snips/new_ASR_Autocorrection_with_labels.csv")

## Data manipulation

In [135]:
# Labels for the ASR transcripts (column "user_action_num") as a NumPy array.
y_num_ASR = np.array(ASR_data["user_action_num"])

# Tokenise every ASR transcript on single spaces.
# XX_ASR is a pandas Series of token lists; X_ASR holds the same token lists
# as a plain Python list (not a NumPy array).
XX_ASR = ASR_data["transcript"].apply(lambda transcript: transcript.split(' '))
X_ASR = XX_ASR.tolist()

In [136]:
# Labels for the ground-truth transcripts (column "user_action_num") as a NumPy array.
y_num_GT = np.array(GT_data["user_action_num"])

# Tokenise every ground-truth transcript on single spaces.
# XX_GT is a pandas Series of token lists; X_GT holds the same token lists
# as a plain Python list (not a NumPy array).
XX_GT = GT_data["transcript"].apply(lambda transcript: transcript.split(' '))
X_GT = XX_GT.tolist()

# Word2Vec

## Import pre-trained Word2Vec model

In [137]:
# NOTE(review): despite the "Word2Vec" naming used in this notebook, the
# identifier requested here is a GloVe embedding set ('glove-wiki-gigaword-100').
# Presumably gensim returns it as KeyedVectors with vector_size == 100 and
# downloads it on first use -- TODO confirm.
model_w2v = gensim.downloader.load('glove-wiki-gigaword-100')

## Get features and labels

In [138]:
def get_Word2Vec_feature(sentence, model=None):
    """Return the summed embedding of the in-vocabulary tokens of a sentence.

    Parameters
    ----------
    sentence : iterable of str
        Tokens of one transcript.
    model : optional
        Embedding lookup exposing ``vector_size`` and ``model[word]`` (raising
        ``KeyError`` for unknown words). Defaults to the notebook-level
        ``model_w2v``, which keeps existing one-argument calls working.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(model.vector_size,)`` holding the element-wise SUM
        (not the mean) of the vectors of all tokens found in the model.
        All zeros when the sentence is empty or no token is known.

    Raises
    ------
    Any exception other than ``KeyError`` raised by the lookup is propagated.
    The previous bare ``except:`` silently hid every failure, including
    ``KeyboardInterrupt`` and genuine bugs.
    """
    if model is None:
        model = model_w2v

    sentence_vector = np.zeros((model.vector_size,))

    for word in sentence:
        try:
            sentence_vector += np.asarray(model[word])
        except KeyError:
            # Out-of-vocabulary token: it contributes nothing to the sum.
            continue

    return sentence_vector

In [139]:
# Feature matrices: one row per transcript, each row being the vector returned
# by get_Word2Vec_feature for that transcript's token list.
features_GT = np.stack([get_Word2Vec_feature(tokens) for tokens in XX_GT], axis=0)
features_ASR = np.stack([get_Word2Vec_feature(tokens) for tokens in XX_ASR], axis=0)

In [140]:
# Label Series taken from the same frames (and row order) as the transcripts.
labels_GT = GT_data.loc[:, "user_action_num"]
labels_ASR = ASR_data.loc[:, "user_action_num"]

# Classifiers

## Train/Test Split

To train and test only with ground-truth data, uncomment the next cell and comment out the remaining cells in this section.

In [141]:
# # Split into training and test data

# train_features, test_features, train_labels, test_labels = train_test_split(features_GT, labels_GT, train_size= 0.9)

To train and test only with ASR data, uncomment the next cell and comment out the remaining cells in this section.

In [142]:
# # Split into training and test data

# train_features, test_features, train_labels, test_labels = train_test_split(features_ASR, labels_ASR, train_size= 0.9)

To train with ground-truth data and test with ASR data, uncomment the next cell and comment out the remaining cells in this section.

In [143]:
# No random split here: fit on the ground-truth data, evaluate on the ASR data.
train_features, train_labels = features_GT, labels_GT
test_features, test_labels = features_ASR, labels_ASR

## Logistic Regression

In [144]:
# Logistic regression (C=0.06, at most 1000 iterations) placed behind a
# StandardScaler, then fitted on the training split.
lgr = LogisticRegression(max_iter=1000, C=0.06)
lgr_pipe = make_pipeline(preprocessing.StandardScaler(), lgr)
lgr_pipe.fit(train_features, train_labels)

# Accuracy on the test split, plus the per-sample predictions used for the
# evaluation report.
lgr_score = lgr_pipe.score(test_features, test_labels)
lgr_pred_labels = lgr_pipe.predict(test_features)

### Model Evaluation

In [145]:
# Display names for the classes.
# NOTE(review): presumably ordered to match the sorted numeric labels -- TODO confirm.
intent_names = [
    'SwitchLightOff',
    'SwitchLightOn',
    'IncreaseBrightness',
    'DecreaseBrightness',
    'SetLightBrightness',
    "SetLightColor",
]

# Per-class precision/recall/F1, then the confusion matrix, then the accuracy.
print(classification_report(test_labels, lgr_pred_labels, target_names=intent_names))
print(confusion_matrix(test_labels, lgr_pred_labels))
print("\nACCURACY:", lgr_score)


                    precision    recall  f1-score   support

    SwitchLightOff       0.78      0.73      0.75       276
     SwitchLightOn       0.70      0.88      0.78       257
IncreaseBrightness       0.71      0.80      0.75       269
DecreaseBrightness       0.72      0.66      0.69       268
SetLightBrightness       0.91      0.84      0.87       296
     SetLightColor       0.88      0.78      0.82       294

          accuracy                           0.78      1660
         macro avg       0.78      0.78      0.78      1660
      weighted avg       0.79      0.78      0.78      1660

[[201  20  19  18  10   8]
 [  9 225   9   8   2   4]
 [ 13  12 215  22   2   5]
 [ 16  19  40 176   7  10]
 [  8  13   8  13 249   5]
 [ 10  31  13   8   4 228]]

ACCURACY: 0.7795180722891566


# Naive Bayes

### Gaussian Naive Bayes

In [146]:
# Gaussian naive Bayes placed behind a StandardScaler, then fitted on the
# training split.
gnb = GaussianNB()
gnb_pipe = make_pipeline(preprocessing.StandardScaler(), gnb)
gnb_pipe.fit(train_features, train_labels)

# Accuracy on the test split, plus the per-sample predictions used for the
# evaluation report.
gnb_score = gnb_pipe.score(test_features, test_labels)
gnb_pred_labels = gnb_pipe.predict(test_features)

#### Model Evaluation

In [147]:
# Display names for the classes.
# NOTE(review): presumably ordered to match the sorted numeric labels -- TODO confirm.
intent_names = [
    'SwitchLightOff',
    'SwitchLightOn',
    'IncreaseBrightness',
    'DecreaseBrightness',
    'SetLightBrightness',
    "SetLightColor",
]

# Per-class precision/recall/F1, then the confusion matrix, then the accuracy.
print(classification_report(test_labels, gnb_pred_labels, target_names=intent_names))
print(confusion_matrix(test_labels, gnb_pred_labels))
print("\nACCURACY:", gnb_score)

                    precision    recall  f1-score   support

    SwitchLightOff       0.61      0.60      0.61       276
     SwitchLightOn       0.69      0.65      0.67       257
IncreaseBrightness       0.65      0.42      0.51       269
DecreaseBrightness       0.44      0.45      0.45       268
SetLightBrightness       0.76      0.81      0.79       296
     SetLightColor       0.56      0.72      0.63       294

          accuracy                           0.62      1660
         macro avg       0.62      0.61      0.61      1660
      weighted avg       0.62      0.62      0.61      1660

[[166  18   8  25  19  40]
 [ 11 167   7  19   7  46]
 [ 35   7 113  58  23  33]
 [ 47  12  37 121  10  41]
 [  4   9   5  29 241   8]
 [  8  29   4  23  17 213]]

ACCURACY: 0.6150602409638555


# SVM

In [148]:
# Support-vector classifier with default constructor arguments.
# The Normalizer step rescales each sample (row) to unit norm; it does not
# standardise the features column-wise the way StandardScaler does.
svm = SVC()
svm_pipe = make_pipeline(preprocessing.Normalizer(), svm)
svm_pipe.fit(train_features, train_labels)

# Accuracy on the test split, plus the per-sample predictions used for the
# evaluation report.
svm_score = svm_pipe.score(test_features, test_labels)
svm_pred_labels = svm_pipe.predict(test_features)

### Model Evaluation

In [149]:
# Display names for the classes.
# NOTE(review): presumably ordered to match the sorted numeric labels -- TODO confirm.
intent_names = [
    'SwitchLightOff',
    'SwitchLightOn',
    'IncreaseBrightness',
    'DecreaseBrightness',
    'SetLightBrightness',
    "SetLightColor",
]

# Per-class precision/recall/F1, then the confusion matrix, then the accuracy.
print(classification_report(test_labels, svm_pred_labels, target_names=intent_names))
print(confusion_matrix(test_labels, svm_pred_labels))
print("\nACCURACY:", svm_score)

                    precision    recall  f1-score   support

    SwitchLightOff       0.77      0.70      0.73       276
     SwitchLightOn       0.60      0.88      0.72       257
IncreaseBrightness       0.69      0.75      0.72       269
DecreaseBrightness       0.74      0.65      0.69       268
SetLightBrightness       0.93      0.81      0.87       296
     SetLightColor       0.91      0.77      0.83       294

          accuracy                           0.76      1660
         macro avg       0.77      0.76      0.76      1660
      weighted avg       0.78      0.76      0.76      1660

[[193  29  23  19   7   5]
 [  8 226  15   4   2   2]
 [ 18  17 202  24   1   7]
 [ 19  24  41 175   3   6]
 [  9  29   6   9 241   2]
 [  3  49   4   7   5 226]]

ACCURACY: 0.7608433734939759


# NEURAL NETWORK

In [150]:
# Multi-layer perceptron: three hidden layers of 100 ReLU units, Adam solver,
# at most 5000 iterations. random_state is fixed so that weight initialisation
# (and therefore the reported metrics) is reproducible on Restart & Run All;
# without it every run yields different numbers.
mlp = MLPClassifier(
    hidden_layer_sizes=(100, 100, 100),
    activation='relu',
    solver='adam',
    max_iter=5000,
    random_state=42,
)

# The Normalizer step rescales each sample (row) to unit norm; it does not
# standardise the features column-wise the way StandardScaler does.
mlp_pipe = make_pipeline(preprocessing.Normalizer(), mlp)
mlp_pipe.fit(train_features, train_labels)

mlp_pred_labels = mlp_pipe.predict(test_features)  # predictions on the test split

mlp_score = mlp_pipe.score(test_features, test_labels)  # accuracy on the test split

### Model Evaluation

In [151]:
# Display names for the classes.
# NOTE(review): presumably ordered to match the sorted numeric labels -- TODO confirm.
intent_names = [
    'SwitchLightOff',
    'SwitchLightOn',
    'IncreaseBrightness',
    'DecreaseBrightness',
    'SetLightBrightness',
    "SetLightColor",
]

# Per-class precision/recall/F1, then the confusion matrix, then the accuracy.
print(classification_report(test_labels, mlp_pred_labels, target_names=intent_names))
print(confusion_matrix(test_labels, mlp_pred_labels))
print("\nACCURACY:", mlp_score)

                    precision    recall  f1-score   support

    SwitchLightOff       0.76      0.78      0.77       276
     SwitchLightOn       0.74      0.87      0.80       257
IncreaseBrightness       0.76      0.84      0.79       269
DecreaseBrightness       0.79      0.69      0.73       268
SetLightBrightness       0.90      0.84      0.87       296
     SetLightColor       0.86      0.77      0.81       294

          accuracy                           0.80      1660
         macro avg       0.80      0.80      0.80      1660
      weighted avg       0.80      0.80      0.80      1660

[[216  19  11  13   7  10]
 [ 13 224   6   5   3   6]
 [ 13  12 225  13   1   5]
 [ 24  13  31 184   7   9]
 [  8   8  12  12 250   6]
 [ 11  28  13   6  10 226]]

ACCURACY: 0.7981927710843374


# Try It Yourself

In [152]:
def user_friendly(sentence, cls):
    """Predict the action for a raw sentence using a fitted classifier.

    The sentence is split on whitespace, every token is passed through
    ``hp.autocorrection``, the corrected tokens are embedded with
    ``get_Word2Vec_feature`` and reshaped into a single-row feature matrix,
    and ``cls.predict`` is called on it. The prediction is returned after
    being mapped through ``hp.indx2action``.

    Parameters
    ----------
    sentence : str
        Raw utterance to classify.
    cls : object
        Fitted estimator (or pipeline) exposing ``predict``.
    """
    corrected_tokens = [hp.autocorrection(token) for token in sentence.split()]
    sentence_features = get_Word2Vec_feature(corrected_tokens).reshape(1, -1)
    return hp.indx2action(cls.predict(sentence_features))

In [153]:
# Demo: classify a free-form utterance containing a misspelling ("brithenss")
# with the fitted logistic-regression pipeline.
user_friendly("I want to set brithenss to fifty in my living room", lgr_pipe)

['SetLightBrightness']