In [1]:
import numpy as np 
import pandas as pd 
import json
import os
from sklearn.metrics import accuracy_score
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Dense, Embedding, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import SimpleRNN

In [2]:
def load_data():
    intents = os.listdir('Train')
    
    intentToIntent_id = {}
    intent_idToIntent = {}
    intent_id = 0
    
    for intent in intents:
        intentName = intent.replace('.json','')
        intentToIntent_id[intentName] = intent_id
        intent_idToIntent[intent_id] = intentName
        intent_id+=1
    
    for dire in ('Train','Validate'):
        data = list()

        for intent in intents:
            
            file = json.load(open(f'{dire}/{intent}'))
            intentName = intent.replace('.json','')
            file = file[intentName]

            for i in file:
                    item = i['data']
                    text =""

                    for j in item:
                        text = text + j['text'].strip()+ " "

                    data.append((text.strip(),intentName,intentToIntent_id[intentName]))
                    
        if dire == 'Train':
            train_df = pd.DataFrame(data, columns =['value', 'Name','ID']).sample(frac = 1)
            train_df.reset_index(drop=True, inplace=True)
        else:
            val_df = pd.DataFrame(data, columns =['value', 'Name','ID']).sample(frac = 1)
            val_df.reset_index(drop=True, inplace=True)
            
    return train_df, val_df, intentToIntent_id, intent_idToIntent

In [3]:
df , val_df, intents, intentsID = load_data()

In [4]:
df

Unnamed: 0,value,Name,ID
0,I want to give The Canon of Medicine one out ...,RateBook,4
1,Add shi xin hui to my piano chill playlist.,AddToPlaylist,0
2,Play some Game music.,PlayMusic,3
3,Play some chanson music,PlayMusic,3
4,Find the Exile and the Kingdom,SearchCreativeWork,5
...,...,...,...
13779,I want to watch TV series The Practical Pig,SearchCreativeWork,5
13780,give The God Machine two of 6 points,RateBook,4
13781,Show me the work Not a Little Girl Anymore,SearchCreativeWork,5
13782,give Power of Faerun two of 6,RateBook,4


In [5]:
val_df

Unnamed: 0,value,Name,ID
0,rate The Lives of John Lennon five points,RateBook,4
1,Please find me the Classified book .,SearchCreativeWork,5
2,Add give us rest to my 70s Smash Hits playlist.,AddToPlaylist,0
3,Play the newest melody on Last Fm by Eddie Vi...,PlayMusic,3
4,table for 5 A.m . at Baker's Keyboard Lounge,BookRestaurant,1
...,...,...,...
695,I need to know the weather for Jan. the 3rd in...,GetWeather,2
696,Please play a sound track from the fifties tha...,PlayMusic,3
697,Tell me the weather forecast for Gibsland,GetWeather,2
698,What will the weather be nineteen hours from n...,GetWeather,2


In [6]:
intents

{'AddToPlaylist': 0,
 'BookRestaurant': 1,
 'GetWeather': 2,
 'PlayMusic': 3,
 'RateBook': 4,
 'SearchCreativeWork': 5,
 'SearchScreeningEvent': 6}

In [7]:
intentsID

{0: 'AddToPlaylist',
 1: 'BookRestaurant',
 2: 'GetWeather',
 3: 'PlayMusic',
 4: 'RateBook',
 5: 'SearchCreativeWork',
 6: 'SearchScreeningEvent'}

In [8]:
class Preprocessing():
    def __init__(self):
        self.x_train = df['value'].tolist()
        self.y_train = df['ID'].tolist()
        self.x_valid = val_df['value'].tolist()
        self.y_valid = val_df['ID'].tolist()
        self.tokenizer = Tokenizer(num_words=None)

    def createData(self):
        self.tokenizer.fit_on_texts(self.x_train + self.x_valid)
        self.x_train = self.tokenizer.texts_to_sequences(self.x_train)
        self.x_valid = self.tokenizer.texts_to_sequences(self.x_valid)

        #zero pad the sequences
        self.max_len = 50
        self.x_train = pad_sequences(self.x_train, maxlen=self.max_len)
        self.x_valid = pad_sequences(self.x_valid, maxlen=self.max_len)
        self.y_train = to_categorical(self.y_train)
        self.y_valid = to_categorical(self.y_valid)
        self.word_index = self.tokenizer.word_index

In [9]:
preprocess_obj = Preprocessing()
preprocess_obj.createData()

In [10]:
preprocess_obj.y_train.shape

(13784, 7)

In [11]:
preprocess_obj.y_valid.shape

(700, 7)

In [12]:
class DesignModel():
    def __init__(self):
        self.model = None
        self.x_train = preprocess_obj.x_train
        self.y_train = preprocess_obj.y_train
        self.x_valid = preprocess_obj.x_valid
        self.y_valid = preprocess_obj.y_valid
        
    def simple_rnn(self):
        self.model = Sequential()
        self.model.add(Embedding(len(preprocess_obj.word_index) + 1,100,input_length=preprocess_obj.max_len))
        self.model.add(SimpleRNN(100))
        self.model.add(Dense(len(intents), activation='sigmoid'))
        self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
        
        
    def model_train(self,batch_size,num_epoch):
        print("Fitting to model")
        self.model.fit(self.x_train, self.y_train, batch_size=batch_size, epochs=num_epoch, validation_data=[self.x_valid, self.y_valid])
        print("Model Training complete.")

In [13]:
model_obj = DesignModel()
model_obj.simple_rnn()
model_obj.model_train(64,5)

Fitting to model
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model Training complete.


In [14]:
class Evaluation():
    def get_accuracy(self,actuals, predictions):
        acc = accuracy_score(actuals, predictions)
        return acc

In [15]:
class Prediction():
    def __init__(self):
        self.model = model_obj.model
        self.tokenizer = preprocess_obj.tokenizer
        self.max_len = preprocess_obj.max_len
        
    def predict_validation(self):
        self.xtest = val_df['value'].tolist()
        self.ytest = val_df['ID'].tolist()
        self.xtest = self.tokenizer.texts_to_sequences(self.xtest)
        self.xtest = pad_sequences(self.xtest, maxlen=self.max_len)
        self.ypred = self.model.predict(self.xtest)
        self.ypred = [np.argmax(item) for item in self.ypred]
    
    def predict(self,query):
        query_seq = self.tokenizer.texts_to_sequences([query])
        query_pad = pad_sequences(query_seq, maxlen=self.max_len)
        pred = self.model.predict(query_pad)
        pred = np.argmax(pred)
        result = intentsID[pred]
        return result

In [16]:
pred_obj = Prediction()
pred_obj.predict_validation()



In [17]:
querylist = [
    'rate The Gift: Imagination and the Erotic Life of Property five stars',
     'table for Breadline Cafe in Minnesota next friday',
     'Will it be hot at 13:19 in De Funiak Springs Serbia and Montenegro ?',
     'Play some sixties songs on Google Music',
     'rate this textbook four out of 6']
for query in querylist:
    result = pred_obj.predict(query)
    print("Intent: "+str(result)+"\tQuery: "+str(query))

Intent: RateBook	Query: rate The Gift: Imagination and the Erotic Life of Property five stars
Intent: BookRestaurant	Query: table for Breadline Cafe in Minnesota next friday
Intent: GetWeather	Query: Will it be hot at 13:19 in De Funiak Springs Serbia and Montenegro ?
Intent: PlayMusic	Query: Play some sixties songs on Google Music
Intent: RateBook	Query: rate this textbook four out of 6


In [18]:
eval_obj = Evaluation()
acc = eval_obj.get_accuracy(pred_obj.ytest,pred_obj.ypred)
print("Acc: {:.2%}".format(acc))

Acc: 98.29%
