Reading Data from file

In [1]:
# For your convenience, a function for reading in the dataset:
import csv

def load_dataset(filename):
    intent = []
    unique_intent = []
    sentences = []
    with open(filename, "r", encoding="latin1") as f:
        data = csv.reader(f, delimiter=",")
        for row in data:
            sentences.append(row[0])
            intent.append(row[1])
    unique_intent = set(intent)
    return sentences, intent, unique_intent
            
sentences, intent, unique_intent = load_dataset("../Data/dataset.csv")

Preprocessing Data

In [17]:
import re
import numpy as np

def create_bow_rep():
    '''
    This function computes required bag-of-words representation matrix.
    
    Output : bow is a (v x m) matrix where v is vocabulary size and m is number of examples. 
    
    '''
    bow = np.zeros((v,m))
    for i in range(v):
        word = list(vocab)[i]
        for j in range(m):
            if( word in sentences[j]):
                
                bow[i][j]=1
            else:
                bow[i][j]=0
    return bow
            
def create_labelled_matrix():
    '''
    This function creates output matrix that is a hot vector representation of correct output for each example.
    
    Output : labeled is a (k x m) matrix where k is number of intent (total classes) and m is number of examples.
    '''
    labeled = np.zeros((k,m))
    for i in range(k):
        cls = list(unique_intent)[i]
        for j in range(m):
            if(cls == intent[j]):
                
                labeled[i][j]=1
            else:
                labeled[i][j]=0
    
    return labeled

tokens_reg = re.compile(r"[\w']+|[.,!?;]")
tokens = [] #this list will contain tokens with punctuation seperated eg: "help?" is stored as "help" , "?" 
for word in sentences:
    tokens+=tokens_reg.findall(word)
vocab = set([word.lower() for word in tokens if word not in [".",",","!","?",";","[","]"]])
m = len(sentences) # number of examples
v = len(vocab) # vocabulary size
k = len(unique_intent) # number of classes

sentc_2_ix = {sentc:sentences.index(sentc) for sentc in sentences} 
vocab_2_ix = {word:list(vocab).index(word) for word in list(vocab)}
intent_2_ix = {intent:list(unique_intent).index(intent) for intent in list(unique_intent)}

X = create_bow_rep()
Y = create_labelled_matrix()

Defining Classifier

In [18]:
class TwoLayerNNClassifier:
    
    def __init__(self,hidden,classes, eta=0.005, epoch=100, seed = 5 ):
        self.eta = eta # Learning rate 
        self.epoch = epoch
        self.seed = seed # Tuning initial parameter values
        self.weights = {}
        self.bias = {}
        self.hidden_z = None
        self.hidden = hidden # Number of hidden layer neurons
        self.classes = classes #Number of classes
        
    
    def initialise_parameters(self,X):
        '''
        This function initialise parameters like Weight and bias matrix. 

        W is weight matrix between Input and Hidden Layer of shape (150 x v) where v is vocabulary size
        b is bias matrix between Input and Hidden Layer of shape (150 x m ) where m is number of examples
        U is weight matrix between Hidden and Output Layer of shape (k x 150) where k is number of intents/classes
        There is no bias term between Hidden and Output Layer

        Input : seed value for random state

        Output : Weight and bias matrix

        '''
        v,n = X.shape
        rang = np.random.RandomState(seed=self.seed)
        a=rang.uniform(-1,1,size=(self.hidden,v))
        b = rang.uniform(-1,1,size=(self.hidden,1))
        c=rang.uniform(-1,1,size=(self.classes,self.hidden))
        d = rang.uniform(-1,1,size=(self.classes,1))
        self.weights[0] = np.array(a, dtype=np.float64)
        self.bias[0] = np.array(b, dtype=np.float64)
        self.weights[1] = np.array(c, dtype=np.float64)
        self.bias[1] = np.array(d, dtype=np.float64)

    
    def forward(self,X):
        
        '''
            This function is forward feed for neural network. It calculates activation at hidden layer using ReLU and at output layer
            using softmax.

            Output : It returns a tuple containing three entities : 
                     y_out : It is a matrix of shape (k x m) where k is number of classes/intents and m is number of examples.
                             It contains predicted probability distribution of each examples over k classes. 

                     z_at_hidden : It is matrix of shape (150 x m) where m is number of examples. It is weighted sum of input at 
                                   hidden layer for each example. Every column vector represent weighted sum for each neuron  at
                                   hidden layer.

                     activation_at_hidden : It is matrix of shape (150 x m). It is activation value for each neuron at hidden layer
                                             wrt every example. Every column vector represent activation value for each neuron at
                                             hidden layer.

            '''

        #Propogation to Hidden Layer
        self.hidden_z = np.add(np.matmul(self.weights[0],X),
                               np.matmul(self.bias[0],np.ones((self.bias[0].shape[1],X.shape[1])))) # weighted sum of input at hidden layer
        activation_at_hidden = self.relu(self.hidden_z) # activation matrix at hidden layer

        #Propogation to output layer
        z_at_output = np.add(np.matmul(self.weights[1],activation_at_hidden),
                            np.matmul(self.bias[1],np.ones((self.bias[1].shape[1],
                                                            activation_at_hidden.shape[1])))) # weighted sum at output
        y_pred = self.softmax(z_at_output) # probability distribution over different intents for each example

        return y_pred
    
    def backward(self,X,Y,y_pred):
        
        '''
        This function back proporgates the error to previous layers. It calculates error terms for each layer and then calculates
        partial derivatives wrt weights and bias of every layer.

        Input : y_out : It is a matrix of shape (k x m) where k is number of classes/intents and m is number of examples.
                         It contains predicted probability distribution of each examples over k classes. 

                z_at_hidden : It is matrix of shape (150 x m) where m is number of examples. It is weighted sum of input at 
                               hidden layer for each example. Every column vector represent weighted sum for each neuron  at
                               hidden layer.

                activation_at_hidden : It is matrix of shape (150 x m). It is activation value for each neuron at hidden layer
                                         wrt every example. Every column vector represent activation value for each neuron at
                                         hidden layer.
        Output : It returns a tuple contaning change in parameters,
                 del_weight_of_U : This is a (k x 150) matrix containing change in each weight between hidden and output layer.
                 del_weight_of_W : This is a (150 x v) matrix containing change in each weight between input and hidden layer.
                 del_bias : This is a (150 x m) matrix containing change in each bias term between input and hidden layer.

        '''
        y_true = np.array(Y)
        del_error_at_out = y_pred - y_true # error term at output layer
        del_error_at_hidden = np.multiply(np.matmul(self.weights[1].T,del_error_at_out),self.deriv_relu(self.hidden_z)) # error term at hidden layer
        partial_change_in_weight_of_U = np.matmul(del_error_at_out,self.relu(self.hidden_z).T) # weights between hidden and output layer
        partial_change_in_weight_of_W = np.matmul(del_error_at_hidden,X.T) # weights between input and hidden layer
        partial_change_in_bias_of_W = del_error_at_hidden # bias for hidden layer neurons
        partial_change_in_bias_of_U = del_error_at_out
        del_weight_of_U = partial_change_in_weight_of_U
        del_weight_of_W = partial_change_in_weight_of_W
        del_bias_of_W = partial_change_in_bias_of_W
        del_bias_of_U = partial_change_in_bias_of_U
        return (del_weight_of_U,del_weight_of_W,del_bias_of_W,del_bias_of_U)
    
    
    def train(self,X,Y,batch):
        m = X.shape[1]
        for u in range(self.epoch):
            cost_list = []
            indices = np.arange(m)
            np.random.shuffle(indices) # array with shuffled indicies of examples
            j=np.arange(0,m,batch) 
            np.append(j,m-1)
            for i in range(len(j)-1):

                #Parameters initialization
                del_U = 0
                del_W = 0
                del_b1 = 0
                del_b2 = 0

                #Batch Data
                x = X[:,indices[j[i]:j[i+1]]]
                y = Y[:,indices[j[i]:j[i+1]]]

                #Forward Feed
                y_pred = self.forward(x)

                # Cost
                cost = self.cost_func(y,y_pred)/len(j)
                cost_list.append(cost)

                #Backward Feed
                (del_U,del_W,del_b1,del_b2) = self.backward(x,y,y_pred)
                del_U = del_U/len(j)
                del_W = del_W/len(j)
                del_b1 = (np.mean(del_b1,axis=1)).reshape(-1,1)
                del_b2 = (np.mean(del_b2,axis=1)).reshape(-1,1)

                #Parameters update
                self.weights[1] -= (self.eta)*del_U
                self.weights[0] -= (self.eta)*del_W
                self.bias[0] -= (self.eta)*del_b1
                self.bias[1] -= (self.eta)*del_b2
            print("Loss is ",sum(cost_list))
    
    def cost_func(self,Y,y_pred):
        '''
        This function computes average loss over whole set of examples.

        Input : y_out is matrix of shape (k x m) which contain predicted values for every example. Every column vector,
                representing an example, contains predicted probability distribution over k classes.

        Output : return average cost over whole dataset

        '''
        y_true = np.array(Y) # converting labelled example into numpy matrix

        # For every column vector in matrix of labelled example, we retrieve index of maximum value, which is 1 in this case,
        # This index corresponds to correct class for that particular example.
        true_index = np.argmax(y_true,axis=0)

        # Selecting only those predictions from each column which corresponds to correct class for that example. Since row
        # in y_out represent classes and column represent examples, so for every column we are choosing a row index from "true_index"
        # vector which contain row index of correct class.
        cost = np.sum(-np.log(y_pred[true_index,np.arange(y_true.shape[1])]))
        return cost

    
    def softmax(self,x):
        '''
        This function computes softmax value for an array or a matrix

        Input : x is a array or a matrix

        Output : result is either a array of softmax value or a matrix
        '''
        x=x.astype(float)
        if x.ndim==1:
            return np.exp(x)/np.sum(np.exp(x))
        elif x.ndim==2:
            result=np.zeros_like(x)
            M,N=x.shape
            z = x - np.max(x, axis=0, keepdims=True)
            for n in range(N):
                S=np.sum(np.exp(z[:,n]))
                result[:,n]=np.exp(z[:,n])/S
            return result
        else:
            print("The input array is not 1- or 2-dimensional.")
        
    def relu(self,X):
        '''
        This function compares each value in given array or matrix with 0 and returns maximum of that comparison

        Input : Array or matrix of real numbers

        Output : Array or matrix of real numbers
        '''
        return np.maximum(0,X)
    
    def deriv_relu(self,X):
        '''
        This function returns derivative of relu() function. It compares each value in array or matrix with 0 and returns
        1 if a value is greater than 0 else it returns 0.

        Input : Array or matrix of real numbers

        Output : Array or matrix of 0's and 1's
        '''
        return np.where(X<=0,0,1)

Evaluation

In [19]:
def accuracy(X,Y,classifier):
    '''
    This function computes accuracy of model by computing ratio of number of correct prediction to number of examples.
    
    Output : a real number between 0 and 1, where 1 being highest accuracy.
    '''
    y = classifier.forward(X) # predicted values
    
    # For every column vector in matrix of predicted value, we retrieve index of maximum value in that vector. This index 
    # correspond to predicted class index.
    predicted = np.argmax(y,axis=0) # predicted index (intent) of maximum value for each example
    
    true_output = np.array(Y) # Converting labelled examples to numpy matrix
    
    # For every column vector in matrix of labelled example, we retrieve index of maximum value, which is 1 in this case,
    # This index corresponds to correct class for that particular example.
    true = np.argmax(true_output,axis=0) # true intent vector for each example
    
    
    accuracy = (np.sum(predicted == true))/m
    return accuracy

Instance Run

In [20]:
classifier = TwoLayerNNClassifier(150,k,epoch=200)
classifier.initialise_parameters(X)
classifier.train(X,Y,100)

Loss is  1198.6853557306447
Loss is  794.4781524920709
Loss is  641.6800501027211
Loss is  538.032175690774
Loss is  468.43170538381673
Loss is  413.2481374734132
Loss is  369.225287681886
Loss is  337.2451179530668
Loss is  305.84282966024404
Loss is  282.1370649070438
Loss is  262.73304824051104
Loss is  242.00615086862467
Loss is  227.9947132732362
Loss is  212.3307978705223
Loss is  198.0040532833391
Loss is  189.94191593520713
Loss is  178.86217914308722
Loss is  169.0637165563023
Loss is  161.2122407440949
Loss is  154.74469790070924
Loss is  147.1448781803512
Loss is  139.9645981375958
Loss is  133.57090940753596
Loss is  129.6620490830562
Loss is  124.38147428928198
Loss is  119.25534791896185
Loss is  114.73502016763948
Loss is  109.3017113105007
Loss is  106.90537652800904
Loss is  101.3120588178549
Loss is  97.34975013634583
Loss is  94.87665301553037
Loss is  92.6699659387588
Loss is  88.05765436579578
Loss is  86.33213988225786
Loss is  82.50154923302192
Loss is  79.447459

In [21]:
accuracy(X,Y,classifier)

0.9910152740341419