In [1]:
import numpy as np
import pandas as pd
import prettytable
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss
from sklearn.model_selection import train_test_split

In [2]:
import time
main_start=time.time()
time_log={"main_start":main_start}


In [3]:
#READING THE BERT EMBEDDINGS AND Y MATRIX
xl_embedding=np.load("xl_embeddings.npz")
label_values=np.load("Y.npz")

In [4]:
type1_XL_Embeddings=xl_embedding["t1"]
type2_XL_Embeddings=xl_embedding["t2"]
label_values=label_values["arr_0"]

In [5]:
print("Shape BERT embeddings: ",type1_XL_Embeddings.shape)
print("Shape Labels: ",label_values.shape)

Shape BERT embeddings:  (14828, 768)
Shape Labels:  (14828, 71)


In [6]:
t1_train_x, t1_test_x, t1_train_y, t1_test_y = train_test_split(type1_XL_Embeddings, label_values, test_size=0.30, random_state=234)
t2_train_x, t2_test_x, t2_train_y, t2_test_y = train_test_split(type2_XL_Embeddings, label_values, test_size=0.30, random_state=230)

In [7]:
print("Shape t1_train_x: ", t1_train_x)
print("Shape t1_test_x: ", t1_test_x)
print("Shape t1_train_y: ", t1_train_y)
print("Shape t1_test_y: ", t1_test_y)
print("Shape t2_train_x: ", t2_train_x)
print("Shape t2_test_x: ", t2_test_x)
print("Shape t2_train_y: ", t2_train_y)
print("Shape t2_test_y: ", t2_test_y)

Shape t1_train_x:  [[-1.3617519   0.34917742 -0.347661   ...  0.4747173   0.34489462
   0.41101173]
 [-1.190906    0.52735925 -0.22382991 ...  1.5797565  -1.1731603
  -0.38454008]
 [-1.1769129   0.69791657 -0.56655157 ...  1.4122146  -0.30882388
   0.64583176]
 ...
 [-0.04197738 -0.30295113  0.93122745 ... -0.69763345  0.71040845
  -1.1912576 ]
 [-1.6726701   0.90538806 -0.9346221  ...  0.6451105  -0.4554242
   0.08242729]
 [-1.1789546  -0.5192119  -0.4589492  ... -0.17118011 -1.2722951
   1.4048201 ]]
Shape t1_test_x:  [[-1.5849832  -0.5949811  -1.2933335  ...  0.44798675 -0.58947533
   0.7308828 ]
 [-0.43592796 -0.40508023 -1.999297   ...  0.5094302  -0.59811646
  -0.2568926 ]
 [-0.90659636 -0.7082342  -1.026399   ... -0.44526273  0.6033146
  -1.2407033 ]
 ...
 [ 0.52638185  0.58106923 -2.2913098  ... -0.8800179  -0.7108759
  -1.2504671 ]
 [-2.0825336  -1.0746661  -1.6944818  ...  1.0884634   1.4700074
  -0.4222801 ]
 [-1.141141    1.2463706  -0.5987656  ...  0.35659948  0.51518744
 

In [8]:
# CHECKING IF THE METRICS ARE COORRECTLY WORKING

real = t2_test_y
predicted =t2_test_y

accuracy = accuracy_score(real, predicted)
hamLoss = hamming_loss(real, predicted)
# element wise correctness
term_wise_accuracy=np.sum(np.logical_not(np.logical_xor(real, predicted)))/real.size

macro_precision = precision_score(real, predicted, average='macro')
macro_recall = recall_score(real, predicted, average='macro')
macro_f1 = f1_score(real, predicted, average='macro')

micro_precision = precision_score(real, predicted, average='micro')
micro_recall = recall_score(real, predicted, average='micro')
micro_f1 = f1_score(real, predicted, average='micro')

metricTable=prettytable.PrettyTable()
metricTable.field_names = ["Metric", "Macro Value", "Micro Value"]
metricTable.add_row(["Hamming Loss","{0:.3f}".format(hamLoss) ,""])
metricTable.add_row(["Term Wise Accuracy","{0:.3f}".format(term_wise_accuracy) ,""])

metricTable.add_row(["Accuracy","{0:.3f}".format(accuracy),""])
metricTable.add_row(["Precision","{0:.3f}".format(macro_precision),"{0:.3f}".format(micro_precision)])
metricTable.add_row(["Recall","{0:.3f}".format(macro_recall),"{0:.3f}".format(micro_recall)])
metricTable.add_row(["F1-measure","{0:.3f}".format(macro_f1),"{0:.3f}".format(micro_f1)])

print(metricTable)

+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.000    |             |
| Term Wise Accuracy |    1.000    |             |
|      Accuracy      |    1.000    |             |
|     Precision      |    1.000    |    1.000    |
|       Recall       |    1.000    |    1.000    |
|     F1-measure     |    1.000    |    1.000    |
+--------------------+-------------+-------------+


In [9]:
# Activation Functions

# def _identity(x):
#     return x
# def _binary_step(x, threshold = 0):
#     return 1 if x=threshold else 0
# def _biploar_step(x ,threshold = 0):
#     return 1 if x>=threshold else -1
# def _binary_sigmoid(x):
#     return 1. / (1. + np.exp(-x))
# def _bipolar_sigmoid(x):
#     return (1. - np.exp(-x))/(1. + np.exp(-x))
# def _relu_function(x):
#     return np.max(0, x)
# def _relu_leaky(x):
#     return np.max(0.01*x, x)


_identity =np.vectorize(lambda x: x)
_binary_step =np.vectorize(lambda x,t=0: 1 if x>t else 0)
_biploar_step =np.vectorize(lambda x,t=0: 1 if x>t else -1)
_binary_sigmoid=np.vectorize(lambda x: 1. / (1. + np.exp(-x)))
_bipolar_sigmoid=np.vectorize(lambda x: (1. - np.exp(-x))/(1. + np.exp(-x)))
_relu_function=np.vectorize(lambda x: np.max([0, x]))
_relu_leaky=np.vectorize(lambda x: np.max([0.01*x, x]))

In [10]:


class ELM_MultiLabel:
    def __init__(self, input_nodes, hidden_nodes, output_nodes, activation="_identity", bias=True, random_gen="uniform"):
        """
        Args:
            input_nodes ([integer]): Number of Input nodes
            hidden_nodes ([integer]): Number of hidden nodes
            output_nodes ([integer]): Number of output nodes
            activation ([function]): The function name which will be used as the activation function in the hidden layer. Defaults to "_identity".
                possible values: _binary_step, _biploar_step, _binary_sigmoid, _bipolar_sigmoid, _relu_function, _relu_leaky, _identity
            bias ([boolean]): Flag to use bias, if True then randomly generate bias @random_gen else bias - 0.
            random_gen (str, optional): The type way in which random weight are generated. Defaults to "uniform".
        """
        self.__input_nodes = input_nodes
        self.__hidden_nodes = hidden_nodes
        self.__output_nodes = output_nodes

        if random_gen == "uniform":
            self.__beta = np.random.uniform(-1.,1.,size = (self.__hidden_nodes, self.__output_nodes))
            self.__alpha = np.random.uniform(-1.,1.,size = (self.__input_nodes, self.__hidden_nodes))
            self.__bias = np.random.uniform(size = (self.__hidden_nodes,))
        else:
            self.__beta = np.random.normal(-1.,1.,size=(self.__hidden_nodes, self.__output_nodes))
            self.__alpha = np.random.normal(-1.,1.,size=(self.__input_nodes, self.__hidden_nodes))
            self.__bias = np.random.normal(size=(self.__n_hidden_nodes,))
        

        if activation == "_biploar_step":
            self.__activation = _biploar_step
        elif activation == "_bipolar_sigmoid":
            self.__activation = _bipolar_sigmoid
        elif activation == "_relu_leaky":
            self.__activation =_relu_leaky
        elif activation == "_binary_step":
            self.__activation =_binary_step
        elif activation == "_binary_sigmoid":
            self.__activation =_binary_sigmoid
        elif activation == "_relu_function":
            self.__activation =_relu_function
        else:
            self.__activation =_identity
    

    def getInputNodes(self):
        return  self.__input_nodes

    def getHiddenNodes(self):
        return  self.__hidden_nodes

    def getOutputNodes(self):
        return  self.__output_nodes
    
    def getBetaWeights(self):
        return self.__beta
    
    def getAlphaWeight(self):
        return self.__alphs
    
    def getBias(self):
        return self.__bias

    def __get_H_matrix(self, train_x, verbose=False):
        # 1 Propagate data from Input to hidden Layer
        if verbose:
            print("Propagate data from Input to hidden Layer")
        inp = np.dot(train_x , self.__alpha)
        if verbose:
            print(inp)
            print("Adding Biases")
        inp = inp  + self.__bias
        if verbose:
            print(inp)
            print("Applyin activation function")
        inp_activation = np.apply_along_axis(self.__activation, 1, inp)
        return inp_activation

    def fit(self, train_x, train_y, verbose = False, show_metrics = True):
        """
        This function calculates the Beta weights or the output weights
        train_x : input matrix
        train_y : output matrix to be predicted or learned upon unipolar

        returns: if test_y is not given then
                returns the predicted output
                if test_y is given then returns predicted output and evaluation metrics dict 
        """
        if verbose:
            print("train_x shape:", train_x.shape)
            print("train_y shape:", train_y.shape)
        inp_activation = self.__get_H_matrix(train_x, verbose)
        # This is the H matrix getting its Moore Penrose Inverse
        if verbose:
            print(inp_activation)
            print("Getting the Generalized Moore Penrose Inverse")
        generalizedInverse = np.linalg.pinv(inp_activation)
        if verbose:
            print(generalizedInverse)
            print("Finding Beta, output weights")
        # Now find output weight matrix Beta 
        # convert input Y values according to the threshold using biploar step function
        _bipolar_y=  np.apply_along_axis(_biploar_step, 1, train_y)
        self.__beta = np.dot(generalizedInverse, _bipolar_y)

        if verbose:
            print("Beta Matrix Weights")
            print(self.__beta)

        # print("Model Metrics, for Training :")
        return self.predict(train_x, train_y,verbose,show_metrics)
    
    def predict(self, test_x, test_y = None, verbose = False, show_metrics= True):
        """
        preditcts the output for the input test data
        call this after calling the fit.
        test_data shape should be (batch_size,768 or input_nodes)
        output_shape will be (batch_size, 71 or output_nodes)

        returns: if test_y is not given then
                returns the predicted output
                if test_y is given then returns predicted output and evaluation metrics dict
        """
        if verbose:
            print("Predicting outputs")
        inp_activation = self.__get_H_matrix(test_x, verbose)
        output_predicted = np.dot(inp_activation, self.__beta)
        # convert predicted according to the threshold using biploar step function
        predicted_bipolar =  np.apply_along_axis(_biploar_step, 1, output_predicted)
        predicted_binary = np.apply_along_axis(_binary_step, 1, predicted_bipolar)

        if verbose:
            print("predicted output")
            print(output_predicted)
            print("predicted_bipolar")
            print(predicted_bipolar)
            print("predicted_binary")
            print(predicted_binary)
            print("Original Binary")
            print(test_y)

        eval_dict={}
        if (test_y is not None):
            eval_dict=self.__evaluate(test_y,predicted_binary, for_test=False)
        if(test_y is not None):
            return predicted_binary, eval_dict
        else:
            return predicted_binary

    def __evaluate(self, real, predicted, for_test=True):
        """
        real values as 0,1
        predicted values as 0,1
        """
        # Now we find accuracy, precision, recall, Hamming Loss and F1 Measure
        accuracy = accuracy_score(real, predicted)
        hamLoss = hamming_loss(real, predicted)
        # element wise correctness
        term_wise_accuracy=np.sum(np.logical_not(np.logical_xor(real, predicted)))/real.size

        macro_precision = precision_score(real, predicted, average='macro')
        macro_recall = recall_score(real, predicted, average='macro')
        macro_f1 = f1_score(real, predicted, average='macro')

        micro_precision = precision_score(real, predicted, average='micro')
        micro_recall = recall_score(real, predicted, average='micro')
        micro_f1 = f1_score(real, predicted, average='micro')
        
        metricTable=prettytable.PrettyTable()
        metricTable.field_names = ["Metric", "Macro Value", "Micro Value"]
        metricTable.add_row(["Hamming Loss","{0:.3f}".format(hamLoss) ,""])
        metricTable.add_row(["Term Wise Accuracy","{0:.3f}".format(term_wise_accuracy) ,""])

        metricTable.add_row(["Accuracy","{0:.3f}".format(accuracy),""])
        metricTable.add_row(["Precision","{0:.3f}".format(macro_precision),"{0:.3f}".format(micro_precision)])
        metricTable.add_row(["Recall","{0:.3f}".format(macro_recall),"{0:.3f}".format(micro_recall)])
        metricTable.add_row(["F1-measure","{0:.3f}".format(macro_f1),"{0:.3f}".format(micro_f1)])

        print(metricTable)

        #
        # print("Metrics @ Literature")
        lit_accuracy, lit_precision, lit_recall, lit_f1 = self.get_eval_metrics(real,predicted)

        return_dict = {"HiddenNodes": self.getHiddenNodes(),
                "lit_accuracy": lit_accuracy,
                "lit_precision": lit_precision,
                "lit_recall": lit_recall,
                "lit_f1": lit_f1,
                "sklearn_hamLoss": hamLoss,
                "sklearn_accuracy": accuracy,
                "term_wise_accuracy": term_wise_accuracy,
                "sklearn_macro_precision": macro_precision,
                "sklearn_micro_precision": micro_precision,
                "sklearn_macro_recall": macro_recall,
                "sklearn_micro_recall": micro_recall,
                "sklearn_macro_f1": macro_f1,
                "sklearn_micro_f1": micro_f1,
                }

        return return_dict

    def get_eval_metrics(self, real, predicted, verbose= False):
        err_cnt_accuracy=0
        err_cnt_precision=0
        err_cnt_recall=0
        if verbose:
            print(real)
            print(predicted)
        for x in range(real.shape[0]):
            err_and= np.logical_and(real[x],predicted[x])
            err_or = np.logical_or(real[x],predicted[x])
            # Accuracy
            err_cnt_accuracy +=(sum(err_and)/sum(err_or))

            # Precision
            if sum(err_and) != 0:
                err_cnt_precision += (sum(err_and) / sum(predicted[x]))
            # Recall
            err_cnt_recall += (sum(err_and) / sum(real[x]))
            if verbose:
                print("Iteration :",x)
                print((sum(err_and)/sum(err_or)))
                print(err_and)
                print(err_or)
        
        # err_count_hamming = np.zeros((real.shape))

        # for i in range(real.shape[0]):
        #     for j in range(real.shape[1]):
        #         if real[i,j] != predicted[i,j]:
        #             err_count_hamming[1,j] = err_count_hamming[1,j]+1

        # sum_err = np.sum(err_count_hamming);
        # HammingLoss = sum_err/real.size;
        accuracy = err_cnt_accuracy / real.shape[0]
        precision = err_cnt_precision / real.shape[0]
        recall = err_cnt_recall / real.shape[0]
        f1 = 2*((precision*recall)/(precision+recall))
        if verbose:
            print("Final: ")
            # print("Hamming Loss: ", HammingLoss)
            print("Accuracy: ",accuracy)
            print("precision: ",precision)
            print("recall: ",recall)
            print("f1: ",f1)

        # metricTable=prettytable.PrettyTable()
        # metricTable.field_names = ["Metric", "Value"]
        # metricTable.add_row([" Literature Hamming Loss","{0:.3f}".format(HammingLoss)])
        # metricTable.add_row(["Literature Accuracy","{0:.3f}".format(accuracy)])

        # metricTable.add_row(["Literature Precision","{0:.3f}".format(precision)])
        # metricTable.add_row(["LiteratureRecall","{0:.3f}".format(recall)])
        # metricTable.add_row(["LiteratureF1-measure","{0:.3f}".format(f1)])

        # # print(metricTable)

        return accuracy,precision,recall,f1


Now The preprocessing and is done.

Now We will run the model for both types data sets we have viz.
- TRAIN X
  - t1_train_x
  - t2_train_x
- TEST X
  - t1_test_x
  - t2_test_x
- TRAIN Y
  - t1_train_y
  - t2_train_y
- TEST Y
  - t1_test_y
  - t2_test_y


In [11]:
# list_of_models_hidden_nodes=[5000, 200, 300, 400, 500, 1000, 2000, 3000, 4000, 5000, 10000, 15000, 20000]
list_of_models_hidden_nodes=[100, 200, 300, 400, 500, 1000, 2000, 3000, 4000, 5000, 10000]#, 15000, 20000]


INPUT_NODES= 768
OUTPUT_NODES= 71
activations= ["_identity","_biploar_step","_bipolar_sigmoid","_relu_leaky","_binary_sigmoid"]
# activations= ["_identity"]

randomizations ="uniform"
datasets ={"t1_xl":(t1_train_x,t1_train_y,t1_test_x,t1_test_y),"t2_xl":(t2_train_x,t2_train_y,t2_test_x,t2_test_y)}



In [12]:
metrics_dict_list=[]

def add_data_to_metric_list(eval_dict, activation, type, start, phase, end, metrics_dict_list=metrics_dict_list):
    eval_dict["activation"]=activation
    eval_dict["type"]=type
    eval_dict["phase"]=phase
    eval_dict["total_time"]=end-start

    metrics_dict_list.append(eval_dict)


Testing the above function with a simple 50 hidden layer node model

In [13]:
# to store the models
model_dict={}

**TRAINING**

In [14]:
for dataset in datasets.keys():
    for HIDDEN_NODES in list_of_models_hidden_nodes:
        for activation in activations:
                start = time.time()
                print(dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations)
                model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations]= ELM_MultiLabel(input_nodes=INPUT_NODES,hidden_nodes=HIDDEN_NODES,output_nodes=OUTPUT_NODES, activation=activations)
                predicted, eval_dict=model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations].fit(datasets[dataset][0],datasets[dataset][1], show_metrics=True)

                end =time.time()
                add_data_to_metric_list(eval_dict, activation, dataset, start, "train", end)


t1_xl_100__identity_uniform
(100, 71)
(100, 71)
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.040    |             |
| Term Wise Accuracy |    0.960    |             |
|      Accuracy      |    0.040    |             |
|     Precision      |    0.067    |    0.636    |
|       Recall       |    0.011    |    0.092    |
|     F1-measure     |    0.015    |    0.161    |
+--------------------+-------------+-------------+
t1_xl_200__identity_uniform
(200, 71)
(200, 71)
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.039    |             |
| Term Wise Accuracy |    0.961    |             |
|      Accuracy      |    0.057    |             |
|     Precision      |    0.106    |    0.656    |
|       Recall       |    0.015    | 

**TESTING**

In [15]:
for dataset in datasets.keys():
    for HIDDEN_NODES in list_of_models_hidden_nodes:
        for activation in activations:
                start = time.time()
                print(dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations)
                model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations]
                predicted, eval_dict=model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations].predict(datasets[dataset][2],datasets[dataset][3], show_metrics=True)
                end =time.time()
                add_data_to_metric_list(eval_dict, activation, dataset, start, "test", end)


t1_xl_100__identity_uniform
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.041    |             |
| Term Wise Accuracy |    0.959    |             |
|      Accuracy      |    0.039    |             |
|     Precision      |    0.049    |    0.613    |
|       Recall       |    0.011    |    0.087    |
|     F1-measure     |    0.014    |    0.152    |
+--------------------+-------------+-------------+
t1_xl_200__identity_uniform
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.041    |             |
| Term Wise Accuracy |    0.959    |             |
|      Accuracy      |    0.049    |             |
|     Precision      |    0.065    |    0.620    |
|       Recall       |    0.014    |    0.106    |
|     F1-measure     |    

In [16]:
_metrics_df= pd.DataFrame(metrics_dict_list)
_metrics_df

Unnamed: 0,HiddenNodes,lit_accuracy,lit_precision,lit_recall,lit_f1,sklearn_hamLoss,sklearn_accuracy,term_wise_accuracy,sklearn_macro_precision,sklearn_micro_precision,sklearn_macro_recall,sklearn_micro_recall,sklearn_macro_f1,sklearn_micro_f1,activation,type,phase,total_time
0,100,0.101053,0.213091,0.109847,0.144965,0.040084,0.039696,0.959916,0.06679,0.635894,0.011285,0.092071,0.014721,0.160852,_identity,t1_xl,train,7.354751
1,200,0.130919,0.263681,0.141759,0.184388,0.039474,0.057424,0.960526,0.105583,0.655833,0.015025,0.113536,0.019801,0.193563,_identity,t1_xl,train,7.67603
2,300,0.140656,0.281562,0.150836,0.196438,0.039217,0.063686,0.960783,0.098221,0.665355,0.016445,0.120983,0.021755,0.204739,_identity,t1_xl,train,7.97215
3,100,0.114995,0.242605,0.125279,0.165233,0.040237,0.045573,0.959763,0.071272,0.65589,0.01309,0.104058,0.016782,0.17962,_identity,t2_xl,train,7.436743
4,200,0.138363,0.279186,0.150622,0.195676,0.039785,0.059158,0.960215,0.104288,0.668948,0.015599,0.119061,0.020039,0.202144,_identity,t2_xl,train,7.726508
5,300,0.152799,0.302855,0.16563,0.214146,0.039328,0.067348,0.960672,0.14752,0.684509,0.017955,0.131596,0.023289,0.220752,_identity,t2_xl,train,8.099153
6,100,0.098505,0.203585,0.108204,0.141305,0.04124,0.039335,0.95876,0.048701,0.613005,0.01091,0.086863,0.014262,0.152164,_identity,t1_xl,test,2.9293
7,200,0.121532,0.251068,0.133909,0.174661,0.040864,0.049,0.959136,0.065241,0.619983,0.014222,0.105588,0.018644,0.180444,_identity,t1_xl,test,3.059177
8,300,0.131357,0.261666,0.144875,0.186494,0.040769,0.056192,0.959231,0.071293,0.61944,0.015503,0.111755,0.020407,0.189349,_identity,t1_xl,test,3.153177
9,100,0.11285,0.236682,0.122041,0.161043,0.039487,0.046527,0.960513,0.051802,0.624826,0.013011,0.103674,0.016585,0.177839,_identity,t2_xl,test,2.994913


**WRITING METRICS DATA TO FILE**

In [None]:
Final_metrics_df.to_csv("Final_ELM_Mertics_XL.csv")

In [None]:
Final_metrics_df

Doing Testing