In [1]:
import numpy as np
import pandas as pd
import prettytable
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, hamming_loss
from sklearn.model_selection import train_test_split

In [2]:
import time
main_start=time.time()
time_log={"main_start":main_start}


In [3]:
#READING THE BERT EMBEDDINGS AND Y MATRIX
bert_embedding=np.load("embeddings.npz")
label_values=np.load("Y.npz")

In [4]:
type1_BERT_Embeddings=bert_embedding["t1"]
type2_BERT_Embeddings=bert_embedding["t2"]
label_values=label_values["arr_0"]

In [5]:
t1_train_x, t1_test_x, t1_train_y, t1_test_y = train_test_split(type1_BERT_Embeddings, label_values, test_size=0.33, random_state=234)
t2_train_x, t2_test_x, t2_train_y, t2_test_y = train_test_split(type2_BERT_Embeddings, label_values, test_size=0.33, random_state=230)

In [6]:
print("Shape t1_train_x: ", t1_train_x)
print("Shape t1_test_x: ", t1_test_x)
print("Shape t1_train_y: ", t1_train_y)
print("Shape t1_test_y: ", t1_test_y)
print("Shape t2_train_x: ", t2_train_x)
print("Shape t2_test_x: ", t2_test_x)
print("Shape t2_train_y: ", t2_train_y)
print("Shape t2_test_y: ", t2_test_y)

Shape t1_train_x:  [[-0.36826527 -0.43821245 -0.15154007 ... -0.21317424  0.6740728
  -0.89827394]
 [-0.25526345 -1.0361675   0.31187654 ... -0.41380176  0.23752367
  -1.4694043 ]
 [-0.30190113 -0.6179864  -0.08102065 ...  0.07218534  0.40399817
  -0.5914986 ]
 ...
 [-0.00596368 -0.52669936  0.2841632  ... -0.18374442  0.55380374
  -0.50675297]
 [-0.48843777 -0.7013829  -0.20389749 ...  0.05295624  0.48787463
  -0.24249634]
 [-0.4864386  -0.4691094  -0.20912297 ... -0.0936537   0.39957207
  -0.4435219 ]]
Shape t1_test_x:  [[-0.26130378 -0.69637364  0.16091792 ... -0.04362152  0.17829853
  -0.9757811 ]
 [ 0.06757689 -0.1472328   0.07639776 ...  0.09480745  0.31186146
  -1.1533178 ]
 [ 0.19800258 -0.887366    0.35757995 ... -0.4777546   0.3292356
  -0.32011172]
 ...
 [-0.1094763  -0.43274575  0.5285199  ... -0.2149579   0.23089571
  -0.5284223 ]
 [-0.14528538 -0.18008953  0.5997345  ... -0.39525276 -0.03062117
  -0.47663215]
 [-0.46414566 -0.2350843  -0.38872486 ... -0.07529384  0.208867

In [7]:

# Activation Functions

# def _identity(x):
#     return x
# def _binary_step(x, threshold = 0):
#     return 1 if x=threshold else 0
# def _biploar_step(x ,threshold = 0):
#     return 1 if x>=threshold else -1
# def _binary_sigmoid(x):
#     return 1. / (1. + np.exp(-x))
# def _bipolar_sigmoid(x):
#     return (1. - np.exp(-x))/(1. + np.exp(-x))
# def _relu_function(x):
#     return np.max(0, x)
# def _relu_leaky(x):
#     return np.max(0.01*x, x)


_identity =np.vectorize(lambda x: x)
_binary_step =np.vectorize(lambda x,t=0: 1 if x>t else 0)
_biploar_step =np.vectorize(lambda x,t=0: 1 if x>t else -1)
_binary_sigmoid=np.vectorize(lambda x: 1. / (1. + np.exp(-x)))
_bipolar_sigmoid=np.vectorize(lambda x: (1. - np.exp(-x))/(1. + np.exp(-x)))
_relu_function=np.vectorize(lambda x: np.max([0, x]))
_relu_leaky=np.vectorize(lambda x: np.max([0.01*x, x]))

In [8]:


class ELM_MultiLabel:
    def __init__(self, input_nodes, hidden_nodes, output_nodes, activation="_identity", bias=True, random_gen="uniform"):
        """
        Args:
            input_nodes ([integer]): Number of Input nodes
            hidden_nodes ([integer]): Number of hidden nodes
            output_nodes ([integer]): Number of output nodes
            activation ([function]): The function name which will be used as the activation function in the hidden layer. Defaults to "_identity".
                possible values: _binary_step, _biploar_step, _binary_sigmoid, _bipolar_sigmoid, _relu_function, _relu_leaky, _identity
            bias ([boolean]): Flag to use bias, if True then randomly generate bias @random_gen else bias - 0.
            random_gen (str, optional): The type way in which random weight are generated. Defaults to "uniform".
        """
        self.__input_nodes = input_nodes
        self.__hidden_nodes = hidden_nodes
        self.__output_nodes = output_nodes

        if random_gen == "uniform":
            self.__beta = np.random.uniform(-1.,1.,size = (self.__hidden_nodes, self.__output_nodes))
            self.__alpha = np.random.uniform(-1.,1.,size = (self.__input_nodes, self.__hidden_nodes))
            self.__bias = np.random.uniform(size = (self.__hidden_nodes,))
        else:
            self.__beta = np.random.normal(-1.,1.,size=(self.__hidden_nodes, self.__output_nodes))
            self.__alpha = np.random.normal(-1.,1.,size=(self.__input_nodes, self.__hidden_nodes))
            self.__bias = np.random.normal(size=(self.__n_hidden_nodes,))
        

        if activation == "_biploar_step":
            self.__activation = _biploar_step
        elif activation == "_bipolar_sigmoid":
            self.__activation = _bipolar_sigmoid
        elif activation == "_relu_leaky":
            self.__activation =_relu_leaky
        elif activation == "_binary_step":
            self.__activation =_binary_step
        elif activation == "_binary_sigmoid":
            self.__activation =_binary_sigmoid
        elif activation == "_relu_function":
            self.__activation =_relu_function
        else:
            self.__activation =_identity
    

    def getInputNodes(self):
        return  self.__input_nodes

    def getHiddenNodes(self):
        return  self.__hidden_nodes

    def getOutputNodes(self):
        return  self.__output_nodes
    
    def getBetaWeights(self):
        return self.__beta
    
    def getAlphaWeight(self):
        return self.__alphs
    
    def getBias(self):
        return self.__bias

    def __get_H_matrix(self, train_x, verbose=False):
        # 1 Propagate data from Input to hidden Layer
        if verbose:
            print("Propagate data from Input to hidden Layer")
        inp = np.dot(train_x , self.__alpha)
        if verbose:
            print(inp)
            print("Adding Biases")
        inp = inp  + self.__bias
        if verbose:
            print(inp)
            print("Applyin activation function")
        inp_activation = np.apply_along_axis(self.__activation, 1, inp)
        return inp_activation

    def fit(self, train_x, train_y, verbose = False, show_metrics = True):
        """
        This function calculates the Beta weights or the output weights
        train_x : input matrix
        train_y : output matrix to be predicted or learned upon unipolar

        returns: if test_y is not given then
                returns the predicted output
                if test_y is given then returns predicted output and evaluation metrics dict 
        """
        if verbose:
            print("train_x shape:", train_x.shape)
            print("train_y shape:", train_y.shape)
        inp_activation = self.__get_H_matrix(train_x, verbose)
        # This is the H matrix getting its Moore Penrose Inverse
        if verbose:
            print(inp_activation)
            print("Getting the Generalized Moore Penrose Inverse")
        generalizedInverse = np.linalg.pinv(inp_activation)
        if verbose:
            print(generalizedInverse)
            print("Finding Beta, output weights")
        # Now find output weight matrix Beta 
        # convert input Y values according to the threshold using biploar step function
        _bipolar_y=  np.apply_along_axis(_biploar_step, 1, train_y)
        self.__beta = np.dot(generalizedInverse, _bipolar_y)
        if verbose:
            print("Beta Matrix Weights")
            print(self.__beta)

        # print("Model Metrics, for Training :")
        return self.predict(train_x, train_y,verbose,show_metrics)
    
    def predict(self, test_x, test_y = None, verbose = False, show_metrics= True):
        """
        preditcts the output for the input test data
        call this after calling the fit.
        test_data shape should be (batch_size,768 or input_nodes)
        output_shape will be (batch_size, 71 or output_nodes)

        returns: if test_y is not given then
                returns the predicted output
                if test_y is given then returns predicted output and evaluation metrics dict
        """
        if verbose:
            print("Predicting outputs")
        inp_activation = self.__get_H_matrix(test_x, verbose)
        output_predicted = np.dot(inp_activation, self.__beta)
        # convert predicted according to the threshold using biploar step function
        predicted_bipolar =  np.apply_along_axis(_biploar_step, 1, output_predicted)
        predicted_binary = np.apply_along_axis(_binary_step, 1, predicted_bipolar)

        if verbose:
            print("predicted output")
            print(output_predicted)
            print("predicted_bipolar")
            print(predicted_bipolar)
            print("predicted_binary")
            print(predicted_binary)
            print("Original Binary")
            print(test_y)

        eval_dict={}
        if (test_y is not None):
            eval_dict=self.__evaluate(test_y,predicted_binary, for_test=False)
        if(test_y is not None):
            return predicted_binary, eval_dict
        else:
            return predicted_binary

    def __evaluate(self, real, predicted, for_test=True):
        """
        real values as 0,1
        predicted values as 0,1
        """
        # Now we find accuracy, precision, recall, Hamming Loss and F1 Measure
        accuracy = accuracy_score(real, predicted)
        hamLoss = hamming_loss(real, predicted)
        # element wise correctness
        term_wise_accuracy=np.sum(np.logical_not(np.logical_xor(real, predicted)))/real.size

        macro_precision = precision_score(real, predicted, average='macro')
        macro_recall = recall_score(real, predicted, average='macro')
        macro_f1 = f1_score(real, predicted, average='macro')

        micro_precision = precision_score(real, predicted, average='micro')
        micro_recall = recall_score(real, predicted, average='micro')
        micro_f1 = f1_score(real, predicted, average='micro')
        
        metricTable=prettytable.PrettyTable()
        metricTable.field_names = ["Metric", "Macro Value", "Micro Value"]
        metricTable.add_row(["Hamming Loss","{0:.3f}".format(hamLoss) ,""])
        metricTable.add_row(["Term Wise Accuracy","{0:.3f}".format(term_wise_accuracy) ,""])

        metricTable.add_row(["Accuracy","{0:.3f}".format(accuracy),""])
        metricTable.add_row(["Precision","{0:.3f}".format(macro_precision),"{0:.3f}".format(micro_precision)])
        metricTable.add_row(["Recall","{0:.3f}".format(macro_recall),"{0:.3f}".format(micro_recall)])
        metricTable.add_row(["F1-measure","{0:.3f}".format(macro_f1),"{0:.3f}".format(micro_f1)])

        print(metricTable)

        #
        # print("Metrics @ Literature")
        lit_accuracy, lit_precision, lit_recall, lit_f1 = self.get_eval_metrics(real,predicted)

        return_dict = {"HiddenNodes": self.getHiddenNodes(),
                "lit_accuracy": lit_accuracy,
                "lit_precision": lit_precision,
                "lit_recall": lit_recall,
                "lit_f1": lit_f1,
                "sklearn_hamLoss": hamLoss,
                "sklearn_accuracy": accuracy,
                "term_wise_accuracy": term_wise_accuracy,
                "sklearn_macro_precision": macro_precision,
                "sklearn_micro_precision": micro_precision,
                "sklearn_macro_recall": macro_recall,
                "sklearn_micro_recall": micro_recall,
                "sklearn_macro_f1": macro_f1,
                "sklearn_micro_f1": micro_f1,
                }

        return return_dict

    def get_eval_metrics(self, real, predicted, verbose= False):
        err_cnt_accuracy=0
        err_cnt_precision=0
        err_cnt_recall=0
        if verbose:
            print(real)
            print(predicted)
        for x in range(real.shape[0]):
            err_and= np.logical_and(real[x],predicted[x])
            err_or = np.logical_or(real[x],predicted[x])
            # Accuracy
            err_cnt_accuracy +=(sum(err_and)/sum(err_or))

            # Precision
            if sum(err_and) != 0:
                err_cnt_precision += (sum(err_and) / sum(predicted[x]))
            # Recall
            err_cnt_recall += (sum(err_and) / sum(real[x]))
            if verbose:
                print("Iteration :",x)
                print((sum(err_and)/sum(err_or)))
                print(err_and)
                print(err_or)
        
        # err_count_hamming = np.zeros((real.shape))

        # for i in range(real.shape[0]):
        #     for j in range(real.shape[1]):
        #         if real[i,j] != predicted[i,j]:
        #             err_count_hamming[1,j] = err_count_hamming[1,j]+1

        # sum_err = np.sum(err_count_hamming);
        # HammingLoss = sum_err/real.size;
        accuracy = err_cnt_accuracy / real.shape[0]
        precision = err_cnt_precision / real.shape[0]
        recall = err_cnt_recall / real.shape[0]
        f1 = 2*((precision*recall)/(precision+recall))
        if verbose:
            print("Final: ")
            # print("Hamming Loss: ", HammingLoss)
            print("Accuracy: ",accuracy)
            print("precision: ",precision)
            print("recall: ",recall)
            print("f1: ",f1)

        # metricTable=prettytable.PrettyTable()
        # metricTable.field_names = ["Metric", "Value"]
        # metricTable.add_row([" Literature Hamming Loss","{0:.3f}".format(HammingLoss)])
        # metricTable.add_row(["Literature Accuracy","{0:.3f}".format(accuracy)])

        # metricTable.add_row(["Literature Precision","{0:.3f}".format(precision)])
        # metricTable.add_row(["LiteratureRecall","{0:.3f}".format(recall)])
        # metricTable.add_row(["LiteratureF1-measure","{0:.3f}".format(f1)])

        # # print(metricTable)

        return accuracy,precision,recall,f1


Now The preprocessing and is done.

Now We will run the model for all the three types data sets we have viz.
- TRAIN X
  - t1_train_x
  - t2_train_x
- TEST X
  - t1_test_x
  - t2_test_x
- TRAIN Y
  - t1_train_y
  - t2_train_y
- TEST Y
  - t1_test_y
  - t2_test_y


In [9]:
list_of_models_hidden_nodes=[100,200, 300, 400, 500, 1000, 2000, 3000, 4000, 5000, 10000]#, 15000, 20000]

INPUT_NODES= 768
OUTPUT_NODES= 71
activations= ["_identity","_biploar_step","_bipolar_sigmoid","_relu_leaky","_binary_sigmoid"]
# activations= ["_identity"]

randomizations ="normal"
datasets ={"t1_bert":(t1_train_x,t1_train_y,t1_test_x,t1_test_y),"t2_bert":(t2_train_x,t2_train_y,t2_test_x,t2_test_y)}



In [10]:
metrics_dict_list=[]

def add_data_to_metric_list(eval_dict, activation, type, start, phase, end, metrics_dict_list=metrics_dict_list):
    eval_dict["activation"]=activation
    eval_dict["type"]=type
    eval_dict["phase"]=phase
    eval_dict["total_time"]=end-start

    metrics_dict_list.append(eval_dict)


Testing the above function with a simple 50 hidden layer node model

In [11]:
# to store the models
model_dict={}

In [12]:
for dataset in datasets.keys():
    for HIDDEN_NODES in list_of_models_hidden_nodes:
        for activation in activations:
                start = time.time()
                print(dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations)
                model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations]= ELM_MultiLabel(input_nodes=INPUT_NODES,hidden_nodes=HIDDEN_NODES,output_nodes=OUTPUT_NODES, activation=activations)
                predicted, eval_dict=model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations].fit(datasets[dataset][0],datasets[dataset][1], verbose=False, show_metrics=True)

                end =time.time()
                add_data_to_metric_list(eval_dict, activation, dataset, start, "train", end)


t1_bert_5000__identity_normal
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.038    |             |
| Term Wise Accuracy |    0.962    |             |
|      Accuracy      |    0.081    |             |
|     Precision      |    0.235    |    0.693    |
|       Recall       |    0.024    |    0.152    |
|     F1-measure     |    0.034    |    0.249    |
+--------------------+-------------+-------------+
t1_bert_200__identity_normal
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.040    |             |
| Term Wise Accuracy |    0.960    |             |
|      Accuracy      |    0.052    |             |
|     Precision      |    0.078    |    0.641    |
|       Recall       |    0.014    |    0.105    |
|     F1-measure     | 

In [13]:
for dataset in datasets.keys():
    for HIDDEN_NODES in list_of_models_hidden_nodes:
        for activation in activations:
                start = time.time()
                print(dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations)
                model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations]
                predicted, eval_dict=model_dict[dataset+"_"+str(HIDDEN_NODES)+"_"+activation+"_"+randomizations].predict(datasets[dataset][2],datasets[dataset][3], show_metrics=True)
                end =time.time()
                add_data_to_metric_list(eval_dict, activation, dataset, start, "test", end)


t1_bert_5000__identity_normal
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.041    |             |
| Term Wise Accuracy |    0.959    |             |
|      Accuracy      |    0.055    |             |
|     Precision      |    0.114    |    0.563    |
|       Recall       |    0.018    |    0.118    |
|     F1-measure     |    0.024    |    0.195    |
+--------------------+-------------+-------------+
t1_bert_200__identity_normal
+--------------------+-------------+-------------+
|       Metric       | Macro Value | Micro Value |
+--------------------+-------------+-------------+
|    Hamming Loss    |    0.041    |             |
| Term Wise Accuracy |    0.959    |             |
|      Accuracy      |    0.044    |             |
|     Precision      |    0.067    |    0.592    |
|       Recall       |    0.012    |    0.091    |
|     F1-measure     | 

In [14]:
_metrics_df= pd.DataFrame(metrics_dict_list)
_metrics_df

Unnamed: 0,HiddenNodes,lit_accuracy,lit_precision,lit_recall,lit_f1,sklearn_hamLoss,sklearn_accuracy,term_wise_accuracy,sklearn_macro_precision,sklearn_micro_precision,sklearn_macro_recall,sklearn_micro_recall,sklearn_macro_f1,sklearn_micro_f1,activation,type,phase,total_time
0,5000,0.174942,0.337462,0.188125,0.241578,0.038347,0.080632,0.961653,0.234586,0.692629,0.024335,0.151726,0.034298,0.248924,_identity,t1_bert,train,108.484474
1,200,0.119984,0.241846,0.130124,0.169207,0.039955,0.052345,0.960045,0.077644,0.640829,0.013636,0.104672,0.017916,0.179951,_identity,t1_bert,train,7.74529
2,5000,0.186166,0.359511,0.200837,0.257708,0.03836,0.085967,0.96164,0.288305,0.70677,0.026435,0.158569,0.037344,0.259024,_identity,t2_bert,train,108.230605
3,200,0.132336,0.265502,0.143,0.185883,0.040069,0.060499,0.959931,0.079276,0.65368,0.014841,0.111391,0.019532,0.190345,_identity,t2_bert,train,7.996619
4,5000,0.132934,0.267538,0.148197,0.190739,0.041097,0.054965,0.958903,0.114471,0.562907,0.017674,0.117755,0.024131,0.194767,_identity,t1_bert,test,6.342042
5,200,0.105219,0.215025,0.116366,0.15101,0.041019,0.043727,0.958981,0.067346,0.592064,0.011748,0.09055,0.015575,0.157076,_identity,t1_bert,test,3.861674
6,5000,0.147357,0.28602,0.163032,0.207684,0.04013,0.062934,0.95987,0.103019,0.565778,0.019792,0.131266,0.026278,0.213093,_identity,t2_bert,test,6.763947
7,200,0.123676,0.245879,0.134199,0.173632,0.039767,0.05517,0.960233,0.060872,0.614512,0.013969,0.105402,0.018263,0.179941,_identity,t2_bert,test,4.015265


In [None]:
**WRITING METRICS DATA TO FILE**

In [None]:
_metrics_df.to_csv("Final_ELM_Mertics_BERT.csv")

In [None]:
**END**