# LLSF - Learning Label Specific Features for Multi-Label Classifcation

## 1. Importing libraries

In [1]:
import numpy as np
import pandas as pd
import scipy.io as sio
from numpy.linalg import inv
from numpy import linalg as LA
from sklearn.metrics.pairwise import cosine_similarity as cossim
from numpy import count_nonzero
from sklearn.metrics import f1_score,hamming_loss,label_ranking_average_precision_score,zero_one_loss,auc,coverage_error

## 2. Loading already pre-processed data

In [2]:
data = sio.loadmat('datasets/emotions.mat')
data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN64, Created on: Wed May 04 19:29:54 2016',
 '__version__': '1.0',
 '__globals__': [],
 'X': <391x72 sparse matrix of type '<class 'numpy.float64'>'
 	with 27995 stored elements in Compressed Sparse Column format>,
 'Y': <391x6 sparse matrix of type '<class 'numpy.float64'>'
 	with 709 stored elements in Compressed Sparse Column format>,
 'Xt': <202x72 sparse matrix of type '<class 'numpy.float64'>'
 	with 14487 stored elements in Compressed Sparse Column format>,
 'Yt': <202x6 sparse matrix of type '<class 'numpy.float64'>'
 	with 399 stored elements in Compressed Sparse Column format>}

In [3]:
X=data['X'].toarray()
Y=data['Y'].toarray()
Xt=data['Xt'].toarray()
Yt=data['Yt'].toarray()

## 3. Setting Optimal parameters for LLSF (as suggested by authors)

In [4]:
optmParameter={}
optmParameter['alpha']             = 2**(-5)       #label correlation
optmParameter['beta']              = 2**(-3)       #sparsity
optmParameter['gamma']             = 0.1           #initialization for W
optmParameter['maxIter']           = 100           #Maximum iteration
optmParameter['minimumLossMargin'] = 0.0001        #Setting condition for convergence
optmParameter['Threshold']         = 0.5           #The threshold for classifying 0 or 1 for labels

## 4. LLSF Algorithm

In [8]:
def LLSF(X,Y,Xt,optmParameter):
    
    #This function outouts the weight matrix for features
    def LLSF_weight_mat(X,Y,optmParameter):
    
    #Optimal Parameters
    
        alpha            = optmParameter['alpha']
        beta             = optmParameter['beta']
        gamma            = optmParameter['gamma']
        maxIter          = optmParameter['maxIter']
        miniLossMargin   = optmParameter['minimumLossMargin']
    
    #Initialisation
    
        num_dim          = X.shape[1]
        XTX              = np.matmul(X.T,X)
        XTY              = np.matmul(X.T,Y)
        W_s              = np.matmul(inv(XTX + gamma * np.eye(num_dim)),XTY)
        W_s_1            = W_s
        eps              = 10**-8
        R                = cossim(np.transpose(Y + eps),np.transpose(Y + eps))
        Lip              = np.sqrt(2*np.power((LA.norm(XTX,'fro')),2) + np.power((LA.norm(alpha * R,'fro')),2))
        bk               = 1
        bk_1             = 1  
    
    #The Soft-thresholding function
    
        def softthres(W_t,lambd):
            W = np.maximum((W_t-lambd),lambd) - np.maximum(-W_t-lambd,lambd)
            return W
        
    #LLSF algorithm using Accelersted Proximal Gradient
        oldloss             = 0
        iteration           = 0
        while iteration <= maxIter:
            W_s_k           = W_s + ((bk_1 - 1)/bk) * (W_s - W_s_1)
            Gw_s_k          = W_s_k - ((1/Lip) * ((np.matmul(XTX,W_s_k) - XTY + alpha * np.matmul(W_s_k,R))))
            bk_1            = bk
            bk              = (1 + np.sqrt(4*bk**2 + 1))/2
            W_s_1           = W_s
            W_s             = softthres(Gw_s_k,beta/Lip)
            predictionLoss  = LA.norm((X@W_s - Y),'fro')
            correlation     = np.trace(np.matmul(R,np.matmul(W_s.T,W_s)))
            sparsity        = 1.0 - ( count_nonzero(W_s) / float(W_s.size) )  #sum(sum(W_s!=0))
            totalloss       = predictionLoss + alpha*correlation + beta*sparsity
            if np.absolute(oldloss - totalloss) <= miniLossMargin:
                break
            elif totalloss <=0:
                break
            else:
                oldloss = totalloss
            iteration+=1
        return W_s
    
    threshold = optmParameter['Threshold']
    
    #This function predicts the output label matrix.
    
    def LLSF_prediction(Xt,weightmat,threshold):
        St    = np.matmul(Xt,weightmat)
        Y_pre = np.maximum(np.sign(St - threshold),0)
        return Y_pre
    
    weightmat = LLSF_weight_mat(X,Y,optmParameter)
    results   = LLSF_prediction(Xt,weightmat,threshold)
    
    return results

In [9]:
results = LLSF(X,Y,Xt,optmParameter)

## 4. Results

In [10]:
print('Hamming loss : {}'.format(hamming_loss(Yt,results)))
print('zero_one_loss : {}'.format(zero_one_loss(Yt,results)))
print('coverage_error : {}'.format(coverage_error(Yt,results)))
print('label_ranking_average_precision_score : {}'.format(label_ranking_average_precision_score(Yt,results)))

Hamming loss : 0.22194719471947194
zero_one_loss : 0.7623762376237624
coverage_error : 4.599009900990099
label_ranking_average_precision_score : 0.6170242024202425
