In [6]:
import os
import pandas as pd

from autograd import grad
import autograd.numpy as np

from numpy.linalg import inv
from numpy import linalg as LA
from numpy.matlib import repmat

np.seterr(divide='ignore', invalid='ignore')
pd.options.mode.chained_assignment = None 

In [7]:
import sys
sys.path.append('../../Python/')
from Preprocessing import standardize
from Evaluation import split_train_test_ma, train_model, eval_model
from Databases import get_databases_path, get_database

In [18]:
databases_path = '../../Databases/Sinteticas'
paths = get_databases_path(databases_path)
database = paths[0]

In [70]:
theta = 1e-50

def sigmoid(a):
    return 1./(1 + np.exp(-a))

def make_cost_function(X, Y, mui):
    def cost_function(w):
        R = Y.shape[1]
        N = Y.shape[0]
        sizew1 = X.shape[1]
        sizeW = sizew1 + sizew1*R

        W1 = w[0,0:sizew1].reshape(sizew1,1)
        W2 = w[0,sizew1:sizeW].reshape(sizew1,R)

        pi = sigmoid(np.dot(X, W1))        
        aux = sigmoid(np.dot(X,W2))
        
        ai = np.multiply(np.power((1-aux),np.absolute(Y-1)),np.power(aux, (1-np.absolute(Y-1))))
        ai = np.array(np.prod(ai, axis=1)).reshape(ai.shape[0],1) 

        bi = np.multiply(np.power((1-aux),np.absolute(Y)),np.power(aux, (1-np.absolute(Y))))
        bi = np.array(np.prod(bi, axis=1)).reshape(bi.shape[0],1) 

        aux_0 = 1-mui 
        aux_1 = 1-pi 

        aux_a = np.multiply(mui, np.log(np.multiply(pi, ai)+theta)) 
        aux_b = np.multiply(aux_0, np.log(np.multiply(aux_1, bi)+theta)) 

        print('aux_0', aux_0)
        print('aux_1', aux_1)
        
        return -(aux_a + aux_b).sum()
    return cost_function
         
def optimizer(X, Y, w_0, mui):
    derivate = grad(make_cost_function(X, Y, mui)) 
    params = w_0
    epsilon = 0.001
    normGrad = 100
    i=0
    while normGrad > 0.1 and i<300:
        gr = derivate(params)
        #print('\ngr',gr)
        params = params - epsilon * gr
        normGrad = LA.norm(gr)
        i+=1
    return params, normGrad
    
def train_mae(X_train, y_train):
    X = X_train.copy(deep=True)
    Y = y_train.copy(deep=True)
    
    X.loc[:,'b'] = 0.0
    X = X.values
    Y = Y.values
    R = Y.shape[1] # Number of annotators.
    D = X.shape[1] # Input space dimension
    N = X.shape[0] # Number of samples.
    
    sizew1 = D
    sizew2 = D*R;
    sizeW = sizew1 + sizew2
    
    w = np.random.randn(1, sizeW)
    mui = np.array(Y.mean(axis=1)).reshape(Y.shape[0],1) + theta
    
    w = repmat(np.dot(inv(np.dot(X.T,X) + np.eye(D)), np.dot(X.T,mui)),1,R+1)
    w = w.flatten().reshape(1, sizeW)
     
    w_0 = w
    loglik1 = 10000000
    difloglik = 100
    difloglik1 = -1
    i = 0
    
    while abs(difloglik) > 0.2 and i<100:
        # M-step     
        w, _ = optimizer(X, Y, w_0, mui)
        W1 = w[0,0:sizew1].reshape(sizew1,1)
        W2 = w[0,sizew1:sizeW].reshape(D,R)
           
        #E-step
        pi = sigmoid(np.dot(X, W1))
        aux = sigmoid(np.dot(X,W2))

        ai = np.multiply(np.power((1-aux),np.absolute(Y-1)),np.power(aux, (1-np.absolute(Y-1))))
        ai = np.array(np.prod(ai, axis=1)).reshape(ai.shape[0],1) 

        bi = np.multiply(np.power((1-aux),np.absolute(Y)),np.power(aux, (1-np.absolute(Y))))
        bi = np.array(np.prod(bi, axis=1)).reshape(bi.shape[0],1) 

        nummui = np.multiply(ai, pi)
        denmui = (np.multiply(ai, pi) + np.multiply(bi, (1-pi))) 
        mui =  np.divide(nummui+theta, denmui+theta)

        aux_0 = 1-mui 
        aux_1 = 1-pi 
        aux_a = np.multiply(mui, np.log(np.multiply(pi, ai)+theta)) 
        aux_b = np.multiply(aux_0, np.log(np.multiply(aux_1, bi)+theta)) 

        loglik2 = -(aux_a + aux_b).sum()
        difloglik = (loglik2 - loglik1);
        loglik1 = loglik2
        
        w_0 = w
        i+=1
    return w[0,0:sizew1].reshape(sizew1,1) 

In [71]:
print('------------------------------------------------------------------------')
print("Database:", database, '\n')
# Obtain the attributes and labels
attr_df = get_database(database, 'database_attr.csv')
labels_df = get_database(database, 'database_labels.csv')  

# Split in Training and Test Sets
X_train, X_test, y_train, y_test = split_train_test_ma(attr_df, labels_df)

# Train LFC Model
W = train_mae(X_train, y_train)

X_test.loc[:,'b'] = 0.0
predtest = sigmoid(np.dot(X_test,W))
y_pred = [True if item >= 0.5 else False for item in predtest]

print("\nResults\n")
eval_model(y_test, y_pred)

------------------------------------------------------------------------
Database: ../../Databases/Sinteticas/breast-cancer 

aux_0 [[0.4]
 [0.2]
 [0.4]
 [0.4]
 [0.6]
 [0.6]
 [0.6]
 [0.4]
 [0.2]
 [0.2]
 [0.8]
 [0.4]
 [0.2]
 [0.4]
 [0.8]
 [0.2]
 [0.8]
 [0.6]
 [0.2]
 [0.6]
 [0.8]
 [0.2]
 [0.4]
 [0.6]
 [0.2]
 [0.4]
 [0.2]
 [0.6]
 [0.6]
 [0.4]
 [0.4]
 [0.2]
 [0.2]
 [0.4]
 [1. ]
 [0.8]
 [0.4]
 [0.6]
 [0.6]
 [0.8]
 [0.6]
 [0.4]
 [1. ]
 [0.6]
 [0.6]
 [0.4]
 [0.8]
 [0.4]
 [0.4]
 [0.6]
 [0.4]
 [0.4]
 [0.4]
 [0.2]
 [0.4]
 [0.6]
 [0.2]
 [0.4]
 [0.6]
 [0.6]
 [0.6]
 [0.8]
 [0.6]
 [0.4]
 [0.6]
 [0.8]
 [0.4]
 [0.2]
 [0.6]
 [0.4]
 [0.2]
 [0.8]
 [0.4]
 [0.2]
 [0.4]
 [0.4]
 [0.4]
 [0.4]
 [0.4]
 [0.4]
 [0.6]
 [1. ]
 [0.8]
 [0.8]
 [0.6]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.8]
 [0.6]
 [0.4]
 [1. ]
 [0.6]
 [0.4]
 [0.2]
 [0.4]
 [0.4]
 [0.4]
 [0.6]
 [0.8]
 [0.8]
 [0.6]
 [0.6]
 [0.2]
 [0.6]
 [0.8]
 [0.2]
 [0.8]
 [0.6]
 [0.4]
 [0.6]
 [0.8]
 [0.6]
 [0.4]
 [0.4]
 [0.4]
 [0.4]
 [0.6]
 [0.4]
 [0.4]
 [0.4]
 [0.4]
