In [21]:
import os
import numpy as np
import pandas as pd

In [22]:
import sys
sys.path.append('../../Python/')
from Preprocessing import standardize
from Evaluation import split_train_test_ma, train_model, test_model
from Databases import get_databases_path, get_database

In [23]:
databases_path = '../../Databases/Sinteticas'
paths = get_databases_path(databases_path)

In [24]:
# Obtain the attributes and labels
attr_df = get_database(paths[1], 'database_attr.csv')
labels_df = get_database(paths[1], 'database_labels.csv')

In [25]:
# Preprocessing the attributes
standardized_attr_df = standardize(attr_df)

In [26]:
# Split in Training and Test Sets
X_train, X_test, y_train, y_test = split_train_test_ma(standardized_attr_df, labels_df)

In [27]:
annotators = y_train.columns.size
KXX = np.dot(X_train, X_train.T)

In [28]:
Y = {}
for annotator in y_train.columns:
    Y[annotator] = pd.get_dummies(y_train[annotator])

In [29]:
m = X_train.shape[0]
MOnes = np.ones((m,1))
aux_1 = np.eye(m) - np.dot(MOnes,(MOnes.T)/m)
KXXc = np.dot(aux_1, KXX, aux_1)

In [30]:
def FrobeniusProduct(A,B):
    return np.dot(A.T,B).trace()

In [31]:
KYYc = {}
KYY = {}
a = np.array([])
for annotator in y_train.columns:
    KYY[annotator] = np.dot(Y[annotator], Y[annotator].T)
    KYYc[annotator] = aux_1 * KYY[annotator] * aux_1
    a = np.append(a, FrobeniusProduct(KYYc[annotator], KXXc))

In [32]:
M = np.zeros((annotators, annotators))
for idA, annotator_a in enumerate(y_train.columns):
    for idB, annotator_b in enumerate(y_train.columns):
        M[idB, idA] = FrobeniusProduct(KYYc[annotator_b], KYYc[annotator_a]); 

In [33]:
H = 2*M
f = -2*a

In [14]:
import osqp
import scipy.sparse as sparse
import numpy as np
from numpy import linalg as LA

# Define problem data
P = sparse.csc_matrix(H)
q = f
A = sparse.csc_matrix(-np.eye(annotators))
l = np.zeros((annotators,1))
u = np.ones((annotators,1))

# Create an OSQP object
prob = osqp.OSQP()

# Setup workspace and change alpha parameter
prob.setup(P, q, A, l, u, alpha=1.0, verbose=False)

# Solve problem
results = prob.solve()

v = results.x
mu = v/LA.norm(v)
print(mu)

[0.4449998  0.44700595 0.44693058 0.45030136 0.44681382]


In [34]:
import numpy
from cvxopt import matrix
from cvxopt import solvers
from numpy import linalg as LA

P = matrix(H)
q = matrix(f)
G = -matrix(np.eye(annotators))
h = matrix(np.zeros((annotators,1)))

sol = solvers.qp(P,q,G,h)

     pcost       dcost       gap    pres   dres
 0: -1.0487e+07 -1.0487e+07  5e+00  2e+00  6e-10
 1: -1.0487e+07 -1.0487e+07  9e-02  4e-02  1e-11
 2: -1.0487e+07 -1.0487e+07  3e-02  2e-02  4e-12
 3: -1.0487e+07 -1.0487e+07  3e-02  2e-02  4e-12
 4: -8.9825e+06 -9.5036e+06  3e+06  2e-02  4e-12
 5:  3.0743e+07 -3.2956e+07  8e+07  1e-02  4e-12
 6:  2.2297e+07 -3.3668e+07  9e+07  1e-02  4e-12
 7:  2.4592e+07 -3.7573e+07  1e+08  1e-02  3e-12
 8:  1.4858e+06 -1.5440e+07  2e+07  4e-18  3e-15
 9: -2.3094e+06 -8.4176e+06  6e+06  1e-18  9e-16
10: -5.1784e+06 -1.3643e+07  8e+06  4e-18  4e-16
11: -6.7362e+06 -7.3102e+06  6e+05  9e-19  1e-16
12: -6.9647e+06 -6.9829e+06  2e+04  9e-19  1e-16
13: -6.9795e+06 -6.9797e+06  2e+02  9e-19  1e-16
14: -6.9797e+06 -6.9797e+06  2e+00  6e-23  1e-16
Optimal solution found.


In [35]:
v = sol['x']
mu = np.array(v/LA.norm(v))

In [36]:
models = {}
for annotator in y_train.columns:
    models[annotator] = train_model(X_train, y_train[annotator])

In [37]:
final_weight = np.zeros((X_test.shape[0],1))
for idx, annotator_a in enumerate(y_train.columns):
    probs = models[annotator].predict_proba(X_test)
    true_class_prob = probs[:,0]
    annotator_weight = (true_class_prob*mu[idx]).reshape((X_test.shape[0],1))
    final_weight += annotator_weight
final_weight = final_weight/mu.sum()

In [38]:
y_pred = [True if item >= 0.5 else False for item in final_weight]

In [39]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
print("Global Precision:")
print(accuracy_score(y_test, y_pred))
print("\n")

print("General Report:")
print(classification_report(y_test, y_pred, target_names=['yes','no']))
print("\n")

print("Confusion Matrix: ")
matriz_confusion = confusion_matrix(y_test, y_pred)
table = pd.DataFrame(matriz_confusion)
print(table)

Global Precision:
0.9122807017543859


General Report:
              precision    recall  f1-score   support

         yes       0.93      0.94      0.93       108
          no       0.89      0.87      0.88        63

    accuracy                           0.91       171
   macro avg       0.91      0.90      0.91       171
weighted avg       0.91      0.91      0.91       171



Confusion Matrix: 
     0   1
0  101   7
1    8  55
