In [1]:
from MEM import minimize_likelihood
from datasets import get_dataset
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from scipy.optimize import minimize
import warnings

warnings.filterwarnings('ignore')

## Adults dataset

In [2]:
X_train, y_train, X_test, y_test = get_dataset('adults')
X_train.shape, y_train.shape, X_test.shape, y_test.shape 

((32561, 107), (32561, 1), (16281, 107), (16281, 1))

In [3]:
import numpy as np
from tqdm import tqdm_notebook as tqdm

def minimize_likelihood(X, y, lmbda, lr=0.01, max_iter=100, min_diff=1e-3):
    alpha = np.random.rand(X.shape[1])
    last_F = None
    for i in tqdm(range(max_iter)):
        data_multipliers = y*X
        t = data_multipliers @ alpha
        grad_multipliers = np.ones((X.shape[0]))
        grad_multipliers[t > -1] = 0.5
        max_values = np.ones(X.shape[0]) - grad_multipliers - grad_multipliers*t
        nonzero_mask = max_values > 0

        new_F = np.sum(max_values[nonzero_mask])
        if last_F is not None and abs(new_F - last_F) < min_diff:
            break
            
#         print(grad_multipliers.shape, data_multipliers.shape, nonzero_mask.shape)

        grad = - np.sum((grad_multipliers[:, None] * data_multipliers)[nonzero_mask],
                        axis=0)/X.shape[0] + 2*lmbda*alpha
        alpha -= lr*grad

    return alpha

### Feature selection

$l1$-regularized logistic regression maximizes the worst-case mutual information over
$Γ(Q)$, which seems superior to the methods maximizing a heusristic instead of the mutual information

In [4]:
logreg = LogisticRegression(penalty='l1', C=10)
logreg.fit(X_train, y_train)
logreg.score(X_test,y_test)

0.8527731711811314

In [7]:
threshold = 0.4
selected_columns = [i for i, score in enumerate(logreg.coef_[0]) if abs(score) >threshold]

In [10]:
X_train_selected = X_train[:,selected_columns]
X_test_selected = X_test[:,selected_columns]
X_train_selected.shape, X_test_selected.shape

((32561, 53), (16281, 53))

### Vanilla SVM

In [11]:
svc=SVC()
svc.fit(X_train, y_train)
svc.score(X_test,y_test)

0.8527117498925127

### Minimize based implementation

In [15]:
lmbd = 0.001
y = y_train
X = X_train_selected
def MEM(alpha):
    w = (y*X)@alpha
    return np.mean(np.maximum(0,(1-w)*0.5, 1 -w))+lmbd*np.linalg.norm(alpha,ord=2)**2

In [16]:
MEM(np.ones(X.shape[1]))

1.5331550683893096

In [17]:
res = minimize(MEM, np.ones(X.shape[1]))

In [20]:
# test score
accuracy_score(y_test, np.sign(X_test_selected@res.x))

0.8441127694859039

### SGD-based implementation

In [23]:
alpha = minimize_likelihood(X_train_selected, y_train, 10, max_iter=100, lr=1e-2)

HBox(children=(IntProgress(value=0), HTML(value='')))




In [24]:
# test score
accuracy_score(y_test, np.sign(X_test_selected@alpha))

0.8033904551317487