In [1]:
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from scipy.linalg import lu

In [2]:
def check_spanrd(vectors, d):
    """
    Decide whether the numerical rank of `vectors` equals `d`, i.e. whether
    the row vectors span R^d.

    Inputs:
        - vectors (array): matrix (N, d), one vector per row
        - d (int): dimension of the space to be spanned
    Return:
        - True if the rank equals d, False otherwise
    """
    # Background reading:
    # https://math.stackexchange.com/questions/56201/how-to-tell-if-a-set-of-vectors-spans-a-space
    # https://stackoverflow.com/questions/15638650/is-there-a-standard-solution-for-gauss-elimination-in-python
    # Factorise first, then measure the rank of the upper factor only.
    _permuted_lower, upper = lu(vectors, permute_l=True)
    numerical_rank = int(np.linalg.matrix_rank(upper))
    return numerical_rank == d

In [3]:
# Read the ratings matrix from disk (rows are users, columns are items)

data_path = "jester_data_40jokes_19181users.npy"

ratings = np.load(data_path)
n_users, n_items = ratings.shape
print("Loaded dataset: {}".format((n_users, n_items)))

# normalize ratings: every entry is divided by 10
ratings = np.true_divide(ratings, 10)
print("ratings: max {0} - min {1}".format(np.max(ratings), np.min(ratings)))

Loaded dataset: (19181, 40)
ratings: max 0.9710000000000001 - min -0.9949999999999999


In [4]:
# Truncated SVD of the ratings matrix, keeping K singular triplets

K = 36
U, s, Vt = svds(ratings, k=K)
s = np.diag(s)
U = U @ s  # fold the singular values into the left factor

# Reconstruction error of the factorisation (root-mean-square and worst case)
Yhat = U @ Vt
rmse = np.sqrt(np.mean(np.square(np.abs(Yhat - ratings))))
print("K: ", K)
print("RMSE:", rmse)
print("MAX_ERR:", np.max(np.abs(Yhat - ratings)))

K:  36
RMSE: 0.09934649260590526
MAX_ERR: 1.4339307776635835


In [5]:
def save_model(net, normalize=False):
    """
    Extract a linear representation from the last hidden layer of a fitted
    network, print diagnostics about it, and save it to a compressed .npz file.

    The network output is rewritten as phi(user, item) . theta, where phi is
    the last hidden-layer activation with a constant 1 appended and theta is
    the output-layer weight vector with the output intercept appended.

    NOTE: this function reads the notebook-level variables `X`, `y`,
    `ratings`, `n_users` and `n_items`. `X` must hold one row per
    (user, item) pair, ordered user by user, so that the features can be
    reshaped to (n_users, n_items, d).

    Inputs:
        - net: fitted MLPRegressor with a single output
        - normalize (bool): if True, scale every feature vector and theta
          to unit Euclidean norm
    Return:
        - mu (array): matrix (n_users, n_items) equal to phi.dot(theta)
    """

    # Build features
    X_pred = X

    # hidden_layer_sizes may be a bare int as well as a sequence
    hidden_layer_sizes = [int(h) for h in np.atleast_1d(net.hidden_layer_sizes)]

    layer_units = [X_pred.shape[1]] + hidden_layer_sizes + [1]
    activations = [X_pred]
    for i in range(net.n_layers_ - 1):
        activations.append(np.empty((X_pred.shape[0], layer_units[i + 1])))

    # Private sklearn API; the code below relies on it updating `activations` in place.
    net._forward_pass(activations)
    y_pred = activations[-1]
    print("MSE (original):", np.mean((y_pred.flatten() - y) ** 2))

    # get weights (output intercept appended as the last coefficient)
    last_w = net.coefs_[-1]
    bias = np.array(net.intercepts_[-1]).reshape((1, 1))
    last_w = np.concatenate([last_w, bias])

    # get last-layer features (constant 1 appended to pair with the intercept)
    last_feat = np.array(activations[-2], dtype=np.float32)
    last_feat = np.concatenate([last_feat, np.ones((X_pred.shape[0], 1))], axis=1)

    # get prediction
    pred = last_feat.dot(last_w)
    print("MSE (recomputed with last layer only):", np.mean((pred.flatten() - y) ** 2))

    # get feature matrix
    d = hidden_layer_sizes[-1] + 1
    print("d={0}".format(d))
    assert last_feat.shape == (n_users * n_items, d), \
        "expected one feature row per (user, item) pair"
    scaled_feat = last_feat
    if normalize:
        scaled_feat = last_feat / np.linalg.norm(last_feat, axis=1, keepdims=True)
    # Rows are ordered user by user, so a plain reshape recovers the (user, item) grid.
    phi = scaled_feat.reshape(n_users, n_items, d).astype(np.float32)

    # get param
    theta = np.array(last_w, dtype=np.float32).squeeze()
    if normalize:
        theta = theta / np.linalg.norm(theta)

    phi_norm = round(np.linalg.norm(phi, axis=2).max(), 2)
    print("phi max norm:", phi_norm)
    theta_norm = round(np.linalg.norm(theta), 2)
    print("theta norm:", theta_norm)

    # check predictions
    mu = phi.dot(theta)
    print("MSE (mu):", np.mean(np.abs(ratings - mu).flatten()**2))
    print("mu: max {0} - min {1}".format(mu.max(), mu.min()))
    gap = np.max(mu, axis=1)[:, np.newaxis] - mu
    print("gap max:", gap.max())
    # Zero gaps belong to the best item(s); replace them with a large sentinel
    # so that the minima below only consider strictly positive gaps.
    gap[gap == 0] = 100
    print("gap min:", gap.min())
    gap = np.min(gap, axis=1)
    print("# contexts with gap_min > 0.001:", np.sum(gap > 0.001))
    print("# contexts with gap_min > 0.01:", np.sum(gap > 0.01))
    print("# contexts with gap_min > 0.1:", np.sum(gap > 0.1))

    # check span: features of the highest-mu item of every user
    astar = np.argmax(mu, axis=1)
    fstar = phi[np.arange(n_users), astar]

    # Largest dimension (searching downwards from d) matching the rank of fstar.
    span = d
    for i in range(d):
        if check_spanrd(fstar, d - i):
            span = d - i
            break

    # NOTE(review): the "WARNING: " prefix is printed when span == d, i.e. when the
    # optimal features span the full space -- confirm this is the intended condition.
    print("{0}Spanning R^{1}".format("WARNING: " if span == d else "", span))

    # compute lambda HLS

    # `outer` is symmetric, so use the symmetric solver: it always returns real
    # eigenvalues, whereas eigvals may return complex values that would break
    # both .min() and the filename formatting below.
    outer = np.matmul(fstar.T, fstar) / n_users
    lambda_hls = np.linalg.eigvalsh(outer).min()
    print("lambda HLS:", lambda_hls)

    # save
    np.savez_compressed('jester_d{0}_span{1}_L{2:.2f}_S{3:.2f}_hls{4:.5f}.npz'.format(d,span,phi_norm,theta_norm, lambda_hls), 
                        features=phi, theta=theta)

    return mu

In [17]:
# fit large "ground-truth" network

hidden = 32
test_size = 0.25
seed = 0  # fixed so that the split and the network initialisation are reproducible

# One sample per (user, item) pair, ordered user by user: the features are the
# user's row of U followed by the item's column of Vt, the target is the rating.
# Built with array operations instead of a Python loop over every pair.
X = np.concatenate(
    [np.repeat(U, n_items, axis=0), np.tile(Vt.T, (n_users, 1))],
    axis=1,
)
y = ratings.flatten()
assert X.shape[0] == y.shape[0] == n_users * n_items

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)
print("Training NN -- Size {0}".format((hidden, hidden)))
net = MLPRegressor(hidden_layer_sizes=(hidden, hidden), max_iter=500, verbose=True, random_state=seed).fit(X_train, y_train)
print("R^2:", net.score(X_test, y_test))
print()
print("Saving model...")
mu = save_model(net)
print()

Training NN -- Size (32, 32)
Iteration 1, loss = 0.06032827
Iteration 2, loss = 0.02434696
Iteration 3, loss = 0.01662422
Iteration 4, loss = 0.01492757
Iteration 5, loss = 0.01425305
Iteration 6, loss = 0.01388699
Iteration 7, loss = 0.01364689
Iteration 8, loss = 0.01348789
Iteration 9, loss = 0.01337995
Iteration 10, loss = 0.01323149
Iteration 11, loss = 0.01312550
Iteration 12, loss = 0.01306423
Iteration 13, loss = 0.01299734
Iteration 14, loss = 0.01290144
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Stopping.
R^2: 0.900135596093936

Saving model...
MSE (original): 0.025317196528455283
MSE (recomputed with last layer only): 0.025317196520806905
d=33
phi max norm: 4.95
theta norm: 4.09
MSE (mu): 0.02531719568480024
mu: max 1.5421335697174072 - min -1.9387906789779663
gap max: 3.3288007
gap min: 4.172325e-06
# contexts with gap_min > 0.001: 18959
# contexts with gap_min > 0.01: 17037
# contexts with gap_min > 0.1: 5437
lambda HLS: 0.001856959



In [18]:
# fit smaller networks

hidden_small = [256]
ds = [30, 28, 26, 24, 22, 20]
nets = {}
seed = 0  # fixed so that the network initialisation is reproducible

# redefine targets based on the outputs of the larger network (the input is still X)
# mu is (n_users, n_items); flattening row by row matches the row order of X.
y_mu = np.asarray(mu).flatten()

# `mu` and `net` are deliberately not deleted here: deleting them made this
# cell fail with a NameError when run a second time.

# The small networks are fit and scored on all (X, y_mu) pairs; no train/test split is used.
for j in ds:
    size = hidden_small + [j]
    print("Training NN -- Size {0}".format(size))
    nets[j] = MLPRegressor(hidden_layer_sizes=size, max_iter=500, tol=1e-6, verbose=True, random_state=seed).fit(X, y_mu)
    print("R^2 (size {0}): {1}".format(j, nets[j].score(X, y_mu)))
    print()
    print("Saving model...")
    save_model(nets[j])
    print()
    nets[j] = None  # drop the fitted model once its representation has been saved

Training NN -- Size [256, 30]
Iteration 1, loss = 0.01249646
Iteration 2, loss = 0.00249061
Iteration 3, loss = 0.00179495
Iteration 4, loss = 0.00150755
Iteration 5, loss = 0.00135836
Iteration 6, loss = 0.00126623
Iteration 7, loss = 0.00120708
Iteration 8, loss = 0.00116342
Iteration 9, loss = 0.00112710
Iteration 10, loss = 0.00109337
Iteration 11, loss = 0.00107069
Iteration 12, loss = 0.00104686
Iteration 13, loss = 0.00102835
Iteration 14, loss = 0.00101479
Iteration 15, loss = 0.00099417
Iteration 16, loss = 0.00097794
Iteration 17, loss = 0.00096156
Iteration 18, loss = 0.00094907
Iteration 19, loss = 0.00093359
Iteration 20, loss = 0.00092460
Iteration 21, loss = 0.00091588
Iteration 22, loss = 0.00089937
Iteration 23, loss = 0.00088933
Iteration 24, loss = 0.00088377
Iteration 25, loss = 0.00087046
Iteration 26, loss = 0.00086728
Iteration 27, loss = 0.00085086
Iteration 28, loss = 0.00084028
Iteration 29, loss = 0.00083053
Iteration 30, loss = 0.00082609
Iteration 31, loss 

Iteration 9, loss = 0.00110482
Iteration 10, loss = 0.00107546
Iteration 11, loss = 0.00105031
Iteration 12, loss = 0.00102116
Iteration 13, loss = 0.00100598
Iteration 14, loss = 0.00098168
Iteration 15, loss = 0.00096804
Iteration 16, loss = 0.00095368
Iteration 17, loss = 0.00093887
Iteration 18, loss = 0.00092397
Iteration 19, loss = 0.00091062
Iteration 20, loss = 0.00089745
Iteration 21, loss = 0.00088883
Iteration 22, loss = 0.00087691
Iteration 23, loss = 0.00086811
Iteration 24, loss = 0.00085637
Iteration 25, loss = 0.00084583
Iteration 26, loss = 0.00083490
Iteration 27, loss = 0.00082888
Iteration 28, loss = 0.00081765
Iteration 29, loss = 0.00081047
Iteration 30, loss = 0.00080219
Iteration 31, loss = 0.00079515
Iteration 32, loss = 0.00078489
Iteration 33, loss = 0.00077700
Iteration 34, loss = 0.00077046
Iteration 35, loss = 0.00076306
Iteration 36, loss = 0.00075883
Iteration 37, loss = 0.00074536
Iteration 38, loss = 0.00074448
Iteration 39, loss = 0.00073388
Iteration

Iteration 26, loss = 0.00096781
Iteration 27, loss = 0.00095465
Iteration 28, loss = 0.00095246
Iteration 29, loss = 0.00094195
Iteration 30, loss = 0.00093470
Iteration 31, loss = 0.00092760
Iteration 32, loss = 0.00091771
Iteration 33, loss = 0.00091223
Iteration 34, loss = 0.00090777
Iteration 35, loss = 0.00089805
Iteration 36, loss = 0.00089309
Iteration 37, loss = 0.00088592
Iteration 38, loss = 0.00087947
Iteration 39, loss = 0.00087790
Iteration 40, loss = 0.00087034
Iteration 41, loss = 0.00086290
Iteration 42, loss = 0.00085806
Iteration 43, loss = 0.00085196
Iteration 44, loss = 0.00084674
Iteration 45, loss = 0.00084231
Iteration 46, loss = 0.00083883
Iteration 47, loss = 0.00083282
Iteration 48, loss = 0.00082589
Iteration 49, loss = 0.00081850
Iteration 50, loss = 0.00081734
Iteration 51, loss = 0.00081276
Iteration 52, loss = 0.00080870
Iteration 53, loss = 0.00080194
Iteration 54, loss = 0.00079739
Iteration 55, loss = 0.00079421
Iteration 56, loss = 0.00078671
Iteratio