In [1]:
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from scipy.linalg import lu

In [2]:
def check_spanrd(vectors, d):
    """
    Tell whether the rows of `vectors` have rank exactly `d`.

    Inputs:
        - vectors (array): matrix (N, d), one vector per row
        - d (int): dimension of the space to be spanned
    Return:
        - True if the rank of the row-reduced matrix equals d, False otherwise
    """
    # Background on the approach (Gaussian elimination, then count the pivots):
    # https://math.stackexchange.com/questions/56201/how-to-tell-if-a-set-of-vectors-spans-a-space
    # https://stackoverflow.com/questions/15638650/is-there-a-standard-solution-for-gauss-elimination-in-python

    # With permute_l=True scipy returns (P @ L, U); only the upper factor is needed.
    _, upper = lu(vectors, permute_l=True)
    n_independent = int(np.linalg.matrix_rank(upper))
    return n_independent == d

In [3]:
# load data

data_path = "jester_data_40jokes_19181users.npy"

# Read the rating matrix; its two axes are unpacked as (users, items).
ratings = np.load(data_path)
n_users, n_items = ratings.shape
print("Loaded dataset: {}".format((n_users, n_items)))

# normalize ratings (divide every entry by 10)
ratings = np.true_divide(ratings, 10)
print("ratings: max {0} - min {1}".format(np.max(ratings), np.min(ratings)))

Loaded dataset: (19181, 40)
ratings: max 0.9710000000000001 - min -0.9949999999999999


In [4]:
# SVD

# Truncated SVD of the rating matrix keeping K singular triplets.
K = 36
U, s, Vt = svds(ratings, k=K)

# Fold the singular values into the left factor so that ratings ~= U @ Vt.
s = np.diag(s)
U = U @ s

# Reconstruction error of the rank-K approximation (RMSE and worst-case entry)
Yhat = U @ Vt
rmse = np.sqrt(np.mean(np.square(Yhat - ratings)))
print("K: ", K)
print("RMSE:", rmse)
print("MAX_ERR:", np.max(np.abs(Yhat - ratings)))

K:  36
RMSE: 0.09934649260590528
MAX_ERR: 1.433930777663576


In [5]:
def save_model(net, normalize=False):
    """
    Extract the last-hidden-layer representation of a fitted network, print
    diagnostics about it and save it to a compressed .npz file.

    The function reads the following notebook-level globals: `X` (network
    inputs, one row per (user, item) pair in user-major order), `y` (targets
    aligned with `X`), `ratings`, `n_users`, `n_items`, and `check_spanrd`.

    Inputs:
        - net: fitted network exposing `hidden_layer_sizes`, `n_layers_`,
          `coefs_`, `intercepts_` and `_forward_pass` (a private scikit-learn
          method, so this may break across scikit-learn versions)
        - normalize (bool): if True, every feature vector and the parameter
          vector are rescaled to unit Euclidean norm
    Return:
        - mu (array): matrix (n_users, n_items) of predictions phi.dot(theta)
    Side effects:
        - prints diagnostics and writes
          'jester_d{d}_span{span}_L{phi_norm}_S{theta_norm}_hls{lambda}.npz'
          in the current directory with arrays `features` and `theta`
    """

    # Build features
    X_pred = X

    hidden_layer_sizes = list(net.hidden_layer_sizes)

    # One activation buffer per layer; the forward pass is expected to populate the list.
    layer_units = [X_pred.shape[1]] + hidden_layer_sizes + [1]
    activations = [X_pred]
    for i in range(net.n_layers_ - 1):
        activations.append(np.empty((X_pred.shape[0], layer_units[i + 1])))

    net._forward_pass(activations)
    y_pred = activations[-1]
    print("MSE (original):", np.mean((y_pred.flatten() - y) ** 2))

    # get weights (the output bias is appended as an extra last row)
    last_w = net.coefs_[-1]
    bias = np.array(net.intercepts_[-1]).reshape((1, 1))
    last_w = np.concatenate([last_w, bias])

    # get last-layer features (a constant-1 column is appended to match the bias row)
    last_feat = np.array(activations[-2], dtype=np.float32)
    last_feat = np.concatenate([last_feat, np.ones((X_pred.shape[0], 1))], axis=1)

    # get prediction
    pred = last_feat.dot(last_w)
    print("MSE (recomputed with last layer only):", np.mean((pred.flatten() - y) ** 2))

    # get feature matrix
    d = hidden_layer_sizes[-1] + 1
    print("d={0}".format(d))
    # Row t * n_items + z of last_feat belongs to (user t, item z), so a C-order
    # reshape yields the (n_users, n_items, d) tensor without a Python loop.
    assert last_feat.shape[0] == n_users * n_items
    if normalize:
        row_norms = np.linalg.norm(last_feat, axis=1, keepdims=True)
        scaled_feat = last_feat / row_norms
    else:
        scaled_feat = last_feat
    phi = scaled_feat.reshape(n_users, n_items, d).astype(np.float32)

    # get param
    theta = np.array(last_w, dtype=np.float32).squeeze()
    if normalize:
        theta = theta / np.linalg.norm(theta)

    phi_norm = round(np.linalg.norm(phi, axis=2).max(), 2)
    print("phi max norm:", phi_norm)
    theta_norm = round(np.linalg.norm(theta), 2)
    print("theta norm:", theta_norm)

    # check predictions
    mu = phi.dot(theta)
    print("MSE (mu):", np.mean(np.abs(ratings - mu).flatten()**2))
    print("mu: max {0} - min {1}".format(mu.max(), mu.min()))
    gap = np.max(mu, axis=1)[:, np.newaxis] - mu
    print("gap max:", gap.max())
    # Zero gaps (the best item itself, and exact ties) are overwritten with a large
    # sentinel so they are ignored by the minimum below.
    gap[gap == 0] = 100
    print("gap min:", gap.min())
    gap = np.min(gap, axis=1)
    print("# contexts with gap_min > 0.001:", np.sum(gap > 0.001))
    print("# contexts with gap_min > 0.01:", np.sum(gap > 0.01))
    print("# contexts with gap_min > 0.1:", np.sum(gap > 0.1))

    # check span: features of the best item of every user, shape (n_users, d)
    astar = np.argmax(mu, axis=1)
    fstar = phi[np.arange(n_users), astar]

    # Largest dimension d - i (i = 0, 1, ...) equal to the rank of fstar;
    # stays at d if no candidate matches.
    span = d
    for i in range(d):
        if check_spanrd(fstar, d - i):
            span = d - i
            break

    # NOTE(review): the "WARNING: " prefix is printed when span == d; confirm that
    # this (rather than span < d) is the intended condition.
    print("{0}Spanning R^{1}".format("WARNING: " if span == d else "", span))

    # compute lambda HLS

    # outer is a symmetric Gram matrix, so use the symmetric solver: it always
    # returns real eigenvalues, whereas the general `eigvals` may return complex
    # values with round-off imaginary parts.
    outer = np.matmul(fstar.T, fstar) / n_users
    lambda_hls = np.linalg.eigvalsh(outer).min()
    print("lambda HLS:", lambda_hls)

    # save
    np.savez_compressed('jester_d{0}_span{1}_L{2:.2f}_S{3:.2f}_hls{4:.5f}.npz'.format(d,span,phi_norm,theta_norm, lambda_hls), 
                        features=phi, theta=theta)

    return mu

In [6]:
# fit large "ground-truth" network

hidden = 32
test_size = 0.25
SEED = 0  # fixed so the train/test split and the network initialisation are reproducible

# One sample per (user, item) pair in user-major order: row t * n_items + z is
# the concatenation [U[t], Vt[:, z]] and its target is ratings[t, z].
X = np.hstack([
    np.repeat(U, n_items, axis=0),      # U[t] repeated for every item z
    np.tile(Vt.T, (n_users, 1)),        # all item vectors, once per user
])
y = ratings.flatten()
assert X.shape[0] == y.shape[0] == n_users * n_items

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=SEED)
print("Training NN -- Size {0}".format((hidden, hidden)))
net = MLPRegressor(hidden_layer_sizes=(hidden, hidden), max_iter=500, verbose=True,
                   random_state=SEED).fit(X_train, y_train)
print("R^2:", net.score(X_test, y_test))
print()
print("Saving model...")
# save_model reads the globals X and y defined above and returns the
# (n_users, n_items) matrix of predictions of the saved representation.
mu = save_model(net)
print()

Training NN -- Size (32, 32)
Iteration 1, loss = 0.06014529
Iteration 2, loss = 0.02637442
Iteration 3, loss = 0.01731585
Iteration 4, loss = 0.01531637
Iteration 5, loss = 0.01464621
Iteration 6, loss = 0.01418959
Iteration 7, loss = 0.01391614
Iteration 8, loss = 0.01371910
Iteration 9, loss = 0.01356682
Iteration 10, loss = 0.01345674
Iteration 11, loss = 0.01334774
Iteration 12, loss = 0.01323710
Iteration 13, loss = 0.01318707
Iteration 14, loss = 0.01308295
Iteration 15, loss = 0.01304774
Iteration 16, loss = 0.01298269
Iteration 17, loss = 0.01294220
Iteration 18, loss = 0.01289731
Iteration 19, loss = 0.01286049
Iteration 20, loss = 0.01281348
Iteration 21, loss = 0.01278979
Iteration 22, loss = 0.01273593
Iteration 23, loss = 0.01275899
Iteration 24, loss = 0.01268236
Iteration 25, loss = 0.01267461
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.
R^2: 0.9023820099613543

Saving model...
MSE (original): 0.024825651818711235
MSE (recompu

In [None]:
# fit smaller networks

hidden_small = [256]
ds = [30, 28, 26, 24, 22, 20]  # sizes of the last hidden layer to try
nets = {}
SEED = 0  # fixed so every fitted network is reproducible

# redefine targets based on the outputs of the larger network (the input is still X)
# mu is indexed as mu[t, z]; a C-order flatten gives the same user-major order
# as the nested loop over (t, z) and returns an independent copy.
y_mu = np.asarray(mu).flatten()

# mu and net are intentionally not deleted here: deleting them made this cell
# raise NameError when executed a second time.

for j in ds:
    size = hidden_small + [j]
    print("Training NN -- Size {0}".format(size))
    nets[j] = MLPRegressor(hidden_layer_sizes=size, max_iter=500, tol=1e-6, verbose=True,
                           random_state=SEED).fit(X, y_mu)
    print("R^2 (size {0}): {1}".format(j, nets[j].score(X, y_mu)))
    print()
    print("Saving model...")
    save_model(nets[j])
    print()
    # drop the reference to the fitted network once its representation is saved
    nets[j] = None

Training NN -- Size [256, 30]
Iteration 1, loss = 0.01261878
Iteration 2, loss = 0.00246523
Iteration 3, loss = 0.00171477
Iteration 4, loss = 0.00141680
Iteration 5, loss = 0.00128156
Iteration 6, loss = 0.00118930
Iteration 7, loss = 0.00112670
Iteration 8, loss = 0.00108683
Iteration 9, loss = 0.00105028
Iteration 10, loss = 0.00101932
Iteration 11, loss = 0.00099520
Iteration 12, loss = 0.00097183
Iteration 13, loss = 0.00095876
Iteration 14, loss = 0.00093065
Iteration 15, loss = 0.00091927
Iteration 16, loss = 0.00090472
Iteration 17, loss = 0.00089147
Iteration 18, loss = 0.00087762
Iteration 19, loss = 0.00086886
Iteration 20, loss = 0.00085401
Iteration 21, loss = 0.00083934
Iteration 22, loss = 0.00083239
Iteration 23, loss = 0.00082208
Iteration 24, loss = 0.00081425
Iteration 25, loss = 0.00079888
Iteration 26, loss = 0.00079178
Iteration 27, loss = 0.00078154
Iteration 28, loss = 0.00077298
Iteration 29, loss = 0.00076398
Iteration 30, loss = 0.00075274
Iteration 31, loss 

Iteration 84, loss = 0.00054469
Iteration 85, loss = 0.00054244
Iteration 86, loss = 0.00054208
Iteration 87, loss = 0.00054167
Iteration 88, loss = 0.00054041
Iteration 89, loss = 0.00053961
Iteration 90, loss = 0.00053702
Iteration 91, loss = 0.00053622
Iteration 92, loss = 0.00053547
Iteration 93, loss = 0.00053256
Iteration 94, loss = 0.00053274
Iteration 95, loss = 0.00053494
Iteration 96, loss = 0.00053062
Iteration 97, loss = 0.00053109
Iteration 98, loss = 0.00053023
Iteration 99, loss = 0.00052808
Iteration 100, loss = 0.00052837
Iteration 101, loss = 0.00052746
Iteration 102, loss = 0.00052430
Iteration 103, loss = 0.00052638
Iteration 104, loss = 0.00052552
Iteration 105, loss = 0.00052585
Iteration 106, loss = 0.00052397
Iteration 107, loss = 0.00052184
Iteration 108, loss = 0.00052097
Iteration 109, loss = 0.00052156
Iteration 110, loss = 0.00051967
Iteration 111, loss = 0.00051788
Iteration 112, loss = 0.00051810
Iteration 113, loss = 0.00051783
Iteration 114, loss = 0.00

Iteration 145, loss = 0.00051063
Iteration 146, loss = 0.00051294
Iteration 147, loss = 0.00051089
Iteration 148, loss = 0.00050961
Iteration 149, loss = 0.00051081
Iteration 150, loss = 0.00050973
Iteration 151, loss = 0.00051032
Iteration 152, loss = 0.00050979
Iteration 153, loss = 0.00050827
Iteration 154, loss = 0.00051016
Iteration 155, loss = 0.00050976
Iteration 156, loss = 0.00050709
Iteration 157, loss = 0.00050846
Iteration 158, loss = 0.00050994
Iteration 159, loss = 0.00050739
Iteration 160, loss = 0.00050676
Iteration 161, loss = 0.00050727
Iteration 162, loss = 0.00050862
Iteration 163, loss = 0.00050673
Iteration 164, loss = 0.00050840
Iteration 165, loss = 0.00050730
Iteration 166, loss = 0.00050803
Iteration 167, loss = 0.00050813
Training loss did not improve more than tol=0.000001 for 10 consecutive epochs. Stopping.
R^2 (size 26): 0.9973370356566412

Saving model...
MSE (original): 0.024584946151380294
MSE (recomputed with last layer only): 0.02458494614776872
d=27