In [1]:
import matplotlib.pyplot as plt
import numpy as np
from scipy.linalg import lu
from scipy.sparse.linalg import svds
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

In [2]:
def check_spanrd(vectors, d):
    """
    Tell whether the rank of a set of row vectors is exactly d.

    Inputs:
        - vectors (array): matrix (N, d), one vector per row
        - d (int): dimension of the space to be spanned
    Return:
        - True if the numerical rank of `vectors` equals d, False otherwise
    """
    # Background:
    # https://math.stackexchange.com/questions/56201/how-to-tell-if-a-set-of-vectors-spans-a-space
    # https://stackoverflow.com/questions/15638650/is-there-a-standard-solution-for-gauss-elimination-in-python
    # Only the upper-triangular factor of the LU decomposition is needed;
    # its rank is what gets compared against d.
    upper = lu(vectors, permute_l=True)[1]
    return d == int(np.linalg.matrix_rank(upper))

In [3]:
# load data

data_path = "lastfmlog.npy"

ratings = np.load(data_path)

# Standardize all entries jointly (global mean / global std, not per user or
# per item). A constant matrix would otherwise silently turn into NaNs.
ratings_std = np.std(ratings)
if ratings_std == 0:
    raise ValueError("ratings in {0} are constant: cannot standardize".format(data_path))
ratings = (ratings - np.mean(ratings)) / ratings_std

# Distribution of the standardized entries
fig, ax = plt.subplots()
ax.hist(ratings.flatten())
ax.set(xlabel="standardized rating", ylabel="count")
plt.show()

print("Loaded dataset: {}".format(ratings.shape))

# Rows of the matrix are treated as users, columns as items
n_users, n_items = ratings.shape
print("ratings: max {0} - min {1}".format(ratings.max(), ratings.min()))

<Figure size 640x480 with 1 Axes>

Loaded dataset: (1322, 220)
ratings: max 6.648947490095273 - min -0.3259578617346158


In [4]:
# Truncated SVD of the standardized rating matrix

K = 150
U, s, Vt = svds(ratings, k=K)
s = np.diag(s)
# Fold the singular values into the left factor so that U @ Vt reconstructs ratings
U = U @ s

# Reconstruction error of the rank-K approximation (root mean squared and worst case)
Yhat = U @ Vt
abs_err = np.abs(Yhat - ratings)
rmse = np.sqrt(np.mean(abs_err ** 2))
print("K: ", K)
print("RMSE:", rmse)
print("MAX_ERR:", abs_err.max())

K:  150
RMSE: 0.2765351286203227
MAX_ERR: 2.876380863604467


In [5]:
def save_model(net, normalize=False):
    """
    Extract the last-hidden-layer representation of a fitted network and save it.

    Runs a forward pass of `net` over the notebook-level dataset `X`, uses the
    activations of the last hidden layer (with a constant 1 appended for the
    output bias) as features `phi`, uses the output-layer weights (with the
    bias appended) as the linear parameter `theta`, prints diagnostics and
    writes both arrays to 'lastfm_d{d}_span{span}.npz'.

    Relies on names defined elsewhere in the notebook: `X`, `y`, `ratings`,
    `n_users`, `n_items` and `check_spanrd`. Row t * n_items + z of `X` is
    taken to be the pair (user t, item z).

    Inputs:
        - net: fitted network exposing `hidden_layer_sizes`, `n_layers_`,
          `coefs_`, `intercepts_` and `_forward_pass` (in this notebook an
          sklearn MLPRegressor; `_forward_pass` is a private method, so this
          may break across sklearn versions -- TODO confirm on upgrade)
        - normalize (bool): if True, every feature vector and theta are
          rescaled to unit Euclidean norm before saving
    Return:
        - mu (array): matrix (n_users, n_items), equal to phi.dot(theta)
    Raises:
        - ValueError if X does not have n_users * n_items rows
    """
    # Build features
    X_pred = X
    n_samples = X_pred.shape[0]
    if n_samples != n_users * n_items:
        raise ValueError(
            "X has {0} rows, expected n_users * n_items = {1}".format(n_samples, n_users * n_items)
        )

    hidden_layer_sizes = list(net.hidden_layer_sizes)

    # One activation buffer per layer: input, each hidden layer, scalar output
    layer_units = [X_pred.shape[1]] + hidden_layer_sizes + [1]
    activations = [X_pred]
    for i in range(net.n_layers_ - 1):
        activations.append(np.empty((n_samples, layer_units[i + 1])))

    net._forward_pass(activations)
    y_pred = activations[-1]
    print("MSE (original):", np.mean((y_pred.flatten() - y) ** 2))

    # get weights (output-layer weights with the bias stacked as last row)
    last_w = net.coefs_[-1]
    bias = np.array(net.intercepts_[-1]).reshape((1, 1))
    last_w = np.concatenate([last_w, bias])

    # get last-layer features (constant 1 column pairs with the bias row)
    last_feat = np.array(activations[-2], dtype=np.float32)
    last_feat = np.concatenate([last_feat, np.ones((n_samples, 1))], axis=1)

    # get prediction
    pred = last_feat.dot(last_w)
    print("MSE (recomputed with last layer only):", np.mean((pred.flatten() - y) ** 2))

    # get feature matrix: reshape the flat (user, item) rows into
    # (n_users, n_items, d) instead of filling it entry by entry
    d = hidden_layer_sizes[-1] + 1
    print("d={0}".format(d))
    feat = last_feat
    if normalize:
        feat = feat / np.linalg.norm(feat, axis=1, keepdims=True)
    phi = feat.reshape(n_users, n_items, d).astype(np.float32)

    # get param
    theta = np.array(last_w, dtype=np.float32).squeeze()
    if normalize:
        theta = theta / np.linalg.norm(theta)

    phi_norm = round(np.linalg.norm(phi, axis=2).max(), 2)
    print("phi max norm:", phi_norm)
    theta_norm = round(np.linalg.norm(theta), 2)
    print("theta norm:", theta_norm)

    # check predictions
    mu = phi.dot(theta)
    print("MSE (mu):", np.mean(np.abs(ratings - mu).flatten()**2))
    print("mu: max {0} - min {1}".format(mu.max(), mu.min()))
    # Per-user gap between the best item and every item
    gap = np.max(mu, axis=1)[:, np.newaxis] - mu
    print("gap max:", gap.max())
    # Mask zero gaps (the best item itself and exact ties) with a large
    # sentinel so the row-wise minimum is the smallest positive gap
    gap[gap == 0] = 100
    print("gap min:", gap.min())
    gap = np.min(gap, axis=1)
    print("# contexts with gap_min > 0.001:", np.sum(gap > 0.001))
    print("# contexts with gap_min > 0.01:", np.sum(gap > 0.01))
    print("# contexts with gap_min > 0.1:", np.sum(gap > 0.1))

    # check span: features of the best item of every user
    astar = np.argmax(mu, axis=1)
    fstar = phi[np.arange(n_users), astar]

    # Largest k <= d whose check passes; falls back to d if none does
    span = d
    for k in range(d, 0, -1):
        if check_spanrd(fstar, k):
            span = k
            break

    # NOTE(review): the warning is printed when the optimal features DO span
    # R^d; kept as in the original -- confirm this is the intended condition.
    print("{0}Spanning R^{1}".format("WARNING: " if span == d else "", span))

    # compute lambda HLS

    # `outer` is a symmetric Gram matrix: eigvalsh is the symmetric solver and
    # always returns real eigenvalues, unlike the general-purpose eigvals.
    outer = np.matmul(fstar.T, fstar) / n_users
    lambda_hls = np.linalg.eigvalsh(outer).min()
    print("lambda HLS:", lambda_hls)

    # save
    np.savez_compressed('lastfm_d{0}_span{1}.npz'.format(d,span), features=phi, theta=theta)

    return mu

In [6]:
# generate datasets

# One sample per (user, item) pair, in row-major order: row t * n_items + z
# holds user t's scaled factor U[t] followed by item z's factor Vt[:, z].
# Built with repeat/tile instead of a Python loop over every pair.
user_part = np.repeat(U, n_items, axis=0)
item_part = np.tile(Vt.T, (n_users, 1))
X = np.hstack([user_part, item_part])
del user_part, item_part  # large intermediates, no longer needed
assert X.shape == (n_users * n_items, U.shape[1] + Vt.shape[0])

# Standardize every input column
X = (X - np.mean(X, axis=0, keepdims=True)) / np.std(X, axis=0, keepdims=True)

# Targets in the same (user, item) order as the rows of X
y = np.array(ratings).reshape(-1)
assert y.shape[0] == X.shape[0]

In [7]:
# fit networks

test_size=0.25

# Width of the first hidden layer(s) is drawn from [hidden_low, hidden_high)
hidden_low = 50
hidden_high = 200
# Candidate sizes for the last hidden layer: 5, 6, ..., 50
ds = np.arange(46) + 5
ds = ds.tolist()

# Set to an int for a reproducible run; None keeps the unseeded behaviour.
seed = None
rng = np.random.RandomState(seed)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=seed)

best_d = None
# R^2 can be negative, so start below any attainable score
max_score = -np.inf

n_models = 20
# Every model consumes one distinct value from ds
if n_models > len(ds):
    raise ValueError("n_models={0} exceeds the {1} available values of d".format(n_models, len(ds)))

for _ in range(n_models):
    # Cast to plain ints so NumPy scalars do not leak into printouts or sizes
    hidden = int(rng.randint(hidden_low, hidden_high))
    d = int(rng.choice(ds))
    ds.remove(d)
    print(ds)

    # One or two hidden layers of equal width, followed by the d-unit layer
    size = [hidden]
    if rng.randint(2) == 1:
        size += [hidden]
    size += [d]

    print("Training NN -- Size {0}".format(size))
    net = MLPRegressor(hidden_layer_sizes=size, max_iter=500, verbose=True, random_state=seed).fit(X_train, y_train)
    score = net.score(X_test, y_test)
    print("R^2 (size {0}): {1}".format(d, score))
    if score > max_score:
        max_score = score
        best_d = d
    print("Best so so far: d={0} with R^2: {1}".format(best_d, max_score))
    print()
    print("Saving model...")
    save_model(net)
    del net
    print()

print("Maximum R^2: {0} - d={1}".format(max_score, best_d))

[5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50]
Training NN -- Size [83, 38]
Iteration 1, loss = 0.43268726
Iteration 2, loss = 0.32261485
Iteration 3, loss = 0.26908904
Iteration 4, loss = 0.23441425
Iteration 5, loss = 0.20864923
Iteration 6, loss = 0.18765308
Iteration 7, loss = 0.16980527
Iteration 8, loss = 0.15485389
Iteration 9, loss = 0.14320669
Iteration 10, loss = 0.13371424
Iteration 11, loss = 0.12478202
Iteration 12, loss = 0.11696978
Iteration 13, loss = 0.11099414
Iteration 14, loss = 0.10556416
Iteration 15, loss = 0.10114404
Iteration 16, loss = 0.09666004
Iteration 17, loss = 0.09296340
Iteration 18, loss = 0.08911650
Iteration 19, loss = 0.08640322
Iteration 20, loss = 0.08318930
Iteration 21, loss = 0.07949193
Iteration 22, loss = 0.07704253
Iteration 23, loss = 0.07417611
Iteration 24, loss = 0.07167044
Iteration 25, loss = 0.07021562
Iter

Iteration 14, loss = 0.03334356
Iteration 15, loss = 0.03201895
Iteration 16, loss = 0.02841670
Iteration 17, loss = 0.02881338
Iteration 18, loss = 0.02686945
Iteration 19, loss = 0.02668306
Iteration 20, loss = 0.02492584
Iteration 21, loss = 0.02573857
Iteration 22, loss = 0.02418994
Iteration 23, loss = 0.02353164
Iteration 24, loss = 0.02291382
Iteration 25, loss = 0.02321300
Iteration 26, loss = 0.02159530
Iteration 27, loss = 0.02119009
Iteration 28, loss = 0.01943185
Iteration 29, loss = 0.02027242
Iteration 30, loss = 0.02196137
Iteration 31, loss = 0.01842263
Iteration 32, loss = 0.01913085
Iteration 33, loss = 0.01774196
Iteration 34, loss = 0.01987111
Iteration 35, loss = 0.01774628
Iteration 36, loss = 0.01640196
Iteration 37, loss = 0.01704667
Iteration 38, loss = 0.01549132
Iteration 39, loss = 0.01574106
Iteration 40, loss = 0.01690628
Iteration 41, loss = 0.01462669
Iteration 42, loss = 0.01611594
Iteration 43, loss = 0.01592318
Iteration 44, loss = 0.01361727
Iteratio

Iteration 22, loss = 0.04737184
Iteration 23, loss = 0.04677774
Iteration 24, loss = 0.04436162
Iteration 25, loss = 0.04320165
Iteration 26, loss = 0.04311047
Iteration 27, loss = 0.04313566
Iteration 28, loss = 0.04040533
Iteration 29, loss = 0.03974548
Iteration 30, loss = 0.03836733
Iteration 31, loss = 0.03838724
Iteration 32, loss = 0.03832966
Iteration 33, loss = 0.03722778
Iteration 34, loss = 0.03514612
Iteration 35, loss = 0.03549898
Iteration 36, loss = 0.03517404
Iteration 37, loss = 0.03374588
Iteration 38, loss = 0.03354777
Iteration 39, loss = 0.03310621
Iteration 40, loss = 0.03220856
Iteration 41, loss = 0.03288189
Iteration 42, loss = 0.03178171
Iteration 43, loss = 0.03104920
Iteration 44, loss = 0.03079451
Iteration 45, loss = 0.03038558
Iteration 46, loss = 0.02871424
Iteration 47, loss = 0.03069981
Iteration 48, loss = 0.02967848
Iteration 49, loss = 0.02763391
Iteration 50, loss = 0.02828420
Iteration 51, loss = 0.02793522
Iteration 52, loss = 0.02911797
Training

Iteration 21, loss = 0.07229524
Iteration 22, loss = 0.06985758
Iteration 23, loss = 0.06746905
Iteration 24, loss = 0.06520739
Iteration 25, loss = 0.06327510
Iteration 26, loss = 0.06127362
Iteration 27, loss = 0.05979240
Iteration 28, loss = 0.05793389
Iteration 29, loss = 0.05719622
Iteration 30, loss = 0.05545148
Iteration 31, loss = 0.05412282
Iteration 32, loss = 0.05263762
Iteration 33, loss = 0.05183867
Iteration 34, loss = 0.05087307
Iteration 35, loss = 0.04937144
Iteration 36, loss = 0.04831563
Iteration 37, loss = 0.04762664
Iteration 38, loss = 0.04604153
Iteration 39, loss = 0.04502852
Iteration 40, loss = 0.04460963
Iteration 41, loss = 0.04334505
Iteration 42, loss = 0.04299720
Iteration 43, loss = 0.04236579
Iteration 44, loss = 0.04130850
Iteration 45, loss = 0.04084696
Iteration 46, loss = 0.04040747
Iteration 47, loss = 0.03950550
Iteration 48, loss = 0.03935537
Iteration 49, loss = 0.03839440
Iteration 50, loss = 0.03737080
Iteration 51, loss = 0.03712950
Iteratio

Iteration 3, loss = 0.27795809
Iteration 4, loss = 0.23934965
Iteration 5, loss = 0.21351340
Iteration 6, loss = 0.19421344
Iteration 7, loss = 0.18151016
Iteration 8, loss = 0.17144629
Iteration 9, loss = 0.16164324
Iteration 10, loss = 0.15478831
Iteration 11, loss = 0.14875048
Iteration 12, loss = 0.14311132
Iteration 13, loss = 0.14020088
Iteration 14, loss = 0.13421639
Iteration 15, loss = 0.13155554
Iteration 16, loss = 0.12777917
Iteration 17, loss = 0.12557700
Iteration 18, loss = 0.12268598
Iteration 19, loss = 0.11996395
Iteration 20, loss = 0.11782678
Iteration 21, loss = 0.11514037
Iteration 22, loss = 0.11285913
Iteration 23, loss = 0.11146527
Iteration 24, loss = 0.10893189
Iteration 25, loss = 0.10829161
Iteration 26, loss = 0.10508877
Iteration 27, loss = 0.10407498
Iteration 28, loss = 0.10340182
Iteration 29, loss = 0.09993927
Iteration 30, loss = 0.10081987
Iteration 31, loss = 0.09868670
Iteration 32, loss = 0.09924704
Iteration 33, loss = 0.09702144
Iteration 34, l


[5, 6, 10, 11, 12, 14, 15, 16, 17, 18, 20, 21, 22, 25, 26, 29, 30, 31, 32, 33, 34, 35, 40, 41, 42, 44, 47, 48, 49, 50]
Training NN -- Size [176, 176, 37]
Iteration 1, loss = 0.40819522
Iteration 2, loss = 0.27163729
Iteration 3, loss = 0.18021263
Iteration 4, loss = 0.12517710
Iteration 5, loss = 0.09422983
Iteration 6, loss = 0.07465787
Iteration 7, loss = 0.06319286
Iteration 8, loss = 0.05547870
Iteration 9, loss = 0.04929311
Iteration 10, loss = 0.04712947
Iteration 11, loss = 0.04250448
Iteration 12, loss = 0.03898153
Iteration 13, loss = 0.03849628
Iteration 14, loss = 0.03465797
Iteration 15, loss = 0.03362038
Iteration 16, loss = 0.03445195
Iteration 17, loss = 0.03013601
Iteration 18, loss = 0.02838429
Iteration 19, loss = 0.02852175
Iteration 20, loss = 0.02766529
Iteration 21, loss = 0.02574474
Iteration 22, loss = 0.02668052
Iteration 23, loss = 0.02460395
Iteration 24, loss = 0.02283955
Iteration 25, loss = 0.02417340
Iteration 26, loss = 0.02141474
Iteration 27, loss = 0

Iteration 55, loss = 0.04618670
Iteration 56, loss = 0.04561178
Iteration 57, loss = 0.04428686
Iteration 58, loss = 0.04435514
Iteration 59, loss = 0.04331247
Iteration 60, loss = 0.04386392
Iteration 61, loss = 0.04291931
Iteration 62, loss = 0.04187090
Iteration 63, loss = 0.04137033
Iteration 64, loss = 0.04166160
Iteration 65, loss = 0.04142098
Iteration 66, loss = 0.04063393
Iteration 67, loss = 0.04003497
Iteration 68, loss = 0.03979547
Iteration 69, loss = 0.03975977
Iteration 70, loss = 0.03950337
Iteration 71, loss = 0.03757253
Iteration 72, loss = 0.03859849
Iteration 73, loss = 0.03798220
Iteration 74, loss = 0.03703102
Iteration 75, loss = 0.03724178
Iteration 76, loss = 0.03627981
Iteration 77, loss = 0.03645597
Iteration 78, loss = 0.03689822
Iteration 79, loss = 0.03552495
Iteration 80, loss = 0.03546803
Iteration 81, loss = 0.03495024
Iteration 82, loss = 0.03495869
Iteration 83, loss = 0.03430468
Iteration 84, loss = 0.03498860
Iteration 85, loss = 0.03414380
Iteratio