In [1]:
import gzip
import implicit
import random
import numpy as np
from collections import defaultdict
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LogisticRegression
import warnings
warnings.filterwarnings('ignore')

In [2]:
def readGz(path):
    for l in gzip.open(path, 'rt'):
        yield eval(l)

def readCSV(path):
    f = gzip.open(path, 'rt')
    f.readline()
    for l in f:
        yield l.strip().split(',')

In [3]:
example = readCSV("train_Interactions.csv.gz")
print(next(example))
del example

['u79354815', 'b14275065', '4']


In [4]:
### Ratings Prediction
train_size = 195000
data       = [line for line in readCSV("train_Interactions.csv.gz")]
# random.shuffle(data)
train      = data[:train_size]
val        = data[train_size:]

allRatings = []
userBookRatings = defaultdict(lambda: defaultdict(float))
userRatings = defaultdict(list)
userBooks   = defaultdict(set)
bookUsers   = defaultdict(set)
all_users    = set()
all_books    = set()

for user, book, rating in train:
    all_users.add(user)
    all_books.add(book)
    rating = int(rating)
    allRatings.append(rating)
    userRatings[user].append(rating)
    userBookRatings[user][book] = rating
    userBooks[user].add(book)
    bookUsers[book].add(user)

globalAverage = sum(allRatings) / len(allRatings)
userAverage = {}
for user in userRatings:
    userAverage[user] = sum(userRatings[user]) / len(userRatings[user])

In [5]:
# Coordinate Descent
def coordinate_descent(lambda_opt = 1, iterations = 400):

    alpha_sum, bu_sum, bb_sum = 0, 0, 0

    train_len = len(train)
    bu = defaultdict(lambda: 1)
    bb = defaultdict(lambda: 1)

    for descent in range(iterations):
        alpha_sum = 0
        for user, book, _ in train:
            alpha_sum += userBookRatings[user][book] - (bu[user] + bb[book])
        alpha = alpha_sum / train_len

        for user in userRatings:
            bu_sum = 0
            for book in userBooks[user]:
                bu_sum += userBookRatings[user][book] - (alpha + bb[book])
            bu[user] = bu_sum / (lambda_opt + len(userBooks[user]))

        for book in bookUsers:
            bb_sum = 0
            for user in bookUsers[book]:
                bb_sum += userBookRatings[user][book] - (alpha + bu[user])
            bb[book] = bb_sum / (lambda_opt + len(bookUsers[book]))
            
    return alpha, bu, bb

# alpha, bu, bb = coordinate_descent(lambda_opt = 1, iterations = 100)

In [6]:
alpha, bu, bb = coordinate_descent(2.8, iterations = 100)

diff = 0
for user, book, rating in val:
    user_rating = alpha + bu[user] + bb[book]
    diff += (user_rating - int(rating)) ** 2
    MSE = diff / len(val)
print(MSE)

1.1121616794664089


In [7]:
# lambda_values = np.logspace(0, 1, num = 20)

# MSEs = []
# loop_count = 0
# for lambda_opt in lambda_values:
#     loop_count += 1; print(loop_count, end = ', ')
#     alpha, bu, bb = coordinate_descent(lambda_opt, iterations = 20)
#     rating_labels = []
#     diff = 0
#     for user, book, rating in val:
#         user_rating = alpha + bu[user] + bb[book]
#         diff += (user_rating - int(rating)) ** 2
#     MSE = diff / len(val)
#     MSEs.append(MSE)

# plt.plot(lambda_values, MSEs, label='Validation')
# plt.ylabel('MSE')
# plt.xlabel('lambda value'), plt.xscale('log')
# plt.title('MSE vs lambda values')
# plt.legend()
# plt.show()

# indx = MSEs.index(min(MSEs))
# print('\nLambda for lowest MSE is:', lambda_values[indx])
# print('\nBest MSE is:', MSEs[indx])

In [11]:
# Gradient Descent 2
step_size = 400 
lambda_opt1 = 1; lambda_opt2 = 1
print(step_size)
train_len = len(train)
k = 4
all_users_lst = list(all_users)
all_books_lst = list(all_books)
all_users_lst.sort()
all_books_lst.sort()

book_indices = defaultdict(int)
for book in userBooks[user]:
    book_ind = all_books_lst.index(book)
    book_indices[book] = book_ind

user_indices = defaultdict(int)
for user in bookUsers[book]:
    user_ind = all_users_lst.index(user)
    user_indices[user] = user_ind

gamma_u = (np.random.rand(len(all_users_lst), k)) * 0.5
gamma_b = (np.random.rand(len(all_books_lst), k)) * 0.5

print('Starting Descent...')
cur_MSE = 2
count = 0
for i in range(100):  
    prev_MSE = cur_MSE

    print('**** Loop:', i, ' ****')
    rating_labels = []
    diff = 0
    for user, book, rating in val:
        if user in all_users_lst and book in all_books_lst: 
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            mult = np.dot(gamma_u[user_ind], gamma_b[book_ind])
            user_rating = alpha + bu[user] + bb[book] + mult
        else:
            user_rating = alpha + bu[user] + bb[book];
        diff += (user_rating - int(rating)) ** 2

    MSE = diff / len(val)
    print('MSE', MSE)
    print('gamma u ave', sum([abs(i) for i in gamma_u[:,0]]) / len(gamma_u))

    cur_MSE = MSE
    if cur_MSE > prev_MSE: 
        count += 1
    else:
        count = 0
    if count > 2:
        print('Breaking...')
        break
        
    alpha_sum = 0
    for user, book, _ in train:
        user_ind = user_indices[user]
        book_ind = book_indices[book]
        gamma_mult = np.dot(gamma_u[user_ind], gamma_b[book_ind])
        alpha_sum += userBookRatings[user][book] - (bu[user] + bb[book] + gamma_mult)
    alpha = alpha_sum / train_len

    # Beta_u
    for user in userRatings:
        bu_sum = 0
        for book in userBooks[user]:
            user_ind = user_indices[user]
            book_ind = book_indices[book]
            gamma_mult = np.dot(gamma_u[user_ind], gamma_b[book_ind])
            bu_sum += userBookRatings[user][book] - (alpha + bb[book] + gamma_mult)
        bu[user] = bu_sum / (lambda_opt1 + len(userBooks[user]))

    # Beta_b
    for book in bookUsers:
        bb_sum = 0
        for user in bookUsers[book]:
            user_ind = user_indices[user]
            book_ind = book_indices[book]
            gamma_mult = np.dot(gamma_u[user_ind], gamma_b[book_ind])
            bb_sum += userBookRatings[user][book] - (alpha + bu[user] + gamma_mult)
        bb[book] = bb_sum / (lambda_opt1 + len(bookUsers[book]))
        
    # Gamma_u
    for user_ind, user in enumerate(all_users_lst):
        gamma_u_sum = 0
        for book in userBooks[user]:
            book_ind = book_indices[book]
            pred1 = alpha + bu[user] + bb[book]
            pred2 = np.dot(gamma_u[user_ind], gamma_b[book_ind])
            rating = userBookRatings[user][book]
            err = rating - (pred1 + pred2)
            gamma_u_sum += err
        gradient = - 2 * gamma_u_sum * gamma_b[book_ind] / train_len
        gamma_u[user_ind] = gamma_u[user_ind] - step_size * gradient * lambda_opt2
    assert max(gamma_u[:,0]) != np.inf
    
    # Gamma_b
    for book_ind, book in enumerate(all_books_lst):
        gamma_b_sum = 0
        for user in bookUsers[book]:
            user_ind = user_indices[user]
            pred1 = alpha + bu[user] + bb[book]
            pred2 = np.dot(gamma_u[user_ind], gamma_b[book_ind])
            rating = userBookRatings[user][book]
            err = rating - (pred1 + pred2)
            gamma_b_sum += err
        gradient = - 2 * gamma_b_sum * gamma_u[user_ind] / train_len
        gamma_b[book_ind] = gamma_b[book_ind] - step_size * gradient * lambda_opt2
    assert max(gamma_b[:,0]) != np.inf
    # print('gamma b ave', sum([abs(i) for i in gamma_b[:,0]]) / len(gamma_b))
    # print('gamma b', max(gamma_b), sum(gamma_b) / len(gamma_b))

400
Starting Descent...
**** Loop: 0  ****
MSE 1.1917721803788708
gamma u ave 0.24835440022538668
**** Loop: 1  ****
MSE 1.1295971696619336
gamma u ave 0.24899528705800714
**** Loop: 2  ****
MSE 1.128742436372259
gamma u ave 0.24963317179442074
**** Loop: 3  ****
MSE 1.1283815474845735
gamma u ave 0.2502683290875013
**** Loop: 4  ****
MSE 1.1281722937773555
gamma u ave 0.25090140837421837
**** Loop: 5  ****
MSE 1.1280644399700082
gamma u ave 0.2515324521608035
**** Loop: 6  ****
MSE 1.1280287148437573
gamma u ave 0.25216126455034427
**** Loop: 7  ****
MSE 1.1280448060526231
gamma u ave 0.2527879285171549
**** Loop: 8  ****
MSE 1.1280984378960353
gamma u ave 0.253412732458983
**** Loop: 9  ****
MSE 1.128179475661946
gamma u ave 0.2540357633856018
Breaking...


In [None]:
# Gradient Descent 2
# 0.02 not bad
step_size = 0.005
lambda_opt = 15;
train_len = len(train)
k = 2
all_users_lst = list(all_users)
all_books_lst = list(all_books)
all_users_lst.sort()
all_books_lst.sort()

gamma_u = (np.random.rand(len(all_users_lst),k) - 0.5) * 0.05
gamma_b = (np.random.rand(len(all_books_lst),k) - 0.5) * 0.05

print('Starting Descent...')
# print('gamma u', gamma_u[:6])
# print('gamma b', gamma_b[:6])
cur_MSE = 2
count = 0
for i in range(20):  
    prev_MSE = cur_MSE
    rating_labels = []
    diff = 0
    for user, book, rating in val:
        if user in all_users_lst and book in all_books_lst: 
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            mult = np.dot(gamma_u[user_ind] * gamma_b[book_ind])
            user_rating = alpha + bu[user] + bb[book]
        else:
            user_rating = alpha + bu[user] + bb[book];
        diff += (user_rating - int(rating)) ** 2

    MSE = diff / len(val)
    print('MSE on the validation set', MSE)
    
    cur_MSE = MSE
    if cur_MSE > prev_MSE: 
        count += 1
    else:
        count = 0
    if count > 2:
        print('Breaking...')
        break
    
    # Gamma_u
    for user_ind, user in enumerate(all_users_lst):
        gamma_u_sum = 0
        for book in userBooks[user]:
            book_ind = all_books_lst.index(book)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            temp = (pred1 + pred2) - rating
            gamma_u_sum += gamma_b[book_ind] * temp
        gradient = 2 * (gamma_u_sum + lambda_opt * gamma_u[user_ind])
        # print(update)
        gamma_u[user_ind] = gamma_u[user_ind] - step_size * gradient
    assert max(gamma_u) != np.inf
    # print('gamma u ave', sum([abs(i) for i in gamma_u]) / len(gamma_u))
    # print('gamma u', max(gamma_u), sum(gamma_u) / len(gamma_u))
    
    # Gamma_b
    for book_ind, book in enumerate(all_books_lst):
        gamma_b_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            temp = (pred1 + pred2) - rating
            gamma_b_sum += gamma_u[user_ind] * temp
        gradient = 2 * (gamma_b_sum + lambda_opt * gamma_b[book_ind])
        gamma_b[book_ind] = gamma_b[book_ind] - step_size * gradient
    assert max(gamma_b) != np.inf
    # print('gamma b ave', sum([abs(i) for i in gamma_b]) / len(gamma_b))
    # print('gamma b', max(gamma_b), sum(gamma_b) / len(gamma_b))

In [None]:
with open("predictions_Rating.txt", 'w') as predictions:
    for l in open("pairs_Rating.txt"):
        if l.startswith("userID"):
            #header
            predictions.write(l)
            continue
        user, book = l.strip().split('-')
        user_rating = alpha + bu[user] + bb[book] + gamma_u[user_ind] * gamma_b[book_ind]
        predictions.write(user + '-' + book + ',' + str(user_rating) + '\n')

In [None]:
rating_labels = []
diff = 0
for user, book, rating in val:
    if user in all_users_lst and book in all_books_lst: 
        user_ind = all_users_lst.index(user)
        book_ind = all_books_lst.index(book)
        user_rating = alpha + bu[user] + bb[book]# + gamma_u[user_ind] * gamma_b[book_ind]
    else:
        print('here')
        user_rating = alpha + bu[user] + bb[book];
    diff += (user_rating - int(rating)) ** 2

MSE = diff / len(val)
print('MSE on the validation set', MSE)

In [None]:
# GRADEINT DESCENT PREVIOUS

# Gradient Descent 2
# 0.005 not bad
step_size = 0.005
lambda_opt1 = 2.8; lambda_opt2 = 10
train_len = len(train)
k = 1
all_users_lst = list(all_users)
all_books_lst = list(all_books)
all_users_lst.sort()
all_books_lst.sort()

gamma_u = np.random.rand(len(all_users_lst)) - 0.5
gamma_b = np.random.rand(len(all_books_lst)) - 0.5
init_gamma_u = gamma_u
init_gamma_b = gamma_b
print('Starting Descent...')
# print('gamma u', gamma_u[:6])
# print('gamma b', gamma_b[:6])

bu = defaultdict(lambda: 1)
bb = defaultdict(lambda: 1)

for i in range(20):  
    
    rating_labels = []
    diff = 0
    for user, book, rating in val:
        if user in all_users_lst and book in all_books_lst: 
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            user_rating = alpha + bu[user] + bb[book] + gamma_u[user_ind] * gamma_b[book_ind]
        else:
            user_rating = alpha + bu[user] + bb[book];
        diff += (user_rating - int(rating)) ** 2

    MSE = diff / len(val)
    print('MSE on the validation set', MSE)
    
    # Alpha
    alpha_sum = 0
    for user, book, _ in train:
        user_ind = all_users_lst.index(user)
        book_ind = all_books_lst.index(book)
        gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
        alpha_sum += userBookRatings[user][book] - (bu[user] + bb[book] + gamma_mult)
    alpha = alpha_sum / train_len

    # Beta_u
    for user in userRatings:
        bu_sum = 0
        for book in userBooks[user]:
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
            bu_sum += userBookRatings[user][book] - (alpha + bb[book] + gamma_mult)
        bu[user] = bu_sum / (lambda_opt1 + len(userBooks[user]))
    
    # Beta_b
    for book in bookUsers:
        bb_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
            bb_sum += userBookRatings[user][book] - (alpha + bu[user] + gamma_mult)
        bb[book] = bb_sum / (lambda_opt1 + len(bookUsers[book]))
        
    # Gamma_u
    for user_ind, user in enumerate(all_users_lst):
        gamma_u_sum = 0
        for book in userBooks[user]:
            book_ind = all_books_lst.index(book)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            temp = (pred1 + pred2) - rating
            gamma_u_sum += gamma_b[book_ind] * temp
        gradient = 2 * (gamma_u_sum + lambda_opt2 *  gamma_u[user_ind])
        # print(update)
        gamma_u[user_ind] = gamma_u[user_ind] - step_size * gradient
    assert max(gamma_u) != np.inf
    print('gamma u', gamma_u)
    # print('gamma u', max(gamma_u), sum(gamma_u) / len(gamma_u))
    
    # Gamma_b
    for book_ind, book in enumerate(all_books_lst):
        gamma_b_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            temp = (pred1 + pred2) - rating
            gamma_b_sum += gamma_u[user_ind] * temp
        gradient = 2 * (gamma_b_sum + lambda_opt2 * gamma_b[book_ind])
        gamma_b[book_ind] = gamma_b[book_ind] - step_size * gradient
    assert max(gamma_b) != np.inf
    print('gamma b', gamma_b[:6])
    # print('gamma b', max(gamma_b), sum(gamma_b) / len(gamma_b))

# ALT LEAST SQUARES

In [None]:
import numpy.linalg as la
import scipy.sparse.linalg as sla

def least_square_feat(datum, gamma_fixed = 'user'):
    user, book, _ = datum
    feat = [alpha]
    user_ind = all_users_lst.index(user)
    book_ind = all_books_lst.index(book)
    feat.append(bu[user_ind])
    feat.append(bb[book_ind])
    feat.append(gamma_u[user_ind] * gamma_b[book_ind])
    return feat
    

In [None]:
train_len = len(train)
alpha = 1
all_users_lst = list(all_users)
all_books_lst = list(all_books)
all_users_lst.sort()
all_books_lst.sort()

bu = np.ones(len(all_users_lst))
bb = np.ones(len(all_books_lst))
gamma_u = np.ones(len(all_users_lst))
gamma_b = np.ones(len(all_books_lst))

LS_feat   = np.array([least_square_feat(d) for d in train])
LS_labels = [int(rating) for _, _, rating in train]

w = sla.lsqr(LS_feat_train, LS_labels_train, damp = 1)[0]

alpha = w[0]; bu = w[1]; bb = w[2]; gamma_u = w[3] / gamma_b

LS_feat   = np.array([least_square_feat(d) for d in train])
LS_labels = [int(rating) for _, _, rating in train]

# GRADEINT DESCENT PREVIOUS

In [None]:
# Gradient Descent 2
# 0.005 not bad
step_size = 0.02

lambda_opt = 1; threshold = 10**(-4)
train_len = len(train)
k = 1
all_users_lst = list(all_users)
all_books_lst = list(all_books)
all_users_lst.sort()
all_books_lst.sort()

gamma_u = np.random.rand(len(all_users_lst)) - 0.5
gamma_b = np.random.rand(len(all_books_lst)) - 0.5
init_gamma_u = gamma_u
init_gamma_b = gamma_b
print('Starting Descent...')
# print('gamma u', gamma_u[:6])
# print('gamma b', gamma_b[:6])

bu = defaultdict(lambda: 1)
bb = defaultdict(lambda: 1)

for i in range(20):  
    
    rating_labels = []
    diff = 0
    for user, book, rating in val:
        if user in all_users_lst and book in all_books_lst: 
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            user_rating = alpha + bu[user] + bb[book] + gamma_u[user_ind] * gamma_b[book_ind]
        else:
            user_rating = alpha + bu[user] + bb[book];
        diff += (user_rating - int(rating)) ** 2

    MSE = diff / len(val)
    print('MSE on the validation set', MSE)
    
    # Alpha
    alpha_sum = 0
    for user, book, _ in train:
        user_ind = all_users_lst.index(user)
        book_ind = all_books_lst.index(book)
        gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
        alpha_sum += userBookRatings[user][book] - (bu[user] + bb[book] + gamma_mult)
    alpha = alpha_sum / train_len

    # Beta_u
    for user in userRatings:
        bu_sum = 0
        for book in userBooks[user]:
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
            bu_sum += userBookRatings[user][book] - (alpha + bb[book] + gamma_mult)
        bu[user] = bu_sum / (lambda_opt + len(userBooks[user]))
    
    # Beta_b
    for book in bookUsers:
        bb_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
            bb_sum += userBookRatings[user][book] - (alpha + bu[user] + gamma_mult)
        bb[book] = bb_sum / (lambda_opt + len(bookUsers[book]))
        
    # Gamma_u
    for user_ind, user in enumerate(all_users_lst):
        gamma_u_sum = 0
        for book in userBooks[user]:
            book_ind = all_books_lst.index(book)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            temp = (pred1 + pred2) - rating
            gamma_u_sum += gamma_b[book_ind] * temp
        gradient = 2 * (gamma_u_sum + lambda_opt *  gamma_u[user_ind])
        # print(update)
        gamma_u[user_ind] = gamma_u[user_ind] - step_size * gradient
    assert max(gamma_u) != np.inf
    print('gamma u', gamma_u)
    # print('gamma u', max(gamma_u), sum(gamma_u) / len(gamma_u))
    
    # Gamma_b
    for book_ind, book in enumerate(all_books_lst):
        gamma_b_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            temp = (pred1 + pred2) - rating
            gamma_b_sum += gamma_u[user_ind] * temp
        gradient = 2 * (gamma_b_sum + lambda_opt * gamma_b[book_ind])
        gamma_b[book_ind] = gamma_b[book_ind] - step_size * gradient
    assert max(gamma_b) != np.inf
    print('gamma b', gamma_b[:6])
    # print('gamma b', max(gamma_b), sum(gamma_b) / len(gamma_b))
    
#     rating_labels = []
#     diff = 0
#     for user, book, rating in val:
#         if user in all_users_lst and book in all_books_lst: 
#             user_ind = all_users_lst.index(user)
#             book_ind = all_books_lst.index(book)
#             user_rating = alpha + bu[user] + bb[book] + gamma_u[user_ind] * gamma_b[book_ind]
#         else:
#             user_rating = alpha + bu[user] + bb[book];
#         diff += (user_rating - int(rating)) ** 2

#     MSE = diff / len(val)
#     print('MSE on the validation set', MSE)
        

In [None]:
# Gradient Descent 2
lambda_opt = 1; threshold = 10**(-4)
train_len = len(train)
k = 1
all_users_lst = list(all_users)
all_books_lst = list(all_books)
all_users_lst.sort()
all_books_lst.sort()

gamma_u = np.random.rand(len(all_users_lst)) 
gamma_b = np.random.rand(len(all_books_lst))
print(gamma_u)
print(gamma_b)

for i in range(20):
    alpha_sum, bu_sum, bb_sum = 0, 0, 0

    train_len = len(train)
    bu = defaultdict(lambda: 1)
    bb = defaultdict(lambda: 1)

    # Alpha
    alpha_sum = 0
    for user, book, _ in train:
        user_ind = all_users_lst.index(user)
        book_ind = all_books_lst.index(book)
        gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
        alpha_sum += userBookRatings[user][book] - (bu[user] + bb[book] + gamma_mult)
    alpha = alpha_sum / train_len

    # Beta_u
    for user in userRatings:
        bu_sum = 0
        for book in userBooks[user]:
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
            bu_sum += userBookRatings[user][book] - (alpha + bb[book] + gamma_mult)
        bu[user] = bu_sum / (lambda_opt + len(userBooks[user]))
    
    # Beta_b
    for book in bookUsers:
        bb_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            book_ind = all_books_lst.index(book)
            gamma_mult = gamma_u[user_ind] * gamma_b[book_ind]
            bb_sum += userBookRatings[user][book] - (alpha + bu[user] + gamma_mult)
        bb[book] = bb_sum / (lambda_opt + len(bookUsers[book]))
          
    # Gamma_u
    for user_ind, user in enumerate(all_users_lst):
        gamma_u_sum = 0
        for book in userBooks[user]:
            book_ind = all_books_lst.index(book)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            err = rating - (pred1 + pred2)
            gamma_u_sum += err
            gradient = - 2 * gamma_u_sum * gamma_b[book_ind] / train_len
        gamma_u[user_ind] = gamma_u[user_ind] - gamma_u_sum * gradient * lambda_opt
    print('gamma u', max(gamma_u), sum(gamma_u) / len(gamma_u))
    
    # Gamma_b
    for book_ind, book in enumerate(all_books_lst):
        gamma_b_sum = 0
        for user in bookUsers[book]:
            user_ind = all_users_lst.index(user)
            pred1 = alpha + bu[user] + bb[book]
            pred2 = gamma_u[user_ind] * gamma_b[book_ind]
            rating = userBookRatings[user][book]
            err = rating - (pred1 + pred2)
            gamma_b_sum += err
            gradient = - 2 * gamma_b_sum * gamma_u[user_ind] / train_len
        gamma_b[book_ind] = gamma_b[book_ind] - gamma_b_sum * gradient * lambda_opt
    print('gamma b', max(gamma_b), sum(gamma_b) / len(gamma_b))