In [1]:
# Understanding Non-Negative Matrix Factorization(NMF)
from sklearn.decomposition import NMF
import scipy.sparse as sparse
from scipy.sparse.linalg import spsolve
import numpy as np
from numpy.linalg import solve
import pandas as pd

X = np.array([[1, 2, 3], [5, 10, 15]])
print("X is:\n", X)
model = NMF(n_components=2, init='random', random_state=42)
W = model.fit_transform(X)
H = model.components_
print("W is:\n", W)
print("H is:\n", H)
print("The Result of Matrix Multiplication of W and H is Same as X:\n", np.matmul(W, H))

X is:
 [[ 1  2  3]
 [ 5 10 15]]
W is:
 [[4.64104680e-01 4.60521425e-01]
 [1.00947951e+01 2.77553851e-05]]
H is:
 [[0.49530014 0.99060032 1.48590048]
 [1.67229763 3.34459522 5.01689283]]
The Result of Matrix Multiplication of W and H is Same as X:
 [[ 1.          2.          3.        ]
 [ 4.99999978 10.00000002 15.00000006]]


### CODE REFERENCES "Diff. Approaches of Building Recommender System" on KAGGLE

In [5]:
ratings = pd.read_csv('./Data_Amar/ratings_small.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [10]:
len(ratings['movieId'].unique())
nb_users  = ratings['userId'].nunique()
nb_movies = ratings['movieId'].nunique()

print("There are %d unique users and %d unique movies; so we need to prepare " 
      "an matrix of size %d by %d." %(nb_users, nb_movies, nb_users, nb_movies))

There are 671 unique users and 9066 unique movies; so we need to prepare an matrix of size 671 by 9066.


In [11]:
ratings_matrix = ratings.pivot_table(index=['userId'],columns=['movieId'],values='rating').reset_index(drop=True)
ratings_matrix.fillna(0, inplace = True)

data_matrix = np.array(ratings_matrix)
print(data_matrix.shape)
print(data_matrix)

(671, 9066)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 [5. 0. 0. ... 0. 0. 0.]]


In [12]:
data_matrix[0][30]

2.5

In [13]:
model = NMF(n_components=2, init='random', random_state=42)
user_vec = model.fit_transform(data_matrix)
item_vec = model.components_.T

def implicit_ALS(ratings, user_vec, item_vec, lambda_val, iteration, typ):                 
    
    ctr = 1

    if typ == 'user':
        while ctr <= iteration:
            YTY = item_vec.T.dot(item_vec)
            lambdaI = np.eye(YTY.shape[0]) * lambda_val

            for u in range(user_vec.shape[0]):
                user_vec[u, :] = solve((YTY + lambdaI), 
                                        ratings[u, :].dot(item_vec))
            ctr += 1

        return user_vec
    
    if typ == 'item':
        while ctr <= iteration:
            XTX = user_vec.T.dot(user_vec)
            lambdaI = np.eye(XTX.shape[0]) * lambda_val
            
            for i in range(item_vec.shape[0]):
                item_vec[i, :] = solve((XTX + lambdaI), 
                                        ratings[:, i].T.dot(user_vec))
            ctr += 1
        return item_vec
        
    
user_vec = implicit_ALS(data_matrix, user_vec, item_vec, lambda_val=0.2,
                        iteration=20, typ='user')
item_vec = implicit_ALS(data_matrix, user_vec, item_vec, lambda_val=0.2,
                        iteration=20, typ='item')

def predict_all():
        """ Predict ratings for every user and item. """
        predictions = np.zeros((user_vec.shape[0], 
                                item_vec.shape[0]))
        for u in range(user_vec.shape[0]):
            for i in range(item_vec.shape[0]):
                predictions[u, i] = predict(u, i)
                
        return predictions
def predict(u, i):
    """ Single user and item prediction. """
    return user_vec[u, :].dot(item_vec[i, :].T)

predict = predict_all()


data_matrix_pred_ALS = np.where(data_matrix == 0, predict, data_matrix)

#for i in range(data_matrix_pred_ALS.shape[0]):
    #for j in range(data_matrix_pred_ALS.shape[1]):
        #data_matrix_pred_ALS[i, j] = round(data_matrix_pred_ALS[i, j])

#For measuring the performance we have to use the predicted matrix
X_pred = np.zeros((nb_users, nb_movies))
for i in range(predict.shape[0]):
    for j in range(predict.shape[1]):
        X_pred[i, j] = round(predict[i, j])

In [None]:
predict[0][0]

In [None]:
print("Original Rating Matrix: \n", data_matrix)

In [28]:
data_matrix[0][0]

0.0

In [48]:
data_matrix_pred_ALS[0][0]

0.04463731307654409

In [30]:
print("Rating Matrix After Applying ALS: \n", data_matrix_pred_ALS)

Rating Matrix After Applying ALS: 
 [[ 4.46373131e-02  1.54574778e-02  1.84056942e-02 ... -2.18339524e-04
  -1.31003714e-04  6.24331570e-03]
 [ 6.96105039e-01  2.80606536e-01  1.23278044e-01 ...  3.01565872e-04
   1.80939523e-04  1.99343091e-02]
 [ 7.43268871e-01  3.11536338e-01  8.22893054e-02 ...  1.43882440e-03
   8.63294639e-04 -2.04538774e-03]
 ...
 [ 2.70298530e-01  1.09112421e-01  4.72375538e-02 ...  1.31391749e-04
   7.88350496e-05  7.44193420e-03]
 [ 4.00000000e+00  2.15794316e-01  8.02441656e-02 ...  5.26443779e-04
   3.15866268e-04  8.88009291e-03]
 [ 5.00000000e+00  7.52684096e-01  2.37553694e-01 ...  2.69261007e-03
   1.61556604e-03  1.22193737e-02]]


In [14]:
ind_matrix = np.zeros((nb_users, nb_movies))
ind_matrix = np.where(data_matrix != 0, 1, 0)

x = np.multiply(X_pred, ind_matrix)
RMSE_ALS = np.sqrt(np.mean((x - data_matrix)**2))
print("RMSE of ALS Model is %f." %RMSE_ALS)

RMSE of ALS Model is 0.390683.


In [17]:
a = int(input("Customer ID"))
print("Generating Preferences for customer ID",a)

Customer ID1
Generating Preferences for customer ID 1


In [22]:
#Create an indicator matrix to ensure the movie was not rated previously
ind_matrix = np.zeros((nb_users, nb_movies))
ind_matrix = np.where(data_matrix == 0, 1, 0)

#Multiply predicted rating matrix with this indicator matrix to consider
#only the predicted ones
pred = np.multiply(data_matrix_pred_ALS, ind_matrix)
pred = pred[a-1, :]
#pred_df = pd.DataFrame(pred)
column_names = ["movieID", "Rating",'flag']
pred_df = pd.DataFrame(columns = column_names)
#pred_df = pred_df[pred_df['Rating'] > 0]
pred_df['flag']=ind_matrix[a-1,:]
pred_df['Rating']=pred
pred_df['movieID']=ratings['movieId']
pred_df['Fin Rating']=pred_df['flag']*pred_df['Rating']
print(pred_df.nlargest(100, 'Fin Rating'))

      movieID    Rating  flag  Fin Rating
535       671  0.124143     1    0.124143
733      2302  0.115184     1    0.115184
981        50  0.107705     1    0.107705
744        50  0.106266     1    0.106266
729      2273  0.106038     1    0.106038
...       ...       ...   ...         ...
1509     2706  0.067426     1    0.067426
1819     4451  0.066809     1    0.066809
1021      225  0.066739     1    0.066739
1393     2018  0.066686     1    0.066686
1046      329  0.066457     1    0.066457

[100 rows x 4 columns]


In [None]:
movies_df = pd.read_csv('Data/movies_metadata.csv')

In [11]:
data_matrix[0][30]

2.5

In [12]:
sum(data_matrix_pred_ALS[0,:])

50.0

In [13]:
ind_matrix[0][29]

1

In [33]:
pred[567]

0.04195564673353234

In [15]:
sum(pred)

0.0