In [7]:
import numpy



def matrix_factorization(R, P, Q, K, steps=5000, alpha=0.0002, beta=0.02):
   #steps: iterations
    #alpha: learning rate
    #beta: regularization parameter
    Q = Q.T

    for step in range(steps):

        for i in range(len(R)):

            for j in range(len(R[i])):

                if R[i][j] > 0:
# calculate error
                    eij = R[i][j] - numpy.dot(P[i,:],Q[:,j])
# Update P and Q based on the error
                    for k in range(K):
# calculate gradient with a and beta parameter
                        P[i][k] = P[i][k] + alpha * (2 * eij * Q[k][j] - beta * P[i][k])

                        Q[k][j] = Q[k][j] + alpha * (2 * eij * P[i][k] - beta * Q[k][j])

        eR = numpy.dot(P,Q)
# Calculate the error (after each full pass through the matrix)
        e = 0

        for i in range(len(R)):

            for j in range(len(R[i])):

                if R[i][j] > 0:

                    e = e + pow(R[i][j] - numpy.dot(P[i,:],Q[:,j]), 2)

                    for k in range(K):

                        e = e + (beta/2) * (pow(P[i][k],2) + pow(Q[k][j],2))
# If the error(0.001:local minimum) is less than a threshold, break the loop
        if e < 0.001:

            break
    return P, Q.T

In [8]:
R = [

     [5,3,0,1],

     [4,0,0,1],

     [1,1,0,5],

     [1,0,0,4],

     [0,1,5,4],

    ]



R = numpy.array(R)
10

# N: num of User
N = len(R)
# M: num of Movie
M = len(R[0])
# Num of Features
K = 2

# Initialize P and Q with random values

P = numpy.random.rand(N,K)

Q = numpy.random.rand(M,K)



nP, nQ = matrix_factorization(R, P, Q, K)

nR = numpy.dot(nP, nQ.T)

In [10]:
import numpy as np
import pandas as pd

# code to initialize R, N, M, P, Q, and perform matrix factorization

# Convert matrices to DataFrames
df_R = pd.DataFrame(R, columns=[f'Item {i}' for i in range(M)], index=[f'User {i}' for i in range(N)])
df_P = pd.DataFrame(nP, columns=[f'Feature {i}' for i in range(K)], index=[f'User {i}' for i in range(N)])
df_Q = pd.DataFrame(nQ.T, columns=[f'Item {i}' for i in range(M)], index=[f'Feature {i}' for i in range(K)])
df_nR = pd.DataFrame(nR, columns=[f'Item {i}' for i in range(M)], index=[f'User {i}' for i in range(N)])

# Display DataFrames in tabular format
print(nR)
print("Original Rating Matrix R:")
print(df_R.to_string())
print("\nUser-Feature Matrix P:")
print(df_P.to_string())
print("\nItem-Feature Matrix Q:")
print(df_Q.to_string())
print("\nPredicted Rating Matrix nR:")
print(df_nR.to_string())


[[4.97616048 2.98466985 1.79171742 1.00064994]
 [3.98235783 2.40532227 1.66678937 0.99625051]
 [0.99906712 1.00581817 6.02017137 4.95097343]
 [1.00238733 0.92364819 4.84967477 3.9683933 ]
 [1.10976698 0.99270745 4.95312535 4.04435376]]
Original Rating Matrix R:
        Item 0  Item 1  Item 2  Item 3
User 0       5       3       0       1
User 1       4       0       0       1
User 2       1       1       0       5
User 3       1       0       0       4
User 4       0       1       5       4

User-Feature Matrix P:
        Feature 0  Feature 1
User 0   2.129933  -0.070707
User 1   1.710489   0.037312
User 2   0.571781   2.267907
User 3   0.543363   1.795473
User 4   0.590975   1.820067

Item-Feature Matrix Q:
             Item 0    Item 1    Item 2    Item 3
Feature 0  2.331411  1.404267  0.921616  0.537773
Feature 1 -0.147268  0.089459  2.422149  2.047475

Predicted Rating Matrix nR:
          Item 0    Item 1    Item 2    Item 3
User 0  4.976160  2.984670  1.791717  1.000650
User 1  3

In [None]:
import numpy as np
import pandas as pd

# Your code to initialize R, N, M, P, Q, and perform matrix factorization

# Convert matrices to DataFrames
df_R = pd.DataFrame(R, columns=[f'Item {i}' for i in range(M)], index=[f'User {i}' for i in range(N)])
df_P = pd.DataFrame(nP, columns=[f'Feature {i}' for i in range(K)], index=[f'User {i}' for i in range(N)])
df_Q = pd.DataFrame(nQ.T, columns=[f'Item {i}' for i in range(M)], index=[f'Feature {i}' for i in range(K)])
df_nR = pd.DataFrame(nR, columns=[f'Item {i}' for i in range(M)], index=[f'User {i}' for i in range(N)])

# Concatenate the DataFrames horizontally
result = pd.concat([df_R, df_P, df_Q, df_nR], axis=1)

# Display the result in tabular format
print(result.to_string())


           Item 0  Item 1  Item 2  Item 3  Feature 0  Feature 1    Item 0    Item 1    Item 2    Item 3    Item 0    Item 1    Item 2    Item 3
User 0        5.0     3.0     0.0     1.0   2.137264   0.768402       NaN       NaN       NaN       NaN  4.999437  2.922198  4.630586  0.996542
User 1        4.0     0.0     0.0     1.0   1.671667   0.694067       NaN       NaN       NaN       NaN  3.959488  2.321336  3.843375  0.996732
User 2        1.0     1.0     0.0     5.0  -0.017051   2.121880       NaN       NaN       NaN       NaN  1.084368  0.793753  5.029286  4.960747
User 3        1.0     0.0     0.0     4.0   0.026813   1.705189       NaN       NaN       NaN       NaN  0.958497  0.687679  4.094744  3.971444
User 4        0.0     1.0     5.0     4.0   0.459254   1.804142       NaN       NaN       NaN       NaN  1.940194  1.257238  4.897111  4.041110
Feature 0     NaN     NaN     NaN     NaN        NaN        NaN  2.149234  1.229218  1.310659 -0.373188       NaN       NaN       NaN   