In [1]:
import os
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt

class MF:
    """Matrix factorization of a user-item rating matrix, trained with SGD.

    The rating matrix ``R`` is approximated by ``U @ V.T``. Only entries with
    ``R[i, j] > 0`` are treated as observed ratings; all other entries are
    ignored by both the error measure and the gradient updates.
    """

    def __init__(self, R, K, alpha, tol, max_iter=1000):
        """Store the hyper-parameters and randomly initialise both factors.

        Parameters
        ----------
        R : array-like, shape (num_users, num_items)
            User-item rating matrix; values ``<= 0`` mean "not rated".
        K : int
            Number of latent dimensions.
        alpha : float
            Learning rate of the SGD updates.
        tol : float
            Training stops once the total squared error is ``<= tol``.
        max_iter : int or None, default 1000
            Upper bound on the number of training epochs. Without a bound the
            loop in ``train`` never ends when the error cannot reach ``tol``.
            Pass ``None`` to run until ``tol`` is reached, however long it takes.
        """
        self.R = np.asarray(R, dtype=float)  # user-item rating matrix
        self.num_users, self.num_items = self.R.shape  # number of users & items
        self.K = K  # number of latent dimensions
        self.alpha = alpha  # learning rate
        self.tol = tol
        self.max_iter = max_iter

        # Defined up front so plotErrorTrajectory() works before train().
        self.lstError = []

        # Factors are drawn from NumPy's global random state (U first, then V),
        # so np.random.seed(...) before construction makes a run reproducible.
        self.U = np.random.rand(self.num_users, self.K)
        self.V = np.random.rand(self.num_items, self.K)

    def calcTotalErrors(self):
        """Return the sum of squared residuals over the observed ratings."""
        residual = self.R - np.dot(self.U, self.V.T)
        # The boolean mask keeps only observed cells; an empty mask sums to 0.0.
        return float(np.sum(residual[self.R > 0] ** 2))

    def train(self):
        """Run SGD epochs until the error is ``<= tol`` or a stop rule fires.

        Each epoch visits every observed rating once, in row-major order, and
        updates the matching user and item vectors. The total error before
        training and after every epoch is recorded in ``self.lstError``.
        Training also stops after ``max_iter`` epochs (when it is not ``None``)
        or as soon as the error stops being finite (divergence).
        """
        itr = 0
        self.lstError = []

        error = self.calcTotalErrors()
        self.lstError.append(error)

        # Observed cells do not change during training: locate them once
        # instead of re-testing every cell of R on every epoch. argwhere
        # returns indices in row-major order.
        observed = np.argwhere(self.R > 0)

        while error > self.tol and np.isfinite(error):
            if self.max_iter is not None and itr >= self.max_iter:
                break

            for i, j in observed:
                e = self.R[i, j] - np.dot(self.U[i], self.V[j])
                # Both updates are computed from the pre-update vectors.
                new_ui = self.U[i] + self.alpha * e * self.V[j]
                new_vj = self.V[j] + self.alpha * e * self.U[i]

                self.U[i] = new_ui
                self.V[j] = new_vj

            itr += 1
            error = self.calcTotalErrors()
            self.lstError.append(error)

            print(f"Current iter: {itr}, with Error {error}")

    def plotErrorTrajectory(self):
        """Plot the errors recorded by ``train`` against the epoch index."""
        plt.plot(self.lstError, marker='o')
        plt.title('Error Over Iterations')
        plt.xlabel('Iterations')
        plt.ylabel('Error')
        plt.show()

    def saveModel(self, user_path='user_matrix.npy', item_path='item_matrix.npy'):
        """Write ``U`` and ``V`` to ``.npy`` files (defaults: working directory)."""
        np.save(user_path, self.U)
        np.save(item_path, self.V)

    def loadModel(self, user_path='user_matrix.npy', item_path='item_matrix.npy'):
        """Replace ``U`` and ``V`` with the arrays stored in the given files."""
        self.U = np.load(user_path)
        self.V = np.load(item_path)

    def estimate_rating(self, i, j):
        """Return the predicted rating of item ``j`` by user ``i``."""
        # .T is a no-op for a single item vector; it is kept so that selecting
        # several items at once (2-D V[j]) still yields one value per item.
        estimated_ratings = np.dot(self.U[i], self.V[j].T)

        return estimated_ratings

import pandas as pd

# --- Configuration ----------------------------------------------------------
# The MovieLens folder defaults to the original location; set the
# MOVIELENS_DIR environment variable to point the notebook somewhere else
# without editing the code.
DATA_DIR = Path(os.environ.get("MOVIELENS_DIR", "C:/Users/user/Desktop/2023-2/추천시스템/MovieLens"))
SEED = 42  # fixes the random initialisation of the latent factors

K = 30  # Latent factor dimension
alpha = 0.01  # Learning rate
tol = 0.0001  # Training stops once the total squared error is <= tol

# --- Load data ----------------------------------------------------------------
rawRatings = pd.read_csv(DATA_DIR / "ratings.csv")
rawMovies = pd.read_csv(DATA_DIR / "movies.csv")

# --- Build the user x title rating matrix -------------------------------------
rawData = pd.merge(rawRatings, rawMovies, on='movieId')
# pivot_table aggregates with the mean by default, so ratings one user gave to
# different movieIds sharing the same title collapse into a single averaged cell.
# Missing (user, title) pairs become 0, which MF treats as "not rated".
userMovieRating = rawData.pivot_table('rating', index='userId', columns='title').fillna(0)

R = userMovieRating.to_numpy()

# --- Train, inspect, persist --------------------------------------------------
# MF draws its initial factors from NumPy's global random state.
np.random.seed(SEED)

mf_model = MF(R, K, alpha, tol)
mf_model.train()
mf_model.plotErrorTrajectory()
mf_model.saveModel()

# Row 1 / column 2 are positional indices into R, not userId / movieId values.
estimated_rating = mf_model.estimate_rating(1, 2)
print("Estimated rating:", estimated_rating)

Current iter: 1, with Error 122922.6167364933
Current iter: 2, with Error 87057.97864130558
Current iter: 3, with Error 76959.79592408144
Current iter: 4, with Error 70970.77057450774
Current iter: 5, with Error 66508.10575927119
Current iter: 6, with Error 62769.51018521485
Current iter: 7, with Error 59429.53711684414
Current iter: 8, with Error 56346.67362915459
Current iter: 9, with Error 53461.9989468307
Current iter: 10, with Error 50753.97703118964
Current iter: 11, with Error 48216.03847212103
Current iter: 12, with Error 45845.640309420705
Current iter: 13, with Error 43639.51410483019
Current iter: 14, with Error 41592.178152341825
Current iter: 15, with Error 39696.017919808844
Current iter: 16, with Error 37941.951978881894
Current iter: 17, with Error 36320.163600748434
Current iter: 18, with Error 34820.682972561604
Current iter: 19, with Error 33433.777352091
Current iter: 20, with Error 32150.179836827552
Current iter: 21, with Error 30961.2045823224
Current iter: 22, w

KeyboardInterrupt: 