In [1]:
import os
import time
import math as mt
import numpy as np
import pandas as pd
import pickle
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds

In [2]:
class SVDModel:
    """Build, factorize and persist a user x movie rating matrix.

    Typical use: ``load_data()`` -> ``train()`` -> ``save()``.
    """

    # Defaults are the paths this class has always used.
    DATA_PATH = '../data/ratings_data.json'
    MODEL_DIR = './models/SVD'

    def __init__(self):
        self.n_users = 0
        self.n_items = 0

    def load_data(self, path=None):
        """Read the ratings file and return it as a sparse user x movie matrix.

        Args:
            path: Location of a JSON file written with ``orient='split'`` that
                has at least ``userId`` and ``movieId`` columns. Defaults to
                ``DATA_PATH``.

        Returns:
            ``scipy.sparse.csr_matrix`` of dtype float32 with shape
            ``(n_users, n_items)``. Rows follow the order in which user ids
            first appear in the file, columns the order in which movie ids
            first appear. Missing (user, movie) pairs are implicit zeros.

        Side effects:
            Sets ``self.users``, ``self.movies``, ``self.n_users`` and
            ``self.n_items``.
        """
        if path is None:
            path = self.DATA_PATH
        data = pd.read_json(path, orient='split')

        user_ids = data['userId']
        movie_ids = data['movieId']
        self.users = user_ids.unique()
        self.movies = movie_ids.unique()
        self.n_users = self.users.shape[0]
        self.n_items = self.movies.shape[0]

        # Translate ids into row/column positions in one vectorized pass
        # instead of writing a dense DataFrame one cell at a time.
        entries = pd.DataFrame({
            'row': pd.Index(self.users).get_indexer(user_ids),
            'col': pd.Index(self.movies).get_indexer(movie_ids),
            # The rating is taken positionally (second column): nothing in
            # this file names that column, so the original lookup is kept.
            'value': data.iloc[:, 1].to_numpy(dtype=np.float32),
        })
        # A repeated (user, movie) pair keeps its last rating; without this
        # the coordinate constructor below would add the duplicates together.
        entries = entries.drop_duplicates(subset=['row', 'col'], keep='last')

        matrix = csr_matrix(
            (
                entries['value'].to_numpy(),
                (entries['row'].to_numpy(), entries['col'].to_numpy()),
            ),
            shape=(self.n_users, self.n_items),
            dtype=np.float32,
        )
        # Ratings equal to 0 are indistinguishable from "not rated"; do not
        # store them explicitly.
        matrix.eliminate_zeros()
        return matrix

    def _save_pickle_file(self, file_name, data):
        """Pickle ``data`` to ``<MODEL_DIR>/<file_name>.pickle``."""
        target = f'{self.MODEL_DIR}/{file_name}.pickle'
        # The context manager closes the handle even if pickling fails.
        with open(target, 'wb') as handle:
            pickle.dump(data, handle)

    def save(self, U, sigma, Vt, data_mean):
        """Write the four training artifacts as pickles under ``MODEL_DIR``.

        Files written: ``u.pickle``, ``sigma.pickle``, ``vt.pickle`` and
        ``data_mean.pickle``. The directory is created when missing.
        """
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(self.MODEL_DIR, exist_ok=True)

        artifacts = (
            ('u', U),
            ('sigma', sigma),
            ('vt', Vt),
            ('data_mean', data_mean),
        )
        for name, artifact in artifacts:
            self._save_pickle_file(name, artifact)

    def train(self, data, k):
        """Factorize the row-centered rating matrix with a rank-``k`` SVD.

        Args:
            data: User x movie rating matrix, e.g. the result of
                ``load_data()``.
            k: Number of singular values/vectors requested from ``svds``.

        Returns:
            Tuple ``(U, sigma, Vt, data_mean)``. ``sigma`` is the square
            diagonal matrix built from the singular values, and ``data_mean``
            holds the per-row means that were subtracted before factorizing.
        """
        # The mean runs over every column of a row, so entries stored as 0
        # (unrated movies) are part of the average.
        data_mean = np.mean(data, axis=1)
        # Subtracting a dense column from the matrix yields a dense result.
        data_demeaned = data - data_mean.reshape(-1, 1)
        U, sigma, Vt = svds(data_demeaned, k=k)
        sigma = np.diag(sigma)

        return U, sigma, Vt, data_mean

In [3]:
class SVDRecommender:
    """Produce top-N movie recommendations from pickled SVD factors.

    The factors (``u``, ``sigma``, ``vt``, ``data_mean``) are read from pickle
    files; row and column labels of the prediction matrix are rebuilt from the
    ratings file each time predictions are requested.
    """

    def __init__(self, model_dir='./models/SVD', data_path='../data/ratings_data.json'):
        """Load the four pickled artifacts from ``model_dir``.

        Args:
            model_dir: Directory holding ``u.pickle``, ``sigma.pickle``,
                ``vt.pickle`` and ``data_mean.pickle``.
            data_path: Ratings JSON (``orient='split'``) used to label the
                prediction matrix and to find what a user already rated.
        """
        self.data_path = data_path
        self.U = SVDRecommender.load_pickle_file(f'{model_dir}/u')
        self.sigma = SVDRecommender.load_pickle_file(f'{model_dir}/sigma')
        self.Vt = SVDRecommender.load_pickle_file(f'{model_dir}/vt')
        self.data_mean = SVDRecommender.load_pickle_file(f'{model_dir}/data_mean')

    @staticmethod
    def load_pickle_file(file_name):
        """Return the object stored in ``<file_name>.pickle``.

        NOTE: unpickling can execute arbitrary code, so only point this at
        files you produced yourself.
        """
        # The context manager guarantees the handle is closed after loading.
        with open(f'{file_name}.pickle', 'rb') as handle:
            return pickle.load(handle)

    def get_predicted_ratings(self):
        """Rebuild the full matrix of predicted ratings.

        Returns:
            Tuple ``(ratings_df, users_movies)``. ``ratings_df`` is a
            DataFrame indexed by user id with one column per movie id (both in
            first-appearance order of the ratings file). ``users_movies`` is
            the ``movieId``/``userId`` slice of the ratings file.
        """
        data = pd.read_json(self.data_path, orient='split')
        users_movies = data[['movieId', 'userId']]

        # The stored means may be an np.matrix or a 1-D array; normalize to a
        # plain column so the sum below is an ordinary ndarray.
        row_means = np.asarray(self.data_mean).reshape(-1, 1)
        low_rank = np.dot(np.dot(self.U, self.sigma), self.Vt)
        all_predicted_ratings = np.asarray(low_rank + row_means)

        ratings_df = pd.DataFrame(
            all_predicted_ratings,
            columns=data['movieId'].unique(),
            index=data['userId'].unique(),
        )

        return ratings_df, users_movies

    def recommend(self, user_id, n=10):
        """Return the ``n`` best-scoring movies the user has not rated yet.

        Args:
            user_id: Id of the user as it appears in the ratings file.
            n: Maximum number of rows to return.

        Returns:
            DataFrame indexed by movie id with a single ``rating`` column,
            sorted by predicted rating in descending order.

        Raises:
            KeyError: If ``user_id`` does not occur in the ratings file.
        """
        start = time.perf_counter()

        # Use this instance, not whatever global happens to be named
        # `recommender` in the notebook.
        ratings_df, users_movies = self.get_predicted_ratings()

        already_rated = users_movies.loc[users_movies['userId'] == user_id, 'movieId'].unique()
        predicted_ratings = ratings_df.loc[user_id].to_frame(name='rating')
        recommended_movies = (
            predicted_ratings
            .drop(already_rated)
            .sort_values(['rating'], ascending=False)
            .head(n)
        )

        end = time.perf_counter()
        print(f'Finished in: {end - start}')

        return recommended_movies

In [4]:
# Create the trainer; its constructor only zeroes the user/item counters.
model = SVDModel()

In [5]:
# Read the ratings file and build the sparse user x movie rating matrix.
data = model.load_data()

In [6]:
# Factorize the row-centered ratings, requesting 90 singular values.
U, sigma, Vt, data_mean = model.train(data, 90)
# Prints the left factor U only; sigma and Vt are not printed by this cell.
print(U)

[[-0.00170024 -0.00036496 -0.00594402 ... -0.00164977 -0.00385686
   0.00194815]
 [ 0.01608214 -0.01363158  0.01324617 ...  0.06632702 -0.01096
   0.02037981]
 [ 0.03100627 -0.00777442  0.00586523 ...  0.00794039  0.01103719
   0.01723831]
 ...
 [ 0.00232087 -0.01211212  0.00466615 ...  0.01047112 -0.00144654
   0.01492971]
 [ 0.0050424   0.02910795 -0.03370144 ... -0.00962528  0.0138329
   0.04610252]
 [-0.00542368 -0.01454618  0.00361937 ...  0.00300588  0.00475597
   0.00255569]]
[[ 55.107914   0.         0.       ...   0.         0.         0.      ]
 [  0.        55.445606   0.       ...   0.         0.         0.      ]
 [  0.         0.        55.569416 ...   0.         0.         0.      ]
 ...
 [  0.         0.         0.       ... 199.89346    0.         0.      ]
 [  0.         0.         0.       ...   0.       242.12561    0.      ]
 [  0.         0.         0.       ...   0.         0.       462.03787 ]]
[[ 0.00074737 -0.00069718  0.02562279 ...  0.00234557  0.00070568
  

In [7]:
# Pickle the three factors and the row means under ./models/SVD.
model.save(U, sigma, Vt, data_mean)

In [8]:
# The constructor reloads the pickles under ./models/SVD, so the save cell must have run first.
recommender = SVDRecommender()

In [9]:
# Up to 20 highest-predicted movies that user 3 has not rated yet.
recommender.recommend(3, 20)

Finished in: 0.5609989166259766


Unnamed: 0,rating
2571,1.822204
1704,1.625873
4993,1.457082
1,1.408086
480,1.392061
5952,1.355523
260,1.313659
2329,1.229195
590,1.08001
4306,1.061868
