In [1]:
import pandas as pd
import warnings
# Silence every warning category for the rest of the session; this also hides
# deprecation notices and pandas chained-assignment warnings.
warnings.filterwarnings('ignore')

# NOTE: the assignment below rebinds the name `dataEngineering` from the
# imported class to an instance of it, so the class is no longer reachable
# under this name once the cell has run.
from Modules.dataEngineering import dataEngineering
dataEngineering = dataEngineering()

In [2]:
# Load the ratings through the project helper and keep only the payload stored
# under the 'data' key of its result.
df_ratings = dataEngineering.loadRatings()['data']

In [3]:
from surprise import Dataset, SVD, Reader
from surprise.model_selection import GridSearchCV, KFold

# Build a Surprise dataset from the ratings frame loaded earlier in the
# notebook (not from Surprise's built-in ml-100k loader). The rating scale is
# read from the data itself so that predictions are clipped to the range the
# ratings actually span instead of a hard-coded (1, 5).
rating_min = float(df_ratings['userRating'].min())
rating_max = float(df_ratings['userRating'].max())
reader = Reader(rating_scale=(rating_min, rating_max))
data = Dataset.load_from_df(df_ratings[['UserID', 'MovieID', 'userRating']], reader)

# Hyper-parameter candidates. `random_state` is a single fixed value so that
# SVD's random factor initialisation is identical on every run.
param_grid = {
    "n_epochs": [10, 30],
    "lr_all": [0.0005, 0.005],
    "reg_all": [0.4, 0.6],
    "random_state": [42],
}

# Seeded 5-fold split: without it the folds are reshuffled on each run and the
# reported best score / best params are not reproducible.
cv_folds = KFold(n_splits=5, random_state=42, shuffle=True)

gs = GridSearchCV(SVD, param_grid, measures=["rmse", "mae"], cv=cv_folds)
gs.fit(data)

In [4]:
# Display the best score the grid search recorded for each requested measure.
gs.best_score

{'rmse': 0.8806361486916512, 'mae': 0.6800631552348111}

{'rmse': 0.8806361486916512, 'mae': 0.6800631552348111}

In [5]:
# Display the parameter combination that achieved the best score per measure.
gs.best_params

{'rmse': {'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.4},
 'mae': {'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.4}}

In [6]:
# Hard-coded snapshot of the grid-search winners: both measures selected the
# same combination, so one independent copy is stored per measure.
output = {
    measure: {'n_epochs': 30, 'lr_all': 0.005, 'reg_all': 0.4}
    for measure in ('rmse', 'mae')
}

In [7]:
# SVD hyper-parameters used to train the final model (hard-coded values).
option_SVD = dict(
    n_epochs=30,
    lr_all=0.005,
    reg_all=0.4,
)

In [8]:
from surprise.model_selection import train_test_split
from surprise import accuracy

In [9]:
# Hold out 20% of the ratings for evaluation; the split is seeded.
train_set, test_set = train_test_split(data, test_size=0.2, random_state=42)

# `random_state` pins SVD's random initialisation and epoch shuffling so the
# metrics below (and the model persisted later) are reproducible across
# Restart & Run All; previously only the split was seeded.
model_SVD = SVD(
    n_epochs=option_SVD['n_epochs'],
    lr_all=option_SVD['lr_all'],
    reg_all=option_SVD['reg_all'],
    random_state=42,
)

model_SVD.fit(train_set)

# Score the held-out ratings; accuracy.rmse / accuracy.mae print the metric
# and also return it.
predictions = model_SVD.test(test_set)
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

RMSE: 0.8802
MAE:  0.6786


In [10]:
def predict_model(pivot, model=None):
    """Fill every missing rating of a user x movie matrix with a prediction.

    Parameters
    ----------
    pivot : pandas.DataFrame
        Matrix whose index holds user ids and whose columns hold movie ids.
        Cells that are null are treated as "not rated yet". The frame is not
        modified.
    model : object, optional
        Anything exposing ``predict(user_id, movie_id)`` that returns an
        object with an ``est`` attribute. Defaults to the notebook-level
        ``model_SVD``.

    Returns
    -------
    tuple(pandas.DataFrame, pandas.DataFrame)
        ``(matrix, recommendations)`` where ``matrix`` is a copy of ``pivot``
        with the null cells replaced by predictions rounded to 4 decimals,
        and ``recommendations`` lists those predictions in the columns
        ``UserID``, ``MovieID``, ``prep_Rating``, sorted by ``prep_Rating``
        in descending order.
    """
    if model is None:
        # Fall back to the model trained earlier in the notebook.
        model = model_SVD

    matrix = pivot.copy()
    movie_ids = matrix.columns
    ratings = []

    for u_pos, (u_id, row) in enumerate(pivot.iterrows()):
        for m_pos, item in enumerate(row):
            if not pd.isnull(item):
                continue
            m_id = movie_ids[m_pos]
            prep_rating = round(model.predict(u_id, m_id).est, 4)
            ratings.append([u_id, m_id, prep_rating])
            # Single positional write. The former chained form
            # `matrix.iloc[u_pos][m_id] = ...` may assign to a temporary and
            # leaves `matrix` unchanged under pandas copy-on-write.
            matrix.iat[u_pos, m_pos] = prep_rating

    recomment_unwatched = pd.DataFrame(ratings, columns=['UserID', 'MovieID', 'prep_Rating'])
    return matrix, recomment_unwatched.sort_values(by=['prep_Rating'], ascending=False)

In [11]:
# Reshape the long ratings table into a user x movie matrix of ratings.
df_pivot = df_ratings.pivot(
    index='UserID',
    columns='MovieID',
    values='userRating',
)

In [12]:
# Run the predictor on a one-row slice: the second row of the pivot by position.
mat, prep = predict_model(pivot=df_pivot.iloc[1:2])

In [13]:
# Display the matrix returned by predict_model for the selected slice.
mat

MovieID,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,3.9356,3.6627,3.4858,3.1889,3.3452,3.9675,3.356,3.525,3.3795,3.6152,...,3.6312,3.6674,3.7162,3.7761,3.6803,3.7181,3.5945,3.6779,3.5908,3.6674


In [14]:
# Ten highest predicted ratings for user 2 (`prep` is already sorted descending).
prep.loc[prep["UserID"].eq(2)].head(10)

Unnamed: 0,UserID,MovieID,prep_Rating
9571,2,177593,4.371
838,2,1104,4.3579
880,2,1178,4.3415
2844,2,3814,4.3047
2278,2,3030,4.301
903,2,1204,4.2943
2576,2,3451,4.2777
915,2,1217,4.2704
941,2,1245,4.2693
2616,2,3508,4.2628


Save Model

In [15]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# target folder exists; otherwise this cell fails on a fresh checkout.
os.makedirs('models', exist_ok=True)

# Persist the fitted SVD model. The call returns the list of files written,
# which the notebook shows as the cell output.
# NOTE: the file is pickle-based; only load it back from a trusted location.
joblib.dump(model_SVD, 'models/model_SVD.pkl')

['models/model_SVD.pkl']