In [8]:
import sys
sys.path.append('D:\\Coding\Machine_Learning\Recommendation_System\Modules')
from dataEngineering import dataEngineering

In [9]:
from surprise import Dataset, Reader
from sklearn.model_selection import train_test_split
from surprise import KNNWithMeans, accuracy
import pandas as pd

# Load your data and create the DataFrame
dataEngineering = dataEngineering()
df_rating = dataEngineering.loadRatings()['data']

df_rating['User_Index'] = df_rating['UserID'].astype('category').cat.codes
df_rating['Movie_Index'] = df_rating['MovieID'].astype('category').cat.codes

In [10]:
# Create a Reader object for the Surprise library
reader = Reader(rating_scale=(1, 5))

# Load the data into the Surprise Dataset
sparse = df_rating.pivot(index='User_Index', columns='Movie_Index', values='userRating')

In [11]:
X = df_rating.copy()
y = df_rating['User_Index']

X_train, X_test, _ ,_ = train_test_split(X, y, test_size = 0.20, stratify=y, random_state=42)

X_train = Dataset.load_from_df(X_train[['User_Index', 'Movie_Index', 'userRating']], reader)
X_test = Dataset.load_from_df(X_test[['User_Index', 'Movie_Index', 'userRating']], reader)

X_train = X_train.build_full_trainset()
X_test = X_test.build_full_trainset()
X_test = X_test.build_testset()

In [12]:
options = {
    'bsl_options': {'learning_rate': 0.0005, 'method': 'sgd'},
    'sim_options': {'name': 'cosine', 'user_based': True}
}
model = KNNWithMeans(sim_options=options['sim_options'], bsl_options=options['bsl_options'])
model.fit(X_train)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x24bafdf2800>

In [13]:
predictions= model.test(X_test)
accuracy.mae(predictions)
accuracy.rmse(predictions)

MAE:  0.6842
RMSE: 0.8914


0.8914340762701614

In [14]:
sparse.head(5)

Movie_Index,0,1,2,3,4,5,6,7,8,9,...,9669,9670,9671,9672,9673,9674,9675,9676,9677,9678
User_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,4.0,,4.0,,,4.0,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,4.0,,,,,,,,,,...,,,,,,,,,,
4,,4.0,5.0,3.0,5.0,4.0,4.0,3.0,,3.0,...,,,,,,,,,,


In [15]:
def predict_model(matrix):
    ratings = []
    for u_index, row in matrix.iterrows():
        user_num = u_index - 1
        for i_index, item in enumerate(row):
            item_num = i_index
            item_rate = matrix.iloc[user_num][item_num]
            if pd.isnull(item_rate):
                matrix.iloc[user_num][item_num] = model.predict(user_num+1, item_num).est
    return matrix

In [16]:
prep_sparse = predict_model(sparse)

In [17]:
prep_sparse

Movie_Index,0,1,2,3,4,5,6,7,8,9,...,9669,9670,9671,9672,9673,9674,9675,9676,9677,9678
User_Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,4.000000,2.198597,4.000000,1.332069,1.772833,4.000000,2.007423,1.192626,2.100866,2.143433,...,2.306452,3.495995,2.306452,2.306452,2.306452,2.306452,2.306452,2.306452,2.306452,2.306452
1,3.852236,3.513024,3.495650,2.359891,2.942522,3.906393,3.236739,2.823574,3.444644,3.622873,...,3.389768,3.495995,3.889768,3.889768,3.389768,3.889768,3.389768,3.389768,3.389768,4.054331
2,4.172285,3.585039,3.646970,2.468142,3.063803,4.134394,3.352164,2.937749,3.509138,3.728477,...,3.714286,3.495995,3.714286,3.714286,3.714286,3.714286,3.714286,3.714286,3.714286,4.173241
3,4.000000,3.539384,3.420576,2.281050,2.877916,3.863514,3.166816,2.749546,3.351678,3.521630,...,3.513944,3.495995,3.513944,3.513944,3.513944,3.513944,3.513944,3.513944,3.513944,3.972899
4,3.592772,4.000000,5.000000,3.000000,5.000000,4.000000,4.000000,3.000000,3.099685,3.000000,...,3.048491,3.495995,3.548491,3.548491,3.048491,3.548491,3.048491,3.048491,3.048491,3.713054
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,2.500000,3.889280,3.755271,2.608925,3.198581,4.253921,2.500000,3.082626,3.674001,3.713742,...,3.641059,3.495995,4.141059,4.141059,3.641059,4.141059,3.641059,3.641059,3.641059,4.305622
461,4.000000,2.928974,2.992737,1.847546,2.435037,3.463333,2.728142,2.321361,2.914136,3.014941,...,2.885370,3.495995,3.385370,3.385370,2.885370,3.385370,2.885370,2.885370,2.885370,3.549933
462,2.500000,2.000000,2.000000,2.051580,2.652537,3.600528,2.978845,2.885604,3.093808,4.000000,...,3.300000,3.495995,3.300000,3.300000,3.300000,3.300000,3.300000,3.300000,3.300000,3.758955
463,3.000000,3.581342,3.586335,2.437337,3.030817,4.032187,3.322801,2.919534,3.507585,4.000000,...,3.477390,3.495995,3.977390,3.977390,3.477390,3.977390,3.477390,3.477390,3.477390,4.141952
