## Import Libraries

In [2]:
pip install recommenders



In [3]:
import sys
import pandas as pd
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
from recommenders.datasets.python_splitters import python_stratified_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var,
                                                      map_at_k, ndcg_at_k, precision_at_k,
                                                      recall_at_k, get_top_k_items)

## Load Data

In [4]:
# Read the ratings table from the current working directory
df = pd.read_csv("ratings.csv")

In [5]:
# Preview the first rows to check which columns were loaded
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [6]:
# Smallest and largest rating value present, shown as a (min, max) tuple
df['rating'].min(), df['rating'].max()

(0.5, 5.0)

In [7]:
# Number of distinct users in the ratings table
df['userId'].nunique()

610

In [8]:
# Number of distinct movies in the ratings table
df['movieId'].nunique()

9724

## Split the data

In [9]:
# Split the ratings with ratio=0.75 using the stratified splitter
train, test = python_stratified_split(
    df,
    ratio=0.75,
    col_user="userId",
    col_item="movieId",
)

# Keep only test rows whose user AND movie both occur in the training split
test = test.loc[
    test["userId"].isin(train["userId"].unique())
    & test["movieId"].isin(train["movieId"].unique())
]

## Save split data to files

In [10]:
# Destination CSVs for the two splits (relative to the working directory)
train_file, test_file = "./train.csv", "./test.csv"

# Write each split without the pandas index column.
# NOTE(review): these files are later read by the NCF dataset loader, which is
# documented to expect rows ordered by user — confirm the split output is.
for frame, path in ((train, train_file), (test, test_file)):
    frame.to_csv(path, index=False)

## Constants

In [11]:
# Experiment configuration
SEED = 1234        # seed passed to both the NCF dataset and the NCF model
BATCH_SIZE = 128   # passed to the model as batch_size
EPOCHS = 20        # passed to the model as n_epochs
TOP_K = 10         # cutoff k used by the ranking metrics

## NCF model

In [12]:
# Build the NCF dataset object from the train/test CSV files
data = NCFDataset(
    train_file=train_file,
    test_file=test_file,
    col_user="userId",
    col_item="movieId",
    seed=SEED,
)

100%|██████████| 610/610 [00:36<00:00, 16.83it/s]


In [14]:
# Configure the NCF network; this cell only constructs the model,
# training is triggered separately via model.fit
model = NCF(
    # problem size, taken from the loaded dataset
    n_users=data.n_users,
    n_items=data.n_items,
    # architecture
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    # optimisation
    learning_rate=1e-3,
    batch_size=BATCH_SIZE,
    n_epochs=EPOCHS,
    # reproducibility / logging
    seed=SEED,
    verbose=10,
)



In [15]:
# Train the model constructed above on the NCF dataset
model.fit(data)

In [16]:
# Score every training user against every training item.
# Three parallel lists are filled in lock-step: user id, item id, score.
users, items, preds = [], [], []
unique_items = train.movieId.unique()
n_candidates = len(unique_items)

for user in train.userId.unique():
    # The user id is repeated once per candidate item because predict() is
    # called with two equally long sequences (is_list=True).
    repeated_user = [user] * n_candidates
    users += repeated_user
    items.extend(unique_items)
    scores = model.predict(repeated_user, unique_items, is_list=True)
    preds += list(scores)

In [17]:
# Assemble the three parallel lists into one long-format predictions table
prediction_columns = {"userId": users, "movieId": items, "prediction": preds}
all_predictions = pd.DataFrame(data=prediction_columns)

In [18]:
# Remove every (userId, movieId) pair that already appears in the training
# split, so only unseen items remain as recommendation candidates.
key_cols = ['userId', 'movieId']
seen_pairs = train.set_index(key_cols).index
candidate_pairs = all_predictions.set_index(key_cols).index
all_predictions = all_predictions.loc[~candidate_pairs.isin(seen_pairs)]

In [19]:
# Preview the remaining candidate predictions
all_predictions.head()

Unnamed: 0,userId,movieId,prediction
174,1,74458,0.003016
175,1,8798,0.115136
176,1,122882,0.003419
177,1,106782,0.0046
178,1,48516,0.034793


In [20]:
# Sanity check: show the column layout of both frames that go into evaluation
for label, frame in (
    ("Test columns:", test),
    ("All predictions columns:", all_predictions),
):
    print(label, frame.columns)


Test columns: Index(['userId', 'movieId', 'rating', 'timestamp'], dtype='object')
All predictions columns: Index(['userId', 'movieId', 'prediction'], dtype='object')


In [21]:
# Keep only the id and rating columns for evaluation (drops `timestamp`)
test = test.loc[:, ['userId', 'movieId', 'rating']]

## Evaluation

In [22]:
# Column mapping shared by every metric call
metric_cols = dict(col_user='userId', col_item='movieId', col_prediction='prediction')

# Ranking metrics, evaluated on the top-K recommendations per user
eval_map = map_at_k(test, all_predictions, k=TOP_K, **metric_cols)
eval_ndcg = ndcg_at_k(test, all_predictions, k=TOP_K, **metric_cols)
eval_precision = precision_at_k(test, all_predictions, k=TOP_K, **metric_cols)
eval_recall = recall_at_k(test, all_predictions, k=TOP_K, **metric_cols)

# Rating metrics.
# NOTE(review): the predictions displayed in this notebook lie in [0, 1] while
# the ratings span 0.5-5.0, so these four values compare quantities on
# different scales — interpret with care, or consider dropping them.
eval_rmse = rmse(test, all_predictions, **metric_cols)
eval_mae = mae(test, all_predictions, **metric_cols)
eval_rsquared = rsquared(test, all_predictions, **metric_cols)
eval_exp_var = exp_var(test, all_predictions, **metric_cols)

In [23]:
# Emit the whole metric report with a single print; every line keeps the
# "<name>:<TAB><value with 6 decimals>" layout.
metric_report = [
    f"MAP:\t{eval_map:.6f}",
    f"NDCG:\t{eval_ndcg:.6f}",
    f"Precision:\t{eval_precision:.6f}",
    f"Recall:\t{eval_recall:.6f}",
    f"RMSE:\t{eval_rmse:.6f}",
    f"MAE:\t{eval_mae:.6f}",
    f"R-Squared:\t{eval_rsquared:.6f}",
    f"Explained Variance:\t{eval_exp_var:.6f}",
]
print("\n".join(metric_report))

MAP:	0.118558
NDCG:	0.210050
Precision:	0.197377
Recall:	0.084150
RMSE:	3.156577
MAE:	2.987445
R-Squared:	-8.272748
Explained Variance:	0.019518


## Prediction

In [24]:
# Score for a single (user, item) pair: user 2, movie 91658
print(model.predict(2, 91658))

# All remaining candidates for user 1, highest predicted score first.
# Note that the single score above is for user 2, while this table is for user 1.
print(
    all_predictions.loc[all_predictions["userId"] == 1]
    .sort_values(by='prediction', ascending=False)
)

0.9560695886611938
      userId  movieId    prediction
874        1     1200  9.402769e-01
277        1     1304  9.382423e-01
914        1     1036  9.313829e-01
932        1     2028  9.241468e-01
1393       1     3175  9.237729e-01
...      ...      ...           ...
3900       1   121374  1.409805e-09
3980       1   110773  8.675129e-10
3903       1     7225  8.530344e-10
3937       1   123200  7.421901e-10
3923       1   180265  4.955683e-10

[8583 rows x 3 columns]


In [42]:
import pickle

In [43]:
# Output path for the pickled predictions table (relative to the working directory)
file__name = 'all_prediction.Sav'

In [44]:
# Persist the predictions table with pickle. The context manager guarantees the
# file handle is flushed and closed even if pickling raises; the previous bare
# open() call left the handle open for the lifetime of the kernel.
with open(file__name, 'wb') as out_file:
    pickle.dump(all_predictions, out_file)

In [41]:
# with open('model.pkl', 'wb') as f:
#     pickle.dump(model, f)

In [31]:
# # Save all predictions to a file
# all_predictions.to_csv('all_predictions.csv', index=False)