In [1]:
from helpers import *
from recommenders.models.deeprec.models.graphrec.lightgcn import LightGCN
from recommenders.utils.constants import SEED as DEFAULT_SEED
from recommenders.models.deeprec.deeprec_utils import prepare_hparams
from recommenders.models.deeprec.DataModel.ImplicitCF import ImplicitCF
from recommenders.datasets.python_splitters import python_stratified_split


In [2]:
# Load datasets
df_articles, df_clicks, article_embeddings = load_dataset()

In [3]:
TOP_K = 10
EPOCHS = 50
BATCH_SIZE = 1024
SEED = DEFAULT_SEED

In [4]:
# preprocess data
df_clicks = preprocessing_clicks(df_clicks)
df_clicks = df_clicks.sample(n=10000, random_state=SEED)
df_clicks.rename(columns={'user_id': 'userID', "click_article_id":"itemID"}, inplace=True)
df_clicks["itemID"] = df_clicks["itemID"].astype('int64')
df_clicks['rating'] = 1.0

In [5]:
train, test = python_stratified_split(df_clicks, ratio=0.8, col_user="userID", col_item="itemID", seed=SEED)

In [6]:
train

Unnamed: 0,userID,session_id,session_size,itemID,click_environment,click_deviceGroup,click_os,click_country,click_region,click_referrer_type,session_start_dt,click_timestamp_dt,click_hour,click_dayofweek,rating
89128,10,1507917672187324,14,199207,4,1,17,1,18,1,2017-10-13 18:01:12,2017-10-13 20:01:48.504,20,4,1.0
2565312,24,1507855797268825,3,156381,4,1,17,1,25,1,2017-10-13 00:49:57,2017-10-13 00:58:48.496,0,4,1.0
232034,24,1507909485182712,4,225010,4,1,17,1,25,1,2017-10-13 15:44:45,2017-10-13 15:45:23.273,15,4,1.0
1493296,26,1507629776306390,9,31488,4,3,2,1,13,1,2017-10-10 10:02:56,2017-10-10 10:10:53.711,10,1,1.0
858287,32,1506825956252769,3,235840,4,3,2,1,25,2,2017-10-01 02:45:56,2017-10-01 03:11:33.776,3,6,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1599872,322151,1508200021452313,9,277107,4,4,20,1,25,1,2017-10-17 00:27:01,2017-10-17 01:12:23.250,1,1,1.0
1402380,322326,1508202080142667,4,331116,2,3,20,1,25,1,2017-10-17 01:01:20,2017-10-17 01:02:28.333,1,1,1.0
1404883,322466,1508203558326616,2,30760,4,1,17,1,25,6,2017-10-17 01:25:58,2017-10-17 01:48:06.892,1,1,1.0
1405206,322490,1508203735208741,4,42876,4,1,17,1,25,1,2017-10-17 01:28:55,2017-10-17 01:37:30.998,1,1,1.0


In [7]:
fake_data = pd.DataFrame({
    'userID': [1],
    'itemID': [1],
    'rating': [1.0]
})
data = ImplicitCF(train=train, test=test, seed=SEED)

In [8]:
yaml_file = "input/lightgcn.yaml"
hparams = prepare_hparams(
    yaml_file,
    n_layers=3,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    learning_rate=0.005,
    eval_epoch=5,
    top_k=TOP_K,
    save_model=True,
    save_epoch=50,
    MODEL_DIR="./input/models/"
)

model = LightGCN(hparams, data, seed=42)

Already create adjacency matrix.
Already normalize adjacency matrix.
Using xavier initialization.


2024-05-27 09:35:21.487992: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled


In [9]:
checkpoint_path = "./input/models/epoch_50"
model.load(model_path=checkpoint_path)

INFO:tensorflow:Restoring parameters from ./input/models/epoch_50


In [10]:
def get_top_k_recommendations(user_id, model, data, k=5):
    # Check if the user exists in the dataset
    if user_id not in data.train['userID'].values and user_id not in data.test['userID'].values:
        print(f"User ID {user_id} not found in the training or testing set.")
        return []
    
    # Prepare a DataFrame for the specific user to get recommendations
    user_df = pd.DataFrame({'userID': [user_id] * data.n_items, 'itemID': range(data.n_items)})
    print(user_df)
    # Use the model to score all items for the user
    full_scores = model.recommend_k_items(user_df, top_k=data.n_items, remove_seen=False)
    full_scores['prediction'] = (full_scores['prediction'] - full_scores['prediction'].min()) / (full_scores['prediction'].max() - full_scores['prediction'].min())

    # Filter top k recommendations for the given user
    user_recommendations = full_scores[full_scores['userID'] == user_id].nlargest(k, 'prediction')
    
    # Extract item IDs and their scores
    recommended_items_with_scores = [(row['itemID'], row['prediction']) for _, row in user_recommendations.iterrows()]

    return recommended_items_with_scores, full_scores

In [11]:
train

Unnamed: 0,userID,session_id,session_size,itemID,click_environment,click_deviceGroup,click_os,click_country,click_region,click_referrer_type,session_start_dt,click_timestamp_dt,click_hour,click_dayofweek,rating
89128,10,1507917672187324,14,199207,4,1,17,1,18,1,2017-10-13 18:01:12,2017-10-13 20:01:48.504,20,4,1.0
2565312,24,1507855797268825,3,156381,4,1,17,1,25,1,2017-10-13 00:49:57,2017-10-13 00:58:48.496,0,4,1.0
232034,24,1507909485182712,4,225010,4,1,17,1,25,1,2017-10-13 15:44:45,2017-10-13 15:45:23.273,15,4,1.0
1493296,26,1507629776306390,9,31488,4,3,2,1,13,1,2017-10-10 10:02:56,2017-10-10 10:10:53.711,10,1,1.0
858287,32,1506825956252769,3,235840,4,3,2,1,25,2,2017-10-01 02:45:56,2017-10-01 03:11:33.776,3,6,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1599872,322151,1508200021452313,9,277107,4,4,20,1,25,1,2017-10-17 00:27:01,2017-10-17 01:12:23.250,1,1,1.0
1402380,322326,1508202080142667,4,331116,2,3,20,1,25,1,2017-10-17 01:01:20,2017-10-17 01:02:28.333,1,1,1.0
1404883,322466,1508203558326616,2,30760,4,1,17,1,25,6,2017-10-17 01:25:58,2017-10-17 01:48:06.892,1,1,1.0
1405206,322490,1508203735208741,4,42876,4,1,17,1,25,1,2017-10-17 01:28:55,2017-10-17 01:37:30.998,1,1,1.0


In [15]:
user_id = 10
recommended_items, full_scores = get_top_k_recommendations(user_id, model, data, k=5)
recommended_items

      userID  itemID
0         10       0
1         10       1
2         10       2
3         10       3
4         10       4
...      ...     ...
2566      10    2566
2567      10    2567
2568      10    2568
2569      10    2569
2570      10    2570

[2571 rows x 2 columns]


[(199207.0, 1.0),
 (156624.0, 0.8365248441696167),
 (313504.0, 0.788425862789154),
 (234698.0, 0.7718176245689392),
 (159275.0, 0.7652570009231567)]

In [16]:
full_scores

Unnamed: 0,userID,itemID,prediction
0,10,199207,1.000000
1,10,156624,0.836525
2,10,313504,0.788426
3,10,234698,0.771818
4,10,159275,0.765257
...,...,...,...
2566,10,283505,0.158598
2567,10,237524,0.115426
2568,10,354086,0.089389
2569,10,235854,0.016625
