In [None]:
!pip install git+https://github.com/microsoft/recommenders.git
!pip install cornac

Collecting git+https://github.com/microsoft/recommenders.git
  Cloning https://github.com/microsoft/recommenders.git to /tmp/pip-req-build-hqb7m3pi
  Running command git clone --filter=blob:none --quiet https://github.com/microsoft/recommenders.git /tmp/pip-req-build-hqb7m3pi
  Resolved https://github.com/microsoft/recommenders.git to commit cd41f95515d297f1b958e36a6e569fedcc82a573
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting category-encoders<3,>=2.6.0 (from recommenders==1.2.0)
  Downloading category_encoders-2.6.3-py2.py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/81.9 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cornac<2,>=1.15.2 (from recommenders==1.2.0)
  Downloading cornac-1.18.0-cp310-cp310-manylinux1_x86_64.whl (21.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0

In [None]:
import os
import sys
import torch
import cornac

from recommenders.datasets.python_splitters import python_random_split
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED
from recommenders.evaluation.python_evaluation import (
    map,
    ndcg_at_k,
    precision_at_k,
    recall_at_k,
)
from cornac.eval_methods import RatioSplit
from cornac.metrics import MAE, RMSE, Precision, Recall, NDCG, MAP, NCRR

# Record the interpreter and library versions so results can be reproduced.
version_report = "\n".join(
    [
        f"System version: {sys.version}",
        f"PyTorch version: {torch.__version__}",
        f"Cornac version: {cornac.__version__}",
    ]
)
print(version_report)

System version: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
PyTorch version: 2.2.1+cu121
Cornac version: 1.18


In [None]:
# set the parameters
# Ranking cutoff: used as Recall(k=top_k) when selecting the best grid-search configuration.
top_k = 50

# Model parameters
# These constants are forwarded to cornac's BiVAECF constructor in later cells.
LATENT_DIM = 50  # passed as `k`
ENCODER_DIMS = [100]  # passed as `encoder_structure`
ACT_FUNC = "sigmoid"  # passed as `act_fn`
LIKELIHOOD = "pois"  # passed as `likelihood`
NUM_EPOCHS = 30  # passed as `n_epochs`
BATCH_SIZE = 100  # passed as `batch_size`
LEARNING_RATE = 0.001  # passed as `learning_rate`

In [None]:
import pandas as pd
from google.colab import drive

# Mount Google Drive so the rating files stored there become readable.
drive.mount('/content/drive')

# Locations of the training and probe rating files.
train_path = '/content/drive/MyDrive/SMU_MITB_RS/Project1/cs608_ip_train_v3.csv'
test_path =  '/content/drive/MyDrive/SMU_MITB_RS/Project1/cs608_ip_probe_v3.csv'

# Load both CSV files (train first, then probe) into DataFrames.
df_train, df_test = (pd.read_csv(csv_path) for csv_path in (train_path, test_path))

# Stack the two frames row-wise with a fresh index; the combined frame is
# later re-split by RatioSplit.
frames_to_merge = [df_train, df_test]
df = pd.concat(frames_to_merge, axis=0, ignore_index=True)

# Show the combined DataFrame (pandas truncates the middle rows).
print(df)

Mounted at /content/drive
        user_id  item_id  rating
0         12108        1       2
1         12108        2       5
2         12108        3       4
3         12108        4       4
4         12108        5       5
...         ...      ...     ...
281316    12568     1511       3
281317    12568    12389       4
281318    14361    18535       5
281319    14361    16572       5
281320    14361    19047       5

[281321 rows x 3 columns]


In [None]:
# Show the dtype of every column of the combined frame.
column_dtypes = df.dtypes
print(column_dtypes)

user_id    int64
item_id    int64
rating     int64
dtype: object


In [None]:
# Split the combined interactions with a fixed seed: 10% validation, 10% test.
# `rating_threshold=4.0` is the cutoff cornac uses to binarize ratings for
# ranking metrics.
# (An earlier variant used test_size=0.2 with no validation split.)
ratio_split = RatioSplit(
    data=df.values,
    test_size=0.1,
    val_size=0.1,
    rating_threshold=4.0,
    seed=2024,
)

In [None]:
from cornac.hyperopt import GridSearch, Discrete
from cornac.models import BiVAECF
import torch

# Hyper-parameter grid: 3 x 3 x 3 x 3 = 81 configurations, each trained for
# NUM_EPOCHS epochs, so this cell is expensive (about 7 hours in the recorded run).
param_grid = [
    Discrete(name="k", values=[50, 100, 200]),
    Discrete(name="encoder_structure", values=[[50], [100], [50, 100]]),
    Discrete(name="learning_rate", values=[0.001, 0.01, 0.1]),
    Discrete(name="batch_size", values=[100, 200, 300]),
]

# Base model: the parameters listed in `param_grid` are overridden per
# configuration by the grid search; the remaining ones stay fixed.
bivae_model = BiVAECF(
    k=LATENT_DIM,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    n_epochs=NUM_EPOCHS,
    seed=SEED,
    use_gpu=torch.cuda.is_available(),
    verbose=True,
)

# Metric used to pick the best configuration on the validation split.
selection_metric = Recall(k=top_k)

# Initialize GridSearch
gs = GridSearch(
    model=bivae_model,
    space=param_grid,
    metric=selection_metric,
    eval_method=ratio_split,
)

# Run the grid search
cornac.Experiment(
    eval_method=ratio_split,
    models=[gs],
    metrics=[NDCG(k=top_k), NCRR(k=top_k), Recall(k=top_k)],
    user_based=False,
).run()

# GridSearch exposes the winning configuration as `best_params` and its
# validation score as `best_score` (there is no `best_result` attribute).
# The score belongs to the selection metric (Recall@top_k), not NDCG.
print("Best parameters found:", gs.best_params)
print(f"Best {selection_metric.name} score:", gs.best_score)


Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 100, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 200, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [50, 100], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 100, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.001}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.01}


  0%|          | 0/30 [00:00<?, ?it/s]

Evaluating: {'batch_size': 300, 'encoder_structure': [100], 'k': 200, 'learning_rate': 0.1}


  0%|          | 0/30 [00:00<?, ?it/s]

Best parameter settings: {'batch_size': 100, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.001}
Recall@50 = 0.0906

VALIDATION:
...
                   | NCRR@50 | NDCG@50 | Recall@50 | Time (s)
------------------ + ------- + ------- + --------- + --------
GridSearch_BiVAECF |  0.0170 |  0.0327 |    0.0906 |  45.6239

TEST:
...
                   | NCRR@50 | NDCG@50 | Recall@50 |  Train (s) | Test (s)
------------------ + ------- + ------- + --------- + ---------- + --------
GridSearch_BiVAECF |  0.0164 |  0.0323 |    0.0912 | 25176.9928 |  46.7654

Best parameters found: {'batch_size': 100, 'encoder_structure': [100], 'k': 50, 'learning_rate': 0.001}


AttributeError: 'GridSearch' object has no attribute 'best_result'

In [None]:
# Train and evaluate one BiVAECF model using the constants from the
# parameter cell.
bivae_kwargs = dict(
    k=LATENT_DIM,
    encoder_structure=ENCODER_DIMS,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    n_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    seed=SEED,
    use_gpu=torch.cuda.is_available(),
    verbose=True,
)
bivae = cornac.models.BiVAECF(**bivae_kwargs)

# Ranking metrics reported for the validation and test splits.
metrics = [NDCG(k=50), NCRR(k=50), Recall(k=50)]
experiment = cornac.Experiment(
    eval_method=ratio_split,
    models=[bivae],
    metrics=metrics,
)

# run the model
experiment.run()


  0%|          | 0/30 [00:00<?, ?it/s]


VALIDATION:
...
        | NCRR@50 | NDCG@50 | Recall@50 | Time (s)
------- + ------- + ------- + --------- + --------
BiVAECF |  0.0170 |  0.0327 |    0.0906 |  46.3754

TEST:
...
        | NCRR@50 | NDCG@50 | Recall@50 | Train (s) | Test (s)
------- + ------- + ------- + --------- + --------- + --------
BiVAECF |  0.0164 |  0.0323 |    0.0912 |  247.2810 |  47.8371



In [None]:
import numpy as np

# Collect every raw user id that appears in the train, validation, or test split.
train_user_ids = ratio_split.train_set.user_ids
val_user_ids = ratio_split.val_set.user_ids
test_user_ids = ratio_split.test_set.user_ids

# np.unique already returns the ids in ascending order, so no extra sort is needed.
all_user_ids = np.unique(np.concatenate((train_user_ids, test_user_ids, val_user_ids)))
sorted_user_ids = all_user_ids

# Write one line per user (ascending user id) holding that user's top-k
# recommended raw item ids, separated by spaces.
output_file_path = 'all_user_top_50_recommendations_sorted.txt'
with open(output_file_path, 'w') as file:
    for user_id in sorted_user_ids:
        # Rank items with the fitted `bivae` model from the previous cell.
        # Items the user already rated in the training split are excluded,
        # matching how cornac's ranking evaluation builds its candidate list.
        # NOTE(review): confirm that dropping seen items is the desired
        # submission format.
        item_recommendations = bivae.recommend(
            user_id=user_id,
            k=top_k,
            remove_seen=True,
            train_set=ratio_split.train_set,
        )
        recommendation_line = ' '.join(str(item_id) for item_id in item_recommendations)
        file.write(recommendation_line + '\n')



In [None]:
# Sanity check: shape of the array of distinct user ids.
user_id_array_shape = sorted_user_ids.shape
print(user_id_array_shape)

(21124,)


In [None]:
import os

# Show the directory where the recommendations file was written.
working_dir = os.getcwd()
print(working_dir)


/content


In [None]:
# List the working directory to confirm the recommendations file exists.
!ls


all_user_top_50_recommendations_sorted.txt  CornacExp-2024-05-15_06-13-13-525471.log  drive
CornacExp-2024-05-15_05-15-34-511127.log    CornacExp-2024-05-15_14-41-55-983358.log  sample_data
CornacExp-2024-05-15_06-04-44-552047.log    CornacExp-2024-05-15_14-51-19-606922.log


In [None]:
from google.colab import files

# Trigger a browser download of the generated recommendations file.
download_target = 'all_user_top_50_recommendations_sorted.txt'
files.download(download_target)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>