In [1]:
# Tahap 1: Setup

!nvidia-smi
!pip install cudf-cu12 cuml-cu12 --extra-index-url=https://pypi.nvidia.com

/bin/bash: line 1: nvidia-smi: command not found
Looking in indexes: https://pypi.org/simple, https://pypi.nvidia.com
Collecting nvidia-cuda-nvrtc-cu12==12.9.86.* (from cuda-toolkit[nvcc,nvrtc]==12.*->cudf-cu12)
  Downloading https://pypi.nvidia.com/nvidia-cuda-nvrtc-cu12/nvidia_cuda_nvrtc_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl (89.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.6/89.6 MB[0m [31m83.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cublas-cu12==12.9.1.4.* (from cuda-toolkit==12.*->cuda-toolkit[nvcc,nvrtc]==12.*->cudf-cu12)
  Downloading https://pypi.nvidia.com/nvidia-cublas-cu12/nvidia_cublas_cu12-12.9.1.4-py3-none-manylinux_2_27_x86_64.whl (581.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m581.2/581.2 MB[0m [31m49.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting nvidia-cufft-cu12==11.4.1.4.* (from cuda-toolkit==12.*->cuda-toolkit[nvcc,nvrtc]==12.*->cudf-cu12)
  Downloading https://pypi.nvi

In [2]:
!pip install implicit==0.7.2

Collecting implicit==0.7.2
  Downloading implicit-0.7.2.tar.gz (70 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/70.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.3/70.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: implicit
  Building wheel for implicit (pyproject.toml) ... [?25l[?25hdone
  Created wheel for implicit: filename=implicit-0.7.2-cp312-cp312-linux_x86_64.whl size=10797530 sha256=c64bf59b27bd239befff8c7769c8f42f259e5ed5c8b89ce5b6832f86174f116f
  Stored in directory: /root/.cache/pip/wheels/b2/00/4f/9ff8af07a0a53ac6007ea5d739da19cfe147a2df542b6899f8
Successfully built implicit
Installing collected packages: implicit
Successfully installed implicit-0.7.2


In [3]:
# Tahap 2: Import dan Load Dataset

import os
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from implicit.bpr import BayesianPersonalizedRanking
from tqdm import tqdm

df = pd.read_csv("train.csv")



In [4]:
# Tahap 3: Encode User dan Item ke Index Numerik

user_enc, item_enc = LabelEncoder(), LabelEncoder()
df["user_idx"] = user_enc.fit_transform(df["user_id"])
df["item_idx"] = item_enc.fit_transform(df["item_id"])

n_users = df["user_idx"].nunique()
n_items = df["item_idx"].nunique()

print(f"\nJumlah user (idx): {n_users}, jumlah item (idx): {n_items}")


Jumlah user (idx): 13876, jumlah item (idx): 123069


In [5]:
# Tahap 4: Bangun User dan Item Matrix

user_item_matrix = csr_matrix(
    (np.ones(len(df)), (df["user_idx"], df["item_idx"])),
    shape=(n_users, n_items)
)

print("user_item_matrix.shape =", user_item_matrix.shape)
print("nnz (jumlah interaksi) =", user_item_matrix.nnz)

user_item_matrix.shape = (13876, 123069)
nnz (jumlah interaksi) = 269764


In [6]:
# Tahap 5:Training Model
bpr_model = BayesianPersonalizedRanking(
    factors=128,           # lebih banyak latent factor → model lebih ekspresif
    learning_rate=0.01,    # lebih cepat belajar
    regularization=0.01,   # kurangi regularisasi supaya lebih fleksibel
    iterations=100,        # training lebih lama, lebih stabil
    random_state=42
)

print("\nMulai training BPR ...")
bpr_model.fit(user_item_matrix)
print("Training selesai.")


Mulai training BPR ...


  0%|          | 0/100 [00:00<?, ?it/s]

Training selesai.


In [7]:
# Tahap 6: Fungsi Rekomendasi
def recommend_for_user(user_idx, N=10):
    """Memberikan rekomendasi item untuk user tertentu."""
    if user_idx < 0 or user_idx >= n_users:
        raise ValueError(f"user_idx {user_idx} di luar jangkauan 0..{n_users-1}")

    user_vector = user_item_matrix.tocsr()[user_idx]

    ids, scores = bpr_model.recommend(
        userid=user_idx,
        user_items=user_vector,
        N=N,
        filter_already_liked_items=True
    )

    orig_item_ids = item_enc.inverse_transform([int(i) for i in ids])
    return pd.DataFrame({
        "item_idx": ids.astype(int),
        "item_id": orig_item_ids,
        "score": scores
    })

In [8]:
# Tahap 7: Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_matrix = csr_matrix(
    (np.ones(len(train_df), dtype=np.float32), (train_df["user_idx"], train_df["item_idx"])),
    shape=(n_users, n_items)
)

test_matrix = csr_matrix(
    (np.ones(len(test_df), dtype=np.float32), (test_df["user_idx"], test_df["item_idx"])),
    shape=(n_users, n_items)
)

In [9]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Konversi ke matriks sparse
train_matrix = csr_matrix(
    (np.ones(len(train_df), dtype=np.float32), (train_df["user_idx"], train_df["item_idx"])),
    shape=(n_users, n_items)
)

test_matrix = csr_matrix(
    (np.ones(len(test_df), dtype=np.float32), (test_df["user_idx"], test_df["item_idx"])),
    shape=(n_users, n_items)
)

In [10]:
# Tahap 8 : EVALUASI MAP@K

def average_precision_at_k(actual, predicted, k=10):
    """Hitung average precision at k untuk satu user."""

    predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        if p in actual:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    if len(actual) == 0:
        return 0.0

    return score / min(len(actual), k)


def mean_average_precision(model, train_matrix, test_matrix, k=10):
    aps = []

    for user_idx in tqdm(range(train_matrix.shape[0])):
        recommended, _ = model.recommend(
            userid=user_idx,
            user_items=train_matrix[user_idx],
            N=k,
            filter_already_liked_items=True
        )

        actual = test_matrix[user_idx].indices
        if len(actual) == 0:
            continue

        ap = average_precision_at_k(actual, recommended, k)
        aps.append(ap)

    return np.mean(aps) if aps else 0.0


map_score = mean_average_precision(bpr_model, train_matrix, test_matrix, k=10)
print(f"\nMean Average Precision @10: {map_score:.4f}")


100%|██████████| 13876/13876 [02:42<00:00, 85.35it/s]


Mean Average Precision @10: 0.0471





In [11]:
rows = []

TOP_K = 10  # jumlah rekomendasi per user

for user_idx in range(n_users):

    user_vector = train_matrix[user_idx]

    # dapatkan 10 rekomendasi
    ids, scores = bpr_model.recommend(
        userid=user_idx,
        user_items=user_vector,
        N=TOP_K,
        filter_already_liked_items=True
    )

    # convert ke original ID
    orig_user_id = user_enc.inverse_transform([user_idx])[0]
    orig_item_ids = item_enc.inverse_transform(ids.astype(int))

    # gabungkan jadi satu string
    items_joined = " ".join([str(i) for i in orig_item_ids])

    # append ke rows
    rows.append([orig_user_id, items_joined])

# Buat DataFrame
submission_df = pd.DataFrame(rows, columns=["user_id", "item_id"])

# Simpan CSV
submission_df.to_csv("bpr_recommendations.csv", index=False)

print("File rekomendasi berhasil dibuat: bpr_recommendations.csv")
submission_df.head()

File rekomendasi berhasil dibuat: bpr_recommendations.csv


Unnamed: 0,user_id,item_id
0,8,0140430113 0971880107 0671877836 8447333795 05...
1,99,0446525731 0553099558 038072362X 045120915X 04...
2,114,0380731851 0312995423 0316789089 0385508042 06...
3,160,0671877836 8447333795 0062548689 3442410886 01...
4,183,0060586605 0515136549 2253005274 0756401666 84...


In [14]:
# ============================================
# TAHAP 1 — TRAINING MODEL BPR Tuning
# ============================================

# Hyperparameter tuning hasil rekomendasi
bpr_model = BayesianPersonalizedRanking(
    factors=256,           # lebih kaya representasi (naikkan MAP)
    learning_rate=0.008,   # stabil & cepat
    regularization=0.03,   # cegah overfitting
    iterations=250,        # cukup untuk konvergensi
    random_state=42
)

print("\nMulai training BPR (tuned)...")
bpr_model.fit(user_item_matrix)
print("Training selesai.")


# ============================================
# TAHAP 2 — FUNGSI REKOMENDASI
# ============================================

def recommend_for_user(user_idx, N=10):
    """Memberikan rekomendasi item untuk user tertentu."""
    if user_idx < 0 or user_idx >= n_users:
        raise ValueError(f"user_idx {user_idx} di luar jangkauan 0..{n_users-1}")

    user_vector = user_item_matrix.tocsr()[user_idx]

    ids, scores = bpr_model.recommend(
        userid=user_idx,
        user_items=user_vector,
        N=N,
        filter_already_liked_items=True
    )

    orig_item_ids = item_enc.inverse_transform([int(i) for i in ids])
    return pd.DataFrame({
        "item_idx": ids.astype(int),
        "item_id": orig_item_ids,
        "score": scores
    })


# ============================================
# TAHAP 3 — SPLIT DATA (TRAIN / TEST)
# ============================================

from sklearn.model_selection import train_test_split

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_matrix = csr_matrix(
    (np.ones(len(train_df), dtype=np.float32), (train_df["user_idx"], train_df["item_idx"])),
    shape=(n_users, n_items)
)

test_matrix = csr_matrix(
    (np.ones(len(test_df), dtype=np.float32), (test_df["user_idx"], test_df["item_idx"])),
    shape=(n_users, n_items)
)


# ============================================
# TAHAP 4 — EVALUASI MAP@10
# ============================================

def average_precision_at_k(actual, predicted, k=10):
    """Hitung average precision at k untuk satu user."""
    predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i, p in enumerate(predicted):
        if p in actual:
            num_hits += 1.0
            score += num_hits / (i + 1.0)

    if len(actual) == 0:
        return 0.0

    return score / min(len(actual), k)


def mean_average_precision(model, train_matrix, test_matrix, k=10):
    aps = []

    for user_idx in tqdm(range(train_matrix.shape[0])):
        recommended, _ = model.recommend(
            userid=user_idx,
            user_items=train_matrix[user_idx],
            N=k,
            filter_already_liked_items=True
        )

        actual = test_matrix[user_idx].indices
        if len(actual) == 0:
            continue

        ap = average_precision_at_k(actual, recommended, k)
        aps.append(ap)

    return np.mean(aps) if aps else 0.0


# Jalankan evaluasi akhir
map_score = mean_average_precision(bpr_model, train_matrix, test_matrix, k=10)
print(f"\nMean Average Precision @10 (TUNED): {map_score:.4f}")


Mulai training BPR (tuned)...


  0%|          | 0/250 [00:00<?, ?it/s]

Training selesai.


100%|██████████| 13876/13876 [04:29<00:00, 51.55it/s]


Mean Average Precision @10 (TUNED): 0.0984





In [None]:
# ============================================
# TAHAP 5 — Menyimpan CSV
# ============================================

rows = []

TOP_K = 10  # jumlah rekomendasi per user

for user_idx in range(n_users):

    user_vector = train_matrix[user_idx]

    # dapatkan 10 rekomendasi
    ids, scores = bpr_model.recommend(
        userid=user_idx,
        user_items=user_vector,
        N=TOP_K,
        filter_already_liked_items=True
    )

    # convert ke original ID
    orig_user_id = user_enc.inverse_transform([user_idx])[0]
    orig_item_ids = item_enc.inverse_transform(ids.astype(int))

    # gabungkan jadi satu string
    items_joined = " ".join([str(i) for i in orig_item_ids])

    # append ke rows
    rows.append([orig_user_id, items_joined])

# Buat DataFrame
submission_df = pd.DataFrame(rows, columns=["user_id", "item_id"])

# Simpan CSV
submission_df.to_csv("bprtuning_recommendations.csv", index=False)

print("File rekomendasi berhasil dibuat: bprtuning_recommendations.csv")
submission_df.head()