In [None]:
# ======================================================
# Tahap 1: Load Data
# ======================================================
import pandas as pd

# Read the three CSV inputs from the current working directory.
train = pd.read_csv("train.csv")
target_users = pd.read_csv("data_target_users_test.csv")
sample_submission = pd.read_csv("sample_submission.csv")

# Preview the first rows of each table, in load order, under its own caption.
for caption, frame in (
    ("Train data:", train),
    ("\nTarget users:", target_users),
    ("\nSample submission:", sample_submission),
):
    print(caption)
    display(frame.head())

# Each row of `train` is counted as one interaction.
print(f"\nTotal interaksi: {len(train)}")

In [None]:
# ======================================================
# Stage 2: Data exploration
# ======================================================

# Interaction counts per user and per item (one entry per distinct ID).
interaksi_user = train['user_id'].value_counts()
interaksi_item = train['item_id'].value_counts()

# Number of distinct users and items present in the training interactions.
n_users = train['user_id'].nunique()
n_items = train['item_id'].nunique()

print(f"Jumlah user unik: {n_users}")
print(f"Jumlah item unik: {n_items}")

# Mean number of interactions per user, then per item.
print("\nRata-rata interaksi per user:", interaksi_user.mean())
print("\nRata-rata interaksi per item:", interaksi_item.mean())

In [None]:
# ======================================================
# Stage 3: Popularity baseline (kept as in your version)
# ======================================================

# Count interactions per item; value_counts() orders the most frequent first.
item_popularity = (
    train['item_id']
    .value_counts()
    .rename_axis('item_id')
    .reset_index(name='popularity')
)

# The ten most frequently interacted-with items.
top10_items = item_popularity['item_id'].iloc[:10].tolist()
print("Top 10 items:", top10_items)

# Every target user gets the same (shared) top-10 list.
baseline_recommendations = dict.fromkeys(target_users['user_id'], top10_items)

# One row per user, with the item list serialised as a comma-separated string.
joined_top10 = ','.join(str(item) for item in top10_items)
baseline_df = pd.DataFrame({
    'user_id': list(baseline_recommendations),
    'item_id': [joined_top10 for _ in baseline_recommendations],
})

display(baseline_df.head())

In [None]:
!pip install implicit==0.7.2

In [None]:
# ======================================================
# Tahap 4: Model ALS (Revisi)
# ======================================================
import scipy.sparse as sparse
from implicit.als import AlternatingLeastSquares
from sklearn.preprocessing import LabelEncoder

# --- Encode raw user and item IDs as contiguous integer indices ---
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()

# NOTE: these two columns are added to `train` in place; later cells read them.
train['user_idx'] = user_encoder.fit_transform(train['user_id'])
train['item_idx'] = item_encoder.fit_transform(train['item_id'])

# --- User x item interaction matrix (CSR) ---
# Every row of `train` contributes a weight of 1. Repeated (user, item) pairs
# are summed by the sparse constructor, so repeat interactions weigh more.
rows = train['user_idx']
cols = train['item_idx']
data = [1] * len(train)

# One matrix row/column per encoded class, so indices line up with the encoders.
user_item_matrix = sparse.csr_matrix(
    (data, (rows, cols)),
    shape=(len(user_encoder.classes_), len(item_encoder.classes_))
)

print("Matrix shape (users x items):", user_item_matrix.shape)

# --- Build and train the ALS model ---
# random_state seeds the factor initialisation so that a fresh
# Restart & Run All reproduces the same model and recommendations.
model = AlternatingLeastSquares(
    factors=50,
    regularization=0.01,
    iterations=15,
    use_gpu=False,
    random_state=42
)

# implicit >= 0.5 expects a (users x items) CSR matrix here.
model.fit(user_item_matrix)
print("✅ Model ALS selesai dilatih")

In [None]:
# Tahap 5: Generate rekomendasi untuk 10 item per user target
from scipy.sparse import csr_matrix
import numpy as np

recommendations = []

# Map each raw user_id to its encoded row index once. LabelEncoder assigns
# index i to classes_[i], so this matches `transform` without scanning the
# whole classes_ array for every target user.
user_index_by_id = {
    raw_id: idx for idx, raw_id in enumerate(user_encoder.classes_.tolist())
}

for user_id in target_users['user_id']:

    user_idx = user_index_by_id.get(user_id)

    if user_idx is not None:

        # With the pinned implicit 0.7.2, recommend() returns a tuple of two
        # parallel arrays (item indices, scores) -- not a list of
        # (item, score) pairs -- so unpack it instead of iterating over it.
        rec_item_idxs, _rec_scores = model.recommend(
            userid=user_idx,
            user_items=user_item_matrix[user_idx],  # this user's CSR row
            N=10,
            filter_already_liked_items=True
        )

        # Decode the encoded item indices back to the original item_id values.
        rec_items = item_encoder.inverse_transform(rec_item_idxs)

    else:
        # Fallback for users with no training interactions: most popular items.
        rec_items = top10_items

    # NOTE(review): confirm that the column names and the "," separator match
    # the format shown in sample_submission.csv.
    recommendations.append({
        "user_id": user_id,
        "item_id": ",".join(map(str, rec_items))
    })

recommendation_df = pd.DataFrame(recommendations)
display(recommendation_df.head())

In [None]:
# Tahap 6: Evaluasi MAP@10
from tqdm import tqdm
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split

# --- 1️⃣ Hold out 20% of the interaction rows for validation ---
train_data, val_data = train_test_split(train, test_size=0.2, random_state=42)
print(f"Train size: {len(train_data)} | Validasi size: {len(val_data)}")

# --- 2️⃣ Build user-item matrices for the train and validation splits ---
# Both matrices use the full index space so their rows and columns line up.
n_users = train['user_idx'].nunique()
n_items = train['item_idx'].nunique()


def _to_interaction_matrix(interactions):
    """Return an (n_users x n_items) float32 CSR matrix with weight 1.0 per row of `interactions`."""
    weights = np.ones(len(interactions), dtype=np.float32)
    coords = (
        interactions['user_idx'].to_numpy(),
        interactions['item_idx'].to_numpy(),
    )
    return csr_matrix((weights, coords), shape=(n_users, n_items))


train_mat = _to_interaction_matrix(train_data)
val_mat = _to_interaction_matrix(val_data)

print("Matrix dibuat ✅")
print("train_mat shape:", train_mat.shape)
print("val_mat shape:", val_mat.shape)


# --- 3️⃣ Fungsi MAP@K ---
def _recommended_item_ids(recs):
    """Normalise the output of `model.recommend` to a flat list of int item indices.

    Handles the `(ids, scores)` tuple of parallel arrays returned by
    implicit >= 0.5, as well as a 2-D array or a sequence of (item, score)
    pairs, and a plain 1-D array of item indices.
    """
    # (ids, scores): two parallel arrays -- only the ids are needed.
    if isinstance(recs, tuple) and len(recs) == 2 and isinstance(recs[0], np.ndarray):
        return [int(i) for i in recs[0].ravel()]
    if isinstance(recs, np.ndarray):
        if recs.ndim > 1:
            return [int(row[0]) for row in recs]
        return [int(i) for i in recs]
    # Sequence of (item, score) pairs.
    return [int(pair[0]) for pair in recs]


def mapk(model, train_mat, val_mat, k=10, filter_already_liked_items=False):
    """
    Compute MAP@K manually for an implicit ALS model.

    Parameters
    ----------
    model : object exposing `recommend(userid, user_items, N, filter_already_liked_items)`
    train_mat : CSR user x item matrix passed to `recommend` as the user's history
    val_mat : CSR user x item matrix whose stored entries are the relevant items
    k : number of recommendations scored per user
    filter_already_liked_items : forwarded to `model.recommend`; the default
        (False) keeps the previous behaviour of not filtering seen items

    Returns
    -------
    Mean of the per-user average precision over users that have at least one
    validation item; 0.0 when no user has any.
    """
    average_precisions = []

    n_users = val_mat.shape[0]
    for user_id in tqdm(range(n_users)):
        # Relevant items for this user; a set gives O(1) membership checks.
        true_items = set(val_mat[user_id].indices.tolist())
        if not true_items:
            continue

        recs = model.recommend(
            userid=user_id,
            user_items=train_mat[user_id],
            N=k,
            filter_already_liked_items=filter_already_liked_items
        )
        rec_items = _recommended_item_ids(recs)[:k]

        # Sum precision@rank at every rank where a relevant item appears.
        score = 0.0
        hits = 0
        for rank, item in enumerate(rec_items, start=1):
            if item in true_items:
                hits += 1
                score += hits / rank
        average_precisions.append(score / min(len(true_items), k))

    # np.mean([]) would return NaN (with a warning) when nothing was evaluated.
    if not average_precisions:
        return 0.0
    return np.mean(average_precisions)


# --- 4️⃣ Run the evaluation ---
# `model` was fitted on the full interaction matrix, which includes the rows
# held out in val_mat; scoring it would leak the validation data. Fit a
# separate model on the training split only and evaluate that one instead.
# Hyperparameters mirror the main model -- keep them in sync.
eval_model = AlternatingLeastSquares(
    factors=50,
    regularization=0.01,
    iterations=15,
    use_gpu=False,
    random_state=42
)
eval_model.fit(train_mat)

map10 = mapk(eval_model, train_mat, val_mat, k=10)
print(f"\n✅ MAP@10: {map10:.4f}")

In [None]:
# ======================================================
# Stage 7: Save the submission
# ======================================================

# Write the recommendations to disk without the DataFrame index column.
recommendation_df.to_csv(path_or_buf="submission.csv", index=False)
print("✅ submission.csv berhasil disimpan")

In [None]:
# Sanity check: number of distinct item IDs in the training interactions.
unique_item_count = train['item_id'].nunique()
print("Jumlah item unik:", unique_item_count)