## Alternating Least Squares (ALS) for Matrix Factorization
* Implicit feedback 데이터를 바탕으로 추천하는 방식 탐색

`Implicit feedback은 사용자의 선호도를 간접적으로 나타내는 데이터로서, 구매, 클릭, 검색 기록 등을 포함한다.  
사용자가 직접 좋아요, 싫어요 등의 평가를 내린 데이터가 아니다.`

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from tqdm import tqdm
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import implicit

In [2]:
# Load the MovieLens-1M ratings, shuffle them with a fixed seed and keep the
# first 200,000 rows so the notebook stays quick to run.
data = (
    load_dataset("nbtpj/movielens-1m-ratings")["train"]
    .shuffle(seed=10)
    .select(range(200000))
)
# Only the (user, movie, rating) triple is used from here on.
movielens_df = pd.DataFrame(data)[["user_id", "movie_id", "user_rating"]]

# Re-index the raw ids to contiguous 0-based positions (numbered in order of
# first appearance) so they can serve as row/column indices of a sparse matrix.
user_ids = movielens_df["user_id"].unique()
movie_ids = movielens_df["movie_id"].unique()
user_id_map = dict(zip(user_ids, range(len(user_ids))))
movie_id_map = dict(zip(movie_ids, range(len(movie_ids))))

# Swap the raw ids in both columns for their positional indices.
movielens_df = movielens_df.assign(
    user_id=movielens_df["user_id"].map(user_id_map),
    movie_id=movielens_df["movie_id"].map(movie_id_map),
)

Found cached dataset parquet (/Users/jaypark/.cache/huggingface/datasets/nbtpj___parquet/nbtpj--movielens-1m-ratings-b1aa424c884e38a3/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)


  0%|          | 0/1 [00:00<?, ?it/s]

Loading cached shuffled indices for dataset at /Users/jaypark/.cache/huggingface/datasets/nbtpj___parquet/nbtpj--movielens-1m-ratings-b1aa424c884e38a3/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec/cache-15aa091e090a52af.arrow


In [3]:
# Hold out 20% of the interactions; the split is seeded so it is repeatable.
train_data, test_data = train_test_split(movielens_df, random_state=10, test_size=0.2)

# Assemble the sparse matrix from the training rows only.
# Rows are re-indexed users, columns are re-indexed movies, and each stored
# value is the rating cast to float.
# The shape is fixed to the full id vocabularies, so users/movies that only
# occur in the held-out rows still get an (empty) row/column.
n_users, n_movies = len(user_ids), len(movie_ids)
train_values = train_data["user_rating"].astype(float)
train_positions = (train_data["user_id"], train_data["movie_id"])
ratings_matrix = csr_matrix((train_values, train_positions), shape=(n_users, n_movies))

In [5]:
# Fit an ALS matrix-factorization model (30 latent factors, regularization 0.05,
# 20 alternating iterations) on the training ratings matrix.
# ALS starts from randomly initialised factors, so without a seed the learned
# factors - and the recommendations displayed further down - differ on every
# run. The seed is pinned to the same value used for the data shuffle and the
# train/test split so the notebook reproduces under Restart & Run All.
model_als = implicit.als.AlternatingLeastSquares(
    factors=30,
    regularization=0.05,
    iterations=20,
    random_state=10,
)
model_als.fit(ratings_matrix)

  0%|          | 0/20 [00:00<?, ?it/s]

In [6]:
# Pick the user to inspect (an index in the re-mapped id space, not a raw
# MovieLens id) and how many recommendations to request.
user_id, k = 0, 10

# Ask the fitted model for the top-k items, passing that user's own row of the
# training matrix alongside the user index.
recommended = model_als.recommend(user_id, ratings_matrix[user_id], N=k)

# zip(*recommended) regroups the result into one (item_id, score) pair per row.
# NOTE(review): presumably `recommend` returns an (item ids, scores) pair of
# arrays - confirm against the installed implicit version.
# The item_id values are the re-mapped column indices, not raw MovieLens ids.
recommendation_rows = list(zip(*recommended))
df_recommeded = pd.DataFrame(recommendation_rows, columns=["item_id", "score"])

df_recommeded


Unnamed: 0,item_id,score
0,195,1.219226
1,1300,1.166343
2,467,1.122641
3,367,1.114
4,569,1.062542
5,116,1.035805
6,1124,0.977558
7,529,0.974799
8,106,0.95012
9,1191,0.933956
