설치하기

In [None]:
!pip install git+https://github.com/gbolmier/funk-svd

In [10]:
from funk_svd.dataset import fetch_ml_ratings
from funk_svd import SVD
from sklearn.metrics import mean_absolute_error
import pandas as pd

In [50]:
# Load the raw MovieLens tables: ratings and the movie catalogue.
ds_ratings = pd.read_csv("../ml-latest-small/ratings.csv")
ds_movies = pd.read_csv("../ml-latest-small/movies.csv")

# funk-svd expects the user column to be called u_id and the item column
# i_id, so rename the MovieLens columns accordingly.
funk_svd_columns = {"userId": "u_id", "movieId": "i_id"}
df = ds_ratings.rename(columns=funk_svd_columns)
df

Unnamed: 0,u_id,i_id,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
...,...,...,...,...
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352


In [51]:
# Use 80% of the ratings for training, 10% as the validation set and the
# remaining 10% as the test set.
train = df.sample(frac=0.8, random_state=7)

# Everything not drawn into train is split in half: one half becomes val,
# the rows left over after that become test.
holdout = df.drop(train.index)
val = holdout.sample(frac=0.5, random_state=8)
test = holdout.drop(val.index)

In [41]:
# Train the SVD model. n_factors corresponds to the k of the SVD.
svd_params = dict(
    lr=0.001,
    reg=0.005,
    n_epochs=100,
    n_factors=15,
    early_stopping=True,
    shuffle=False,
    min_rating=1,
    max_rating=5,
)
svd = SVD(**svd_params)
svd.fit(X=train, X_val=val)

# Evaluate the trained model on the test set.
pred = svd.predict(test)
mae = mean_absolute_error(test["rating"], pred)

print(f'Test MAE: {mae:.2f}')

Preprocessing data...

Preprocessing data...

Epoch 1/100  | val_loss: 0.95 - val_rmse: 0.98 - val_mae: 0.78 - took 0.0 sec
Epoch 2/100  | val_loss: 0.91 - val_rmse: 0.95 - val_mae: 0.76 - took 0.0 sec
Epoch 3/100  | val_loss: 0.88 - val_rmse: 0.94 - val_mae: 0.75 - took 0.0 sec
Epoch 4/100  | val_loss: 0.87 - val_rmse: 0.93 - val_mae: 0.74 - took 0.0 sec
Epoch 5/100  | val_loss: 0.85 - val_rmse: 0.92 - val_mae: 0.73 - took 0.0 sec
Epoch 6/100  | val_loss: 0.84 - val_rmse: 0.92 - val_mae: 0.72 - took 0.0 sec
Epoch 7/100  | val_loss: 0.84 - val_rmse: 0.91 - val_mae: 0.72 - took 0.0 sec
Epoch 8/100  | val_loss: 0.83 - val_rmse: 0.91 - val_mae: 0.71 - took 0.0 sec
Epoch 9/100  | val_loss: 0.82 - val_rmse: 0.91 - val_mae: 0.71 - took 0.0 sec
Epoch 10/100 | val_loss: 0.82 - val_rmse: 0.90 - val_mae: 0.71 - took 0.0 sec
Epoch 11/100 | val_loss: 0.81 - val_rmse: 0.90 - val_mae: 0.70 - took 0.0 sec
Epoch 12/100 | val_loss: 0.81 - val_rmse: 0.90 - val_mae: 0.70 - took 0.0 sec
Epoch 13/100 | val

In [68]:
# Wide lookup table of the raw ratings: one row per userId, one column per
# movieId, each cell holding that user's rating for that movie.
user_ratings = ds_ratings.pivot(
    index="userId",
    columns="movieId",
    values="rating",
)
def get_user_real_score(user_id, item_id):
    """Look up the rating stored for one (user, movie) pair.

    Args:
        user_id: Row label in the module-level ``user_ratings`` table.
        item_id: Column label in the module-level ``user_ratings`` table.

    Returns:
        The value held in ``user_ratings`` at that row/column position.
    """
    cell = (user_id, item_id)
    return user_ratings.loc[cell]

In [67]:

def get_user_unseen_ranks(user_id, max_rank=100):
    """Rank movies for a user and report which held-out movies were recommended.

    Every movie id in ``df`` is scored with ``svd.predict_pair``. Movies the
    user rated in ``train`` are removed, the rest are ordered by predicted
    score (highest first) and numbered from 1. The ranking is then narrowed
    to the movies the user rated in ``val`` or ``test``, i.e. movies the model
    never saw for this user but that the user did in fact rate.

    Args:
        user_id: Id of the user (matched against the ``u_id`` column).
        max_rank: Keep only this many top-ranked movies before looking for
            held-out movies. ``None`` keeps the full ranking.

    Returns:
        A ``(rec, top_k_accuracy)`` tuple.

        ``rec`` is ``ds_movies`` joined with ``recommendation_score``,
        ``real_score`` and ``rank`` for the held-out movies that made it into
        the top ``max_rank``, sorted by ``rank``.

        ``top_k_accuracy`` is ``len(rec)`` divided by the number of ratings
        the user has in ``val`` and ``test``. It is ``0.0`` when the user has
        no held-out ratings at all (previously this raised
        ``ZeroDivisionError``).
    """
    # Score every movie for this user with predict_pair.
    movie_ids = df.i_id.unique()
    scored = pd.DataFrame(
        [
            {
                "id": movie_id,
                "recommendation_score": svd.predict_pair(user_id, movie_id),
                "real_score": get_user_real_score(user_id, movie_id),
            }
            for movie_id in movie_ids
        ]
    )

    # Exclude movies the user already rated in the training split, then order
    # the remainder by predicted score. sort_values returns a new frame, so
    # nothing below writes into a slice of another DataFrame.
    trained_ids = train.loc[train.u_id == user_id, "i_id"]
    ranked = scored[~scored["id"].isin(trained_ids)].sort_values(
        "recommendation_score", ascending=False
    )

    # Keep only the top max_rank recommendations.
    if max_rank is not None:
        ranked = ranked.head(max_rank)

    # Attach the 1-based position in the ranking.
    ranked = ranked.assign(rank=range(1, len(ranked) + 1))

    # Movie ids the user rated that were kept out of train. Filtering the
    # ranking down to these shows at which rank each of them was recommended.
    held_out = pd.concat([val, test], axis=0)
    held_out_ids = held_out.loc[held_out.u_id == user_id, "i_id"]
    hits = ranked[ranked["id"].isin(held_out_ids)].set_index("id")

    # Join with the movie catalogue to get title and genres.
    rec = ds_movies.merge(hits, left_on="movieId", right_index=True)
    rec = rec.sort_values("rank")

    # A user without any held-out rating has nothing to hit; report 0.0
    # instead of dividing by zero.
    n_held_out = len(held_out_ids)
    top_k_accuracy = len(rec) / n_held_out if n_held_out else 0.0
    return rec, top_k_accuracy

from IPython.display import display

# Evaluate the first ten distinct user ids found in the ratings table.
user_ids = df.u_id.unique()[:10]

total_acc = 0
for uid in user_ids:
    top100, acc = get_user_unseen_ranks(user_id=uid)
    print("User: ", uid, "TOP 100 accuracy: ", round(acc, 2))
    display(top100)
    # Running sum of the per-user accuracies, averaged after the loop.
    total_acc = total_acc + acc

# Mean accuracy over the evaluated users (shown as the cell output).
total_acc / len(user_ids)

User:  1 TOP 100 accuracy:  0.27


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
2226,2959,Fight Club (1999),Action|Crime|Drama|Thriller,4.968123,5.0,2
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4.945892,3.0,4
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.9442,4.0,5
914,1213,Goodfellas (1990),Crime|Drama,4.943481,5.0,6
911,1210,Star Wars: Episode VI - Return of the Jedi (1983),Action|Adventure|Sci-Fi,4.903188,5.0,8
1503,2028,Saving Private Ryan (1998),Action|Drama|War,4.886085,4.0,11
398,457,"Fugitive, The (1993)",Thriller,4.784711,5.0,21
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,4.708929,4.0,39
957,1258,"Shining, The (1980)",Horror,4.670629,3.0,54
920,1219,Psycho (1960),Crime|Horror,4.639454,2.0,68


User:  2 TOP 100 accuracy:  0.5


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
6710,58559,"Dark Knight, The (2008)",Action|Crime|Drama|IMAX,4.345709,4.5,5
4615,6874,Kill Bill: Vol. 1 (2003),Action|Crime|Thriller,4.060282,4.0,52


User:  3 TOP 100 accuracy:  0.11


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
2945,3949,Requiem for a Dream (2000),Drama,3.460449,0.5,78


User:  4 TOP 100 accuracy:  0.09


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
3141,4226,Memento (2000),Mystery|Thriller,4.108509,2.0,7
914,1213,Goodfellas (1990),Crime|Drama,4.104585,4.0,8
2259,2997,Being John Malkovich (1999),Comedy|Drama|Fantasy,3.874308,4.0,52
701,919,"Wizard of Oz, The (1939)",Adventure|Children|Fantasy|Musical,3.867527,5.0,58


User:  5 TOP 100 accuracy:  0.09


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
461,527,Schindler's List (1993),Drama|War,4.135568,5.0,6


User:  6 TOP 100 accuracy:  0.02


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,4.176751,1.0,11


User:  7 TOP 100 accuracy:  0.23


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
898,1196,Star Wars: Episode V - The Empire Strikes Back...,Action|Adventure|Sci-Fi,3.920073,4.0,3
3984,5618,Spirited Away (Sen to Chihiro no kamikakushi) ...,Adventure|Animation|Fantasy,3.703772,5.0,27
3640,4995,"Beautiful Mind, A (2001)",Drama|Romance,3.651338,4.5,42
5374,8961,"Incredibles, The (2004)",Action|Adventure|Animation|Children|Comedy,3.566991,4.0,83
1211,1610,"Hunt for Red October, The (1990)",Action|Adventure|Thriller,3.554836,4.0,90


User:  8 TOP 100 accuracy:  0.33


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller,4.241019,5.0,4
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,4.229812,4.0,7
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War,4.224687,3.0,9


User:  9 TOP 100 accuracy:  0.22


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
900,1198,Raiders of the Lost Ark (Indiana Jones and the...,Action|Adventure,3.924689,5.0,22
3638,4993,"Lord of the Rings: The Fellowship of the Ring,...",Adventure|Fantasy,3.845068,5.0,32


User:  10 TOP 100 accuracy:  0.07


Unnamed: 0,movieId,title,genres,recommendation_score,real_score,rank
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller,3.974648,1.0,4
5917,33794,Batman Begins (2005),Action|Crime|IMAX,3.623362,5.0,98


0.19191612170335576