# Collaborative Filtering Recommendations using Normalized Values

In [107]:
import pandas as pd
import numpy as np
import implicit

from scipy.sparse import csr_matrix


In [108]:
# Load the train/test play tables and the game coding table.
# Forward slashes are used because they resolve on Windows, Linux and macOS,
# whereas a literal backslash is only a path separator on Windows.
train = pd.read_csv("data/train-plays.csv")
test = pd.read_csv("data/test-plays.csv")
# NOTE(review): game_coding is not referenced again in the visible cells.
game_coding = pd.read_csv("data/game-coding.csv")


In [109]:
# Number of rows in the training table.
len(train)


39893

## Format training data

In [110]:
# Wide play matrix: one row per game_id, one column per user_id,
# each cell holding that pair's norm_amount.
game_user = train.pivot(index="game_id", columns="user_id", values="norm_amount")

In [111]:
# (game, user) pairs absent from train come out of the pivot as NaN; store them as 0.
game_user = game_user.fillna(value=0)

In [112]:
# Lookup table pairing each user_id column label ("original") with its
# 0-based column position in game_user ("coded").
# NOTE(review): user_coding is not referenced again in the visible cells.
user_coding = pd.DataFrame({
    "original": game_user.columns,
    "coded": np.arange(len(game_user.columns)),
})

In [113]:
# Compressed sparse row copy of the game x user matrix.
game_user_sparse = csr_matrix(game_user.values)

In [114]:
# Same data with users as rows and games as columns.
user_game = game_user.transpose()

In [115]:
# Compressed sparse row copy of the user x game matrix.
user_game_sparse = csr_matrix(user_game.values)

In [116]:
# Distinct user ids, in order of first appearance in the training table.
user_ids = train.user_id.unique()

In [117]:
# Total number of (game, user) cells in the matrix.
n_rows, n_cols = game_user_sparse.shape
matrix_size = n_rows * n_cols
# Cells that actually hold a non-zero value.
played_rows, _played_cols = game_user_sparse.nonzero()
num_played = len(played_rows)
# Percentage of cells that are empty.
sparsity = 100 * (1 - num_played / matrix_size)
sparsity

98.86683387172529

# Alternating Least Squares

In [118]:
# ALS model: 128 latent factors, regularization 0.05, 50 training iterations.
als = implicit.als.AlternatingLeastSquares(
    factors=128, regularization=0.05, iterations=50)

In [119]:
# Train ALS on the matrix that has games as rows and users as columns.
# NOTE(review): this orientation looks like the item-by-user input that pre-0.5
# `implicit` releases expect -- confirm against the installed version.
als.fit(game_user_sparse)

100%|██████████| 50/50 [00:01<00:00, 26.41it/s]


In [120]:
# recommend_all yields one row of recommended item *positions* per user row of
# user_game_sparse. Those positions index the rows of game_user, so translate
# them back to real game ids before comparing with test.game_id; this is a
# no-op only when game ids happen to be exactly 0..n-1.
als_rec_idx = als.recommend_all(user_game_sparse)
game_recs = np.asarray(game_user.index)[als_rec_idx].ravel()
# Repeat each user id once per recommendation slot; the slot count is read
# from the result rather than hard-coded as 10.
user_10 = np.repeat(user_game.index, als_rec_idx.shape[1])
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

100%|██████████| 3407/3407 [00:00<00:00, 47317.70it/s]


In [121]:
# Pair every recommendation with the same user's test rows. The clashing
# game_id columns come back as game_id_x (recommended) and game_id_y (test).
results = recommendations.merge(test, how="inner", on="user_id")

In [122]:
# 1 where the recommended game equals the test game, 0 otherwise.
results["recommended"] = results["game_id_x"].eq(results["game_id_y"]).astype(int)

In [123]:
# accuracy: merged rows whose recommended game matches the test game,
# divided by the number of test rows
len(results[results["recommended"] == 1]) / len(test)

0.14000587026709715

In [124]:
# Share of the distinct training games that appear in at least one recommendation
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.829622458857696

# Bayesian Personalized Ranking

In [125]:
# BPR model: 128 latent factors, learning rate 0.01, regularization 0.05,
# 200 training iterations.
bpr = implicit.bpr.BayesianPersonalizedRanking(
    factors=128, learning_rate=0.01, regularization=0.05, iterations=200)

In [126]:
# Train BPR on the matrix that has games as rows and users as columns.
# NOTE(review): this orientation looks like the item-by-user input that pre-0.5
# `implicit` releases expect -- confirm against the installed version.
bpr.fit(game_user_sparse)

100%|██████████| 200/200 [00:03<00:00, 66.27it/s, train_auc=87.74%, skipped=9.11%]


In [127]:
# recommend_all yields one row of recommended item *positions* per user row of
# user_game_sparse. Those positions index the rows of game_user, so translate
# them back to real game ids before comparing with test.game_id; this is a
# no-op only when game ids happen to be exactly 0..n-1.
bpr_rec_idx = bpr.recommend_all(user_game_sparse)
game_recs = np.asarray(game_user.index)[bpr_rec_idx].ravel()
# Repeat each user id once per recommendation slot; the slot count is read
# from the result rather than hard-coded as 10.
user_10 = np.repeat(user_game.index, bpr_rec_idx.shape[1])
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

100%|██████████| 3407/3407 [00:00<00:00, 36249.24it/s]


In [128]:
# Pair every recommendation with the same user's test rows. The clashing
# game_id columns come back as game_id_x (recommended) and game_id_y (test).
results = recommendations.merge(test, how="inner", on="user_id")

In [129]:
# 1 where the recommended game equals the test game, 0 otherwise.
results["recommended"] = results["game_id_x"].eq(results["game_id_y"]).astype(int)

In [130]:
# accuracy: merged rows whose recommended game matches the test game,
# divided by the number of test rows
len(results[results["recommended"] == 1]) / len(test)

0.2456706780158497

In [131]:
# Share of the distinct training games that appear in at least one recommendation
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.850919651500484

# Logistic Matrix Factorization

In [132]:
# Logistic matrix factorization with 128 latent factors; every other
# hyper-parameter is left at the library default.
lmf = implicit.lmf.LogisticMatrixFactorization(factors=128)

In [133]:
# Train LMF on the matrix that has games as rows and users as columns.
# NOTE(review): this orientation looks like the item-by-user input that pre-0.5
# `implicit` releases expect -- confirm against the installed version.
lmf.fit(game_user_sparse)

100%|██████████| 30/30 [00:06<00:00,  4.99it/s]


In [134]:
# recommend_all yields one row of recommended item *positions* per user row of
# user_game_sparse. Those positions index the rows of game_user, so translate
# them back to real game ids before comparing with test.game_id; this is a
# no-op only when game ids happen to be exactly 0..n-1.
lmf_rec_idx = lmf.recommend_all(user_game_sparse)
game_recs = np.asarray(game_user.index)[lmf_rec_idx].ravel()
# Repeat each user id once per recommendation slot; the slot count is read
# from the result rather than hard-coded as 10.
user_10 = np.repeat(user_game.index, lmf_rec_idx.shape[1])
recommendations = pd.DataFrame({"user_id": user_10, "game_id": game_recs})

100%|██████████| 3407/3407 [00:00<00:00, 51621.41it/s]


In [135]:
# Pair every recommendation with the same user's test rows. The clashing
# game_id columns come back as game_id_x (recommended) and game_id_y (test).
results = recommendations.merge(test, how="inner", on="user_id")

In [136]:
# 1 where the recommended game equals the test game, 0 otherwise.
results["recommended"] = results["game_id_x"].eq(results["game_id_y"]).astype(int)

In [137]:
# accuracy: merged rows whose recommended game matches the test game,
# divided by the number of test rows
len(results[results["recommended"] == 1]) / len(test)

0.04490754329321984

In [138]:
# Share of the distinct training games that appear in at least one recommendation
len(np.unique(game_recs)) / len(train["game_id"].unique())

0.7938044530493708

# ALS + BPR

In [139]:
# Collect each user's ALS recommendations as a set of game ids.
# `recommend` addresses the user by row position in user_game_sparse and
# returns item positions, so translate in both directions instead of passing
# raw ids through (raw ids only work when they happen to equal the positions).
user_row_of = {uid: row for row, uid in enumerate(user_game.index)}
game_id_at = np.asarray(game_user.index)
user_recs = {}
for user_id in user_ids:
    als_top = als.recommend(user_row_of[user_id], user_game_sparse)
    # Each entry is an (item position, score) pair; only the item is kept.
    user_recs[user_id] = {game_id_at[idx] for idx, _score in als_top}


In [140]:
# Add each user's BPR recommendations to the set already stored in user_recs.
# `recommend` addresses the user by row position in user_game_sparse and
# returns item positions, so translate in both directions instead of passing
# raw ids through (raw ids only work when they happen to equal the positions).
user_row_of = {uid: row for row, uid in enumerate(user_game.index)}
game_id_at = np.asarray(game_user.index)
for user_id in user_ids:
    bpr_top = bpr.recommend(user_row_of[user_id], user_game_sparse)
    # In-place union; the set object held by user_recs is updated directly.
    user_recs[user_id].update(game_id_at[idx] for idx, _score in bpr_top)


In [141]:
# Hit rate of the per-user recommendation sets in user_recs.
# Held-out games are gathered per user up front, so the lookup no longer
# depends on `test`'s row labels happening to equal the user ids (the previous
# `test_df['game_id'][user_id]` was a label lookup and raised KeyError
# otherwise), and `test` is scanned once instead of once per user.
held_out_games = {}
for test_user, test_game in zip(test['user_id'], test['game_id']):
    held_out_games.setdefault(test_user, set()).add(test_game)

num_total = 0
num_correct = 0
for user_id in user_ids:
    test_game_ids = held_out_games.get(user_id)
    if not test_game_ids:
        # No held-out game for this user: nothing to score.
        continue
    num_total += 1
    # A user counts as a hit when any of their held-out games was recommended.
    if test_game_ids & user_recs[user_id]:
        num_correct += 1

# Guard the empty case instead of dividing by zero.
print(num_correct / num_total if num_total else float("nan"))


0.29292632814793074
