In [1]:
import os
import pickle

import implicit
from implicit.evaluation import ranking_metrics_at_k, precision_at_k, train_test_split, AUC_at_k, mean_average_precision_at_k, ndcg_at_k
import numpy as np
from rankfm.rankfm import RankFM

from dataset import Dataset
from utils.tools import ranking_metrics_at_k_rankfm, recommend_cold_start

Connected to MongoDB


In [None]:
# Pull a snapshot from MongoDB only when no local cache file exists yet.
# Without this guard a Restart & Run All would re-query the database and
# overwrite 'dataset.pkl', which is the file the next cell loads.
# Delete 'dataset.pkl' to force a refresh.
if not os.path.exists('dataset.pkl'):
    data = Dataset.get_data_from_mongo()
    data.save('dataset.pkl')

In [2]:
# Load the Dataset snapshot stored in 'dataset.pkl' (the file written by data.save above).
data = Dataset.load('dataset.pkl')

In [3]:
# Display the dataset's mapping from player feature name to integer index.
# Left as a bare last expression so the notebook renders it as the cell output.
data.player_feature_to_ix

{'specialization_10': 0,
 'specialization_11': 1,
 'specialization_12': 2,
 'specialization_5': 3,
 'specialization_6': 4,
 'specialization_7': 5,
 'specialization_8': 6,
 'specialization_9': 7,
 'specialization_nan': 8}

In [3]:
# Split 80% train / 20% test. Both splitters receive the same fraction and
# seed from a single dict so the two calls cannot drift apart.
split_kwargs = dict(train_percentage=0.8, random_state=42)

# Interaction-style split, consumed by the factorization machine (FM).
x_train, y_train, x_test, y_test = data.train_test_split_interaction(**split_kwargs)

# Sparse split, consumed by the KNN and ALS models.
train, test = data.train_test_split_sparse(**split_kwargs)

In [4]:
# Cosine-similarity nearest-neighbour recommender, constructed with K=10.
knn_model = implicit.nearest_neighbours.CosineRecommender(K=10)
knn_model.fit(train)

# Ranking metrics for the fitted model, evaluated against the held-out split.
knn_metrics = ranking_metrics_at_k(knn_model, train, test)
print(knn_metrics)



  0%|          | 0/212 [00:00<?, ?it/s]

  0%|          | 0/1191 [00:00<?, ?it/s]

{'precision': 0.9475537787930707, 'map': 0.8862604061355857, 'ndcg': 0.9184897626473643, 'auc': 0.7157208976910755}


In [5]:
# Save the fitted KNN model to 'knn_model.npz'.
knn_model.save('knn_model.npz')

In [6]:
# Alternating Least Squares model; random_state is fixed at 0.
als_model = implicit.als.AlternatingLeastSquares(
    factors=50,
    alpha=70,
    random_state=0,
)
als_model.fit(train)

# Same evaluation call as used for the KNN model.
als_metrics = ranking_metrics_at_k(als_model, train, test)
print(als_metrics)

  check_blas_config()
  check_blas_config()


  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/1191 [00:00<?, ?it/s]

{'precision': 0.9715400723396155, 'map': 0.9293158616029052, 'ndcg': 0.9508460027803319, 'auc': 0.7311049760139611}


In [7]:
# Save the fitted ALS model to 'als_model.npz'.
als_model.save('als_model.npz')

In [8]:
# Side inputs for the FM: player (user) features, question (item) features
# and the dataset's weights.
user_features, item_features = data.player_features, data.question_features

# NOTE(review): `weights` is not referenced by any later cell visible here;
# the FM fit passes y_train as sample_weight instead. Confirm it is still needed.
weights = data.get_numpy_weights()

In [9]:
# Distinct ids on each side of the split. Column 0 of the interaction arrays
# is treated as the player id and column 1 as the question id.
train_players, train_questions = (np.unique(x_train[:, col]) for col in (0, 1))
test_players, test_questions = (np.unique(x_test[:, col]) for col in (0, 1))

# Cold-start ids: present in the test interactions but absent from training.
cold_start_players = set(test_players) - set(train_players)
cold_start_questions = set(test_questions) - set(train_questions)

print(cold_start_players, cold_start_questions)

{992, 1024, 808, 522, 237, 820, 1180} set()


In [10]:
# Keep only the feature rows whose first column matches an id that occurs in
# the training interactions. Test-side equivalents (filtered by test_players /
# test_questions) are not computed in this cell.
is_train_player = np.isin(user_features[:, 0], train_players)
train_user_features = user_features[is_train_player]

is_train_question = np.isin(item_features[:, 0], train_questions)
train_item_features = item_features[is_train_question]

In [11]:
# RankFM factorization machine configured with the WARP loss.
# NOTE(review): no seed is set for this model in this cell, unlike the ALS
# model and the splits. Confirm whether the reported metrics are reproducible.
fm_params = dict(
    factors=50,
    loss="warp",
    max_samples=20,
    learning_schedule='invscaling',
)
fm_model = RankFM(**fm_params)

# Fit on the training interactions with the train-only feature subsets;
# y_train is supplied as the per-interaction sample weight.
fm_model.fit(
    interactions=x_train,
    user_features=train_user_features,
    item_features=train_item_features,
    sample_weight=y_train,
    epochs=20,
    verbose=True,
)

# Project helper that reports ranking metrics for a RankFM model at K=10.
fm_metrics = ranking_metrics_at_k_rankfm(fm_model, x_train, y_train, x_test, y_test, K=10)
print(fm_metrics)

100%|██████████| 1189/1189 [00:00<00:00, 1214.76it/s]

{'precision': 0.9505411049933549, 'map': 0.8848568188839874, 'ndcg': 0.915542389149842, 'auc': 0.7116658070123991}





In [12]:
# Serialize the fitted RankFM model to 'fm_model.pkl'. `pickle` is imported in
# the notebook's import cell so that all dependencies are declared in one place.
# Only unpickle this file from a trusted location: loading a pickle can
# execute arbitrary code.
with open("fm_model.pkl", "wb") as file:
    pickle.dump(fm_model, file)