### Import Libraries

In [2]:
import pandas as pd
import numpy as np

from lightfm.data import Dataset
from lightfm import LightFM
from lightfm.evaluation import precision_at_k

from tabulate import tabulate

from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

import pickle

### Read Data

In [2]:
# Load the interaction scores and discard the 'Unnamed: 0' and 'id' columns,
# which are not used anywhere below.
score = pd.read_csv('data/score.csv').drop(columns=['Unnamed: 0', 'id'])

In [3]:
# Load the posts table and discard the 'Unnamed: 0' column.
post = pd.read_csv('data/post.csv').drop(columns=['Unnamed: 0'])

In [4]:
# Load the categories table and discard the 'Unnamed: 0' column.
category = pd.read_csv('data/category.csv').drop(columns=['Unnamed: 0'])

In [5]:
# Load the users table and discard the 'Unnamed: 0' column.
user = pd.read_csv('data/user.csv').drop(columns=['Unnamed: 0'])
# user.drop(0, axis='rows', inplace=True)

### LightFM (Recommendation Items)

In [6]:
# Register every distinct user id and post id with a fresh LightFM Dataset so
# it can assign each of them an internal (model) index.
dataset = Dataset()

users_ids = user['id'].unique()
posts_ids = post['id'].unique()
dataset.fit(users=users_ids, items=posts_ids)

In [7]:
# Keep the four mappings returned by the dataset; the user and item id maps
# are used by the helper functions defined further down.
user_id_map, user_feature_map, item_id_map, item_feature_map = dataset.mapping()

In [8]:
def get_data():
    """Return the scored interactions as a list of plain tuples.

    Reads the module-level ``score`` DataFrame and yields one
    ``(user__id, post__id, score)`` tuple per row, in row order.
    """
    triples = score[['user__id', 'post__id', 'score']]
    # index=False / name=None makes itertuples emit bare value tuples, so no
    # per-row repacking is needed.
    return list(triples.itertuples(index=False, name=None))

# Turn the (user, post, score) triples into the matrices LightFM consumes.
data = get_data()

# `weights` is kept for reference; the model.fit call below only receives
# `interactions`.
interactions, weights = dataset.build_interactions(data=data)

interactions

<1001x50000 sparse matrix of type '<class 'numpy.int32'>'
	with 40000 stored elements in COOrdinate format>

In [9]:
# %%time
model = LightFM(loss='warp')
model.fit(interactions=interactions, epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x7ff405f68250>

In [10]:
# NOTE(review): `interactions` is the same matrix the model was fitted on, so
# this is a training-set precision@5, not a held-out estimate.
train_precision = precision_at_k(model, interactions, k=5).mean()
print("Precision: %.2f" % train_precision)

Precision: 0.91


In [11]:
def get_top_k_known_positive_items(user_id, top_k, interactions, user_id_map, item_id_map):
    """Return up to ``top_k`` posts the given user already interacted with.

    Parameters
    ----------
    user_id : external user id (a key of ``user_id_map``).
    top_k : maximum number of rows to return.
    interactions : sparse user x item matrix; must support ``.tocsr()``.
    user_id_map : dict mapping external user id -> model row index.
    item_id_map : dict mapping external post id -> model column index.

    Returns
    -------
    pandas.DataFrame with columns ``id``, ``title`` and ``category__id`` taken
    from the module-level ``post`` frame. Rows keep the order they have in
    ``post``; they are not ranked by score.

    Raises
    ------
    ValueError if ``user_id`` is not present in ``user_id_map``.
    """
    try:
        # Direct dict lookup instead of scanning list(user_id_map) for the key.
        user_row = user_id_map[user_id]
    except KeyError:
        raise ValueError(f"{user_id!r} is not in user_id_map") from None

    # CSR format exposes the column indices stored for a single row.
    known_positives_model_ids = interactions.tocsr()[user_row].indices

    # Invert the item mapping once rather than rebuilding a list per item.
    model_to_post_id = {model_id: post_id for post_id, model_id in item_id_map.items()}
    known_positives_ids = [model_to_post_id[model_id]
                           for model_id in known_positives_model_ids]

    # Look the posts up by their real ids.
    known_positive_items = post.loc[post['id'].isin(known_positives_ids),
                                    ['id', 'title', 'category__id']]

    return known_positive_items.iloc[:top_k]

In [12]:
def get_top_k_items(user_id, top_k, model, interactions, user_id_map, item_id_map=None):
    """Return the ``top_k`` highest-scoring posts for a user, best first.

    Parameters
    ----------
    user_id : external user id (a key of ``user_id_map``).
    top_k : number of posts to return.
    model : object exposing ``predict(user_index, item_indices)``.
    interactions : user x item matrix; only ``shape[1]`` (item count) is used.
    user_id_map : dict mapping external user id -> model row index.
    item_id_map : optional dict mapping external post id -> model column
        index. When omitted, the module-level ``item_id_map`` is used.

    Returns
    -------
    pandas.DataFrame with columns ``id``, ``title`` and ``category__id`` taken
    from the module-level ``post`` frame, ordered by descending score.
    """
    if item_id_map is None:
        # Fall back to the notebook-level mapping so existing callers that do
        # not pass it keep working.
        item_id_map = globals()['item_id_map']

    n_items = interactions.shape[1]
    scores = model.predict(user_id_map[user_id], np.arange(n_items))
    top_items_model_ids = np.argsort(-scores)[:top_k]

    # Translate model column indices back to real post ids instead of using
    # them as DataFrame labels, which only works while post's index happens
    # to coincide with the model's item order.
    model_to_post_id = {model_id: post_id for post_id, model_id in item_id_map.items()}
    top_post_ids = [model_to_post_id[int(model_id)] for model_id in top_items_model_ids]

    matches = post.loc[post['id'].isin(top_post_ids), ['id', 'title', 'category__id']]

    # isin() returns rows in post order; restore the score ranking.
    rank_of = {post_id: rank for rank, post_id in enumerate(top_post_ids)}
    order = matches['id'].map(rank_of).to_numpy().argsort(kind='stable')
    return matches.iloc[order]

In [13]:
def sample_recommendation(model, interactions, user_ids, top_k=5):
    """Print known positives and model recommendations for each user.

    Parameters
    ----------
    model : fitted model passed through to ``get_top_k_items``.
    interactions : user x item interaction matrix.
    user_ids : iterable of external user ids to report on.
    top_k : number of rows shown in each table (defaults to 5).

    The id mappings are read from the module-level ``user_id_map`` and
    ``item_id_map``. Nothing is returned; the tables are printed.
    """
    for user_id in user_ids:
        known_positives_items = get_top_k_known_positive_items(
            user_id=user_id,
            top_k=top_k,
            interactions=interactions,
            user_id_map=user_id_map,
            item_id_map=item_id_map)

        # get_top_k_items requires user_id_map; omitting it raises TypeError.
        top_k_items = get_top_k_items(
            user_id=user_id,
            top_k=top_k,
            model=model,
            interactions=interactions,
            user_id_map=user_id_map)

        print(f"User id: {user_id}", end='\n\n')

        print("Known positives:")
        print(tabulate(known_positives_items, headers='keys', tablefmt='github'), end='\n\n')

        print("Recommended:")
        print(tabulate(top_k_items, headers='keys', tablefmt='github'))


In [14]:
# Show known positives and recommendations for one example user.
sample_recommendation(
    model=model,
    interactions=interactions,
    user_ids=[800],
)


User id: 800

Known positives:
|      |   id | title                   |   category__id |
|------|------|-------------------------|----------------|
|   82 | 1323 | Dunlap, Moran and Brown |            540 |
| 1083 | 2324 | Caldwell and Sons       |            541 |
| 2084 | 3325 | Silva LLC               |            542 |
| 3085 | 4326 | Morton-Mathis           |            543 |
| 4086 | 5327 | Thomas-Jackson          |            544 |

Recommended:
|       |    id | title                         |   category__id |
|-------|-------|-------------------------------|----------------|
|    82 |  1323 | Dunlap, Moran and Brown       |            540 |
|  9091 | 10332 | Miller-Bridges                |            539 |
| 13132 | 14373 | Cabrera, Johnson and Jennings |            540 |
| 32114 | 33355 | Weaver and Sons               |            542 |
|  6088 |  7329 | Russo-Zimmerman               |            546 |


### Cosine Similarity (Similar Items)

In [23]:
def create_cosine_item_similarity_matrix(n_items, model):
    """Return pairwise cosine similarities of the first ``n_items`` item embeddings.

    ``model.item_embeddings`` is sliced to its first ``n_items`` rows, wrapped
    in a CSR matrix and passed to ``cosine_similarity``; the result is returned
    unchanged.
    """
    leading_embeddings = model.item_embeddings[:n_items]
    return cosine_similarity(csr_matrix(leading_embeddings))

In [24]:
# Only the first 10000 item embeddings are compared; the interactions matrix
# above has more item columns than that, so items beyond this cut-off have no
# row/column in the similarity matrix.
item_similarities_matrix = create_cosine_item_similarity_matrix(n_items=10000, model=model)

def get_top_k_similar_items(item_id, top_k, item_similarities_matrix):
    """Return the posts most similar to ``item_id``, most similar first.

    Parameters
    ----------
    item_id : external post id (a key of the module-level ``item_id_map``).
    top_k : number of similar posts wanted.
    item_similarities_matrix : square array-like of item-item similarities,
        indexed by model item index.

    Returns
    -------
    pandas.DataFrame with columns ``id``, ``title`` and ``category__id`` taken
    from the module-level ``post`` frame. Up to ``top_k + 1`` rows are
    returned: the extra slot is normally taken by the queried post itself,
    which has the highest similarity to itself.

    Raises
    ------
    ValueError if ``item_id`` is not in ``item_id_map``.
    KeyError if the item's model index lies outside the similarity matrix.
    """
    try:
        # Direct dict lookup instead of scanning list(item_id_map.keys()).
        item_model_id = item_id_map[item_id]
    except KeyError:
        raise ValueError(f"{item_id!r} is not in item_id_map") from None

    # Index the array directly; wrapping the whole matrix in a DataFrame on
    # every call is unnecessary.
    similarities = np.asarray(item_similarities_matrix)
    if not 0 <= item_model_id < similarities.shape[1]:
        raise KeyError(
            f"model index {item_model_id} is outside the similarity matrix "
            f"(size {similarities.shape[1]})")

    similar_items_model_ids = np.argsort(-similarities[:, item_model_id])[:top_k + 1]

    # Translate model indices back to real post ids rather than using them as
    # DataFrame labels.
    model_to_post_id = {model_id: post_id for post_id, model_id in item_id_map.items()}
    similar_post_ids = [model_to_post_id[int(model_id)]
                        for model_id in similar_items_model_ids]

    matches = post.loc[post['id'].isin(similar_post_ids), ['id', 'title', 'category__id']]

    # isin() returns rows in post order; restore the similarity ranking.
    rank_of = {post_id: rank for rank, post_id in enumerate(similar_post_ids)}
    order = matches['id'].map(rank_of).to_numpy().argsort(kind='stable')
    return matches.iloc[order]

In [25]:
# Posts most similar to post 2700 (the queried post itself is listed first).
get_top_k_similar_items(item_id=2700, top_k=5, item_similarities_matrix=item_similarities_matrix)

Unnamed: 0,id,title,category__id
1459,2700,Swanson Ltd,547
4462,5703,"Scott, Gordon and Dickerson",540
9467,10708,Mccann-Martin,545
5463,6704,Harrell-Young,541
425,1666,"Stewart, Garcia and Bright",543
2460,3701,Dudley and Sons,538


### Save Model

In [None]:
# Serialise the fitted model to disk.
with open('recommendation_model.pickle', 'wb') as model_file:
    pickle.dump(model, model_file)

### Load Model

In [None]:
# Restore the model written above and check that it still produces
# recommendations.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('recommendation_model.pickle', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

sample_recommendation(
    model=loaded_model,
    interactions=interactions,
    user_ids=[800],
)


### Save Dataset (to reuse the id mappings)

In [None]:
# Serialise the LightFM dataset (it holds the id mappings) to disk.
with open('dataset.pickle', 'wb') as dataset_file:
    pickle.dump(dataset, dataset_file)

### Load Dataset

In [None]:
# Restore the dataset written above.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('dataset.pickle', 'rb') as dataset_file:
    loaded_dataset = pickle.load(dataset_file)

loaded_dataset

### Save Interactions Matrix

In [None]:
# Serialise the interactions matrix to disk.
with open('interactions.pickle', 'wb') as interactions_file:
    pickle.dump(interactions, interactions_file)

In [None]:
# Restore the interactions matrix written above.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('interactions.pickle', 'rb') as interactions_file:
    loaded_interactions = pickle.load(interactions_file)

loaded_interactions

### Save Dataset (to reuse the id mappings)

In [8]:
# Serialise the LightFM dataset (it holds the id mappings) to disk.
# NOTE(review): this repeats an earlier save cell for the same file.
with open('dataset.pickle', 'wb') as dataset_file:
    pickle.dump(dataset, dataset_file)

### Load Dataset

In [11]:
# Restore the dataset from disk.
# NOTE(review): this repeats an earlier load cell for the same file.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('dataset.pickle', 'rb') as dataset_file:
    loaded_dataset = pickle.load(dataset_file)

loaded_dataset

<lightfm.data.Dataset at 0x7fbd6eb0e670>

### Save Cosine Item Similarity Matrix (for similar items)

In [None]:
# Serialise the item-item cosine similarity matrix to disk.
with open('item_similarity_matrix.pickle', 'wb') as similarity_file:
    pickle.dump(item_similarities_matrix, similarity_file)

In [None]:
# Restore the similarity matrix written above.
# NOTE: pickle.load can execute arbitrary code; only load files you created.
with open('item_similarity_matrix.pickle', 'rb') as similarity_file:
    loaded_item_similarity_matrix = pickle.load(similarity_file)

loaded_item_similarity_matrix