In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, diags, isspmatrix


from polara import get_movielens_data
from polara.preprocessing.dataframes import leave_one_out, reindex

from dataprep import transform_indices, verify_time_split, generate_interactions_matrix, \
                    cosine_similarity_zd, leave_last_out
from evaluation import topn_recommendations, model_evaluate, downvote_seen_items
from sklearn.metrics.pairwise import cosine_similarity

# Task

Implement two variants of user-based KNN for the top-$n$ recommendations task when:
1. similarity matrix is symmetric,
2. similarity matrix is asymmetric.

Recall, there's no reason for implementing row-wise weighting scheme in user-based KNN. So choose the weighting scheme wisely.

 In your experiments:  
- Test your solution against both weak and strong generalization. 
  - In total you'll have 4 different experiments.
- Follow the "most-recent-item" sampling strategy for constructing holdout.
  - Explain potential issues of this scheme in relation to both weak and strong generalization.  
- Report evaluation metrics, compare the models, and analyse the results.  
- Use Movielens-1M data.

**Note**: you can reuse some code from seminars if necessary.

In [2]:
# Load the MovieLens ratings via polara. `include_time=True` is requested because
# the holdout splits below order interactions by the `timestamp` column.
data = get_movielens_data(include_time=True)

# Weak generalization test

## Preparing data (1 pts)

Your task is
- split data into training and holdout parts
- build a new internal contiguous representation of user and item index based on the training data
- make sure same index is used in the holdout data

In [3]:
# Split the most recent interaction of every user into the holdout:
# `target='timestamp'` together with `sample_top=True` picks the top item by timestamp.
# NOTE(review): `random_state` presumably has no effect on this deterministic
# top-by-timestamp selection - confirm against polara's `leave_one_out`.
training_, holdout_ = leave_one_out(
    data,
    target='timestamp',
    sample_top=True,
    random_state=0
)

# Sanity check of the time-based split (helper from `dataprep`); presumably it fails
# if a holdout item is not the latest interaction of its user - verify in the helper.
verify_time_split(training_, holdout_)

In [4]:
# Reindex the training data so that user and item IDs form a contiguous index
# starting from 0; `data_index` keeps the resulting index objects under the
# 'users' and 'items' keys (both keys are read in the cells below).
training, data_index = transform_indices(training_, 'userid', 'movieid')

# Apply the same index to the holdout data. With `filter_invalid=True`, holdout rows
# that cannot be mapped onto the training index are presumably dropped (the cell
# output reports the number of filtered observations).
holdout = reindex(holdout_, data_index.values(), filter_invalid=True)
# Sort by user so that `test_users`, taken from this frame later, is ordered by user index.
holdout = holdout.sort_values('userid')

Filtered 2 invalid observations.


- Let's also populate data description dictionary for convenience.
- It allows using uniform names for users and items field.
  - This way the code doesn't depend on the actual names in your dataset.
  - So later you can easily switch to another dataset without changing the code of the pipeline.


In [5]:
# Uniform description of the weak-generalization data; downstream helpers receive this
# dictionary instead of hard-coded column names.
data_description = dict(
    users = data_index['users'].name,  # name of the user column
    items = data_index['items'].name,  # name of the item column
    feedback = 'rating',  # name of the feedback column
    n_users = len(data_index['users']),  # number of users in the training index
    n_items = len(data_index['items']),  # number of items in the training index
    test_users = holdout[data_index['users'].name].values  # reindexed ids of users present in the holdout
)

As previously, let's also explicitly store our testset (i.e., ratings of test users excluding holdout items).

In [6]:
# The testset is the part of the training data that belongs to the test users,
# i.e. their histories without the held-out items (those were split off earlier).
userid = data_description['users']
# boolean mask over training rows: True where the row's user is one of the test users
seen_idx_mask = training[userid].isin(data_description['test_users'])
testset = training[seen_idx_mask]

## Models implementation

### Symmetric case (5 pts)

- You can consult the code from seminars or implement your own solution as long as it is fast enough.

- Recall that subsampling of the neighborhood not only makes the algorithm run faster, but can also improve the results.  
- **Make sure to implement some kind of neighborhood subsampling.**

In [7]:
def truncate_similarity(similarity, k=30):
    '''
    For every row in similarity matrix, pick at most k entities
    with the highest similarity scores. Disregard everything else.

    Parameters
    ----------
    similarity : scipy sparse matrix
        Similarity scores; converted to CSR internally. Only explicitly
        stored entries of a row compete for the top-k positions.
    k : int, default 30
        Maximum number of entries kept per row. Values <= 0 keep nothing.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix of the same shape with at most `k` stored entries per row.
        Column indices within a row are not guaranteed to be sorted.
    '''
    similarity = similarity.tocsr()
    inds = similarity.indices
    ptrs = similarity.indptr
    data = similarity.data
    # a non-positive k must keep nothing; note that slicing with `[-0:]` would keep everything
    k = max(int(k), 0)
    new_ptrs = [0]
    new_inds = []
    new_data = []
    for start, stop in zip(ptrs[:-1], ptrs[1:]):
        row_data = data[start:stop]
        topk = min(len(row_data), k)
        if topk > 0:
            # positions (within the row) of the topk largest values, in arbitrary order
            idx = np.argpartition(row_data, -topk)[-topk:]
            new_data.append(row_data[idx])
            new_inds.append(inds[idx + start])
        new_ptrs.append(new_ptrs[-1] + topk)
    if new_data:
        new_data = np.concatenate(new_data)
        new_inds = np.concatenate(new_inds)
    else:
        # np.concatenate fails on an empty list (no row has stored entries, or k == 0)
        new_data = np.empty(0, dtype=data.dtype)
        new_inds = np.empty(0, dtype=inds.dtype)
    truncated = csr_matrix(
        (new_data, new_inds, new_ptrs),
        shape=similarity.shape
    )
    return truncated


def build_uknn_model(config, data, data_description):
    '''
    Build the user-based KNN model for the weak generalization test.

    The model is a triple: the user-item interactions matrix, the user-user
    similarity truncated to `config['n_neighbors']` entries per row, and the
    weighting mode taken from `config['weighted']`.
    '''
    interactions = generate_interactions_matrix(data, data_description)
    # full similarity from the `dataprep` helper, then neighborhood subsampling
    full_similarity = cosine_similarity_zd(interactions)
    neighborhood = truncate_similarity(full_similarity, config['n_neighbors'])
    return interactions, neighborhood, config['weighted']


def uknn_model_scoring(params, testset, testset_description):
    '''
    Assign scores to all items for the test users.

    Parameters
    ----------
    params : tuple
        `(user_item_mtx, user_similarity, weighted)` as returned by `build_uknn_model`;
        `weighted` is either 'unweighted' or 'elementwise'.
    testset : any
        Not used: in this setting test users are rows of the training matrices.
    testset_description : dict
        Must provide 'test_users' - row indices of the users to score.

    Returns
    -------
    numpy.ndarray
        Dense array of shape (number of test users, number of items).

    Raises
    ------
    ValueError
        If the weighting scheme is not one of the supported values.
    '''
    user_item_mtx, user_similarity, weighted = params
    if weighted not in ('unweighted', 'elementwise'):
        # fail before doing any heavy computation
        raise ValueError('wrong weighting scheme')

    test_users = testset_description['test_users']
    # select the test users' similarity rows first, so that only their scores
    # are computed and converted into a dense array
    test_similarity = user_similarity.tocsr()[test_users, :]
    scores = test_similarity.dot(user_item_mtx).toarray()
    if weighted == 'unweighted':
        return scores

    # elementwise weighting: divide each score by the total similarity
    # of the neighbors that actually interacted with the item
    normalizer = test_similarity.dot(user_item_mtx.astype('bool')).toarray()
    weighted_scores = np.zeros(scores.shape, dtype='f8')
    # entries with a zero normalizer keep the zero score instead of producing NaN/inf
    np.divide(scores, normalizer, out=weighted_scores, where=(normalizer != 0))
    return weighted_scores

In [51]:
# Neighborhood size: the maximum number of similarity entries kept per user
# (passed to `truncate_similarity` through the config).
n_neighbors = 100

# Both models are built from the same training data and differ only in the weighting
# mode, which `uknn_model_scoring` reads back from the model tuple.
uknn_params_uw = build_uknn_model(
    {'weighted': 'unweighted', 'n_neighbors': n_neighbors}, training, data_description
)
uknn_params_ew = build_uknn_model(
    {'weighted': 'elementwise', 'n_neighbors': n_neighbors}, training, data_description
)

In [52]:
# `None` is passed as the testset: `uknn_model_scoring` does not use that argument and
# takes the users to score from `data_description['test_users']`.
uknn_scores_uw = uknn_model_scoring(uknn_params_uw, None, data_description)
uknn_scores_ew = uknn_model_scoring(uknn_params_ew, None, data_description)

In [55]:
# The return value is not used, so the helper presumably modifies the score arrays
# in place to exclude items already present in the test users' histories - confirm
# in `evaluation.downvote_seen_items`.
downvote_seen_items(uknn_scores_uw, testset, data_description)
downvote_seen_items(uknn_scores_ew, testset, data_description)

In [56]:
# Turn score arrays into recommendation lists.
# NOTE(review): the list length is the default of `evaluation.topn_recommendations`,
# which is not visible in this notebook.
uknn_recs_uw = topn_recommendations(uknn_scores_uw)
uknn_recs_ew = topn_recommendations(uknn_scores_ew)

In [57]:
# Evaluate both weighting modes of the symmetric model against the weak-generalization holdout.
modes = ['unweighted', 'elementwise']
uknn_recs = {
    mode_name: mode_recs
    for mode_name, mode_recs in zip(modes, [uknn_recs_uw, uknn_recs_ew])
}


uknn_metrics = {}
for mode, recs in uknn_recs.items():
    metrics = model_evaluate(recs, holdout, data_description)
    uknn_metrics[mode] = metrics
    # the three metrics are reported in the order HR, MRR, COV
    report = f'Weighting mode: {mode}\n' 'HR={:.3}, MRR={:.3}, COV={:.3}\n'
    print(report.format(*metrics))

Weighting mode: unweighted
HR=0.085, MRR=0.0286, COV=0.176

Weighting mode: elementwise
HR=0.000994, MRR=0.000426, COV=0.755



Note: recommending items from user history doesn't make sense.

### Asymmetric case (5 pts)

- Your task here is to implement user-based KNN with asymmetric similarity.

In [13]:
def build_uknn_model_asym(config, data, data_description, alpha=1):
    '''
    Build the user-based KNN model with asymmetric similarity (weak generalization).

    The truncated user-user similarity is rescaled column-wise: every column is
    multiplied by `(1 / column_sum) ** alpha`, where `column_sum` is the sum of the
    truncated similarities in that column. Columns with a zero sum are left zero.

    Parameters
    ----------
    config : dict
        Only the 'n_neighbors' key is read here.
    data : DataFrame
        Training interactions, passed to `generate_interactions_matrix`.
    data_description : dict
        Description of `data`, passed to `generate_interactions_matrix`.
    alpha : number, default 1
        Exponent applied to the inverse column sums.

    Returns
    -------
    tuple
        `(user_item_mtx, user_similarity)`.
    '''
    user_item_mtx = generate_interactions_matrix(data, data_description)
    user_similarity = truncate_similarity(
        cosine_similarity_zd(user_item_mtx),
        config['n_neighbors']
    )

    # ravel (not squeeze) keeps a 1-D vector even when there is a single column
    column_sums = np.asarray(user_similarity.sum(axis=0), dtype='f8').ravel()
    # `where=` without `out=` leaves the masked positions uninitialized,
    # so the result buffer is explicitly pre-filled with zeros
    inverse_sums = np.zeros_like(column_sums)
    np.divide(1, column_sums, out=inverse_sums, where=(column_sums != 0))
    normalizer = diags(inverse_sums).power(alpha)
    # right-multiplication by a diagonal matrix scales the columns
    user_similarity = user_similarity.dot(normalizer)

    return user_item_mtx, user_similarity


def uknn_model_scoring_asym(params, testset, testset_description):
    '''
    Score all items for the test users with the asymmetric-similarity model.

    `params` is the `(user_item_mtx, user_similarity)` pair; `testset` is not used.
    Returns a dense array with one row per entry of `testset_description['test_users']`.
    '''
    interactions, similarity = params
    requested_users = testset_description['test_users']

    # scores for every user of the model, then only the requested rows are densified
    all_user_scores = similarity.dot(interactions)
    requested_rows = all_user_scores[requested_users, :]
    return requested_rows.toarray()

In [14]:
# Build the asymmetric model with the same neighborhood size as the symmetric one.
# NOTE(review): `build_uknn_model_asym` reads only the 'n_neighbors' key, so the
# 'weighting' entry is ignored (the other builders use the key 'weighted').
uknn_params_asym = build_uknn_model_asym(
    {'weighting': False, 'n_neighbors': n_neighbors}, training, data_description
)

In [15]:
# `None` is passed as the testset: `uknn_model_scoring_asym` does not use that argument.
uknn_scores_asym = uknn_model_scoring_asym(uknn_params_asym, None, data_description)

In [16]:
# The return value is not used, so the helper presumably updates `uknn_scores_asym`
# in place - confirm in `evaluation.downvote_seen_items`.
downvote_seen_items(uknn_scores_asym, testset, data_description)

 ## Evaluation (1 pts)

#### Generate top-$n$ recommendations for both models

In [17]:
# The unweighted scores represent the symmetric model in the comparison below.
# Note that this rebinds `uknn_recs`, which held a dict of recommendations per
# weighting mode in the previous section.
uknn_recs = topn_recommendations(uknn_scores_uw)

In [18]:
# Recommendations of the asymmetric model, built from the already downvoted scores.
uknn_recs_asym = topn_recommendations(uknn_scores_asym)

### Calculate metrics

In [19]:
# Compare the symmetric and asymmetric models on the weak-generalization holdout.
modes = ['symmetric', 'asymmetric']
# The mapping gets its own name: rebinding `uknn_recs` to a dict that contains the old
# `uknn_recs` would make this cell non-idempotent (a second run would nest the dict).
uknn_recs_by_similarity = dict(zip(modes, [uknn_recs, uknn_recs_asym]))


uknn_metrics = {}
for mode, recs in uknn_recs_by_similarity.items():
    if recs is None: continue
    uknn_metrics[mode] = metrics = model_evaluate(recs, holdout, data_description)
    # the three metrics are reported in the order HR, MRR, COV
    report = f'Similarity type: {mode}\n' 'HR={:.3}, MRR={:.3}, COV={:.3}\n'
    print(report.format(*metrics))

Similarity type: symmetric
HR=0.085, MRR=0.0286, COV=0.176

Similarity type: asymmetric
HR=0.0886, MRR=0.0307, COV=0.275



# Strong generalization test

- Recall that in the strong generalization test you work with the warm-start scenario.
- It means that the set of test users is disjoint from the set of users in the training.
- You're provided with the basic functions to help you perform correct splitting, but there're still a few places where your input is required. Make sure you understand the logic of data splitting in this scenario.

## Preparing data (2 pts)

- Your task is to select a subset of users who have the most recent interactions in their history across entire dataset.
- You will apply holdout splitting to only this subset.
  - Think, why simply taking all users (as in weak generalization test) makes no sense in this scenario. 

In [8]:
def split_by_time(data, time_q=0.95, timeid='timestamp'):
    '''
    Split the input `data` DataFrame into two parts based on the timestamp, with the split point
    being determined by the quantile value `time_q`. The function returns a tuple `(before, after)`
    containing the two DataFrames. The `after` DataFrame contains the rows with timestamps greater
    than or equal to the split point, while the `before` DataFrame contains the remaining rows.

    Details:
    The split point `split_timepoint` is the `time_q`-th quantile of the values in the `timeid`
    column, computed with the `nearest` interpolation, i.e., it is always one of the timestamps
    actually observed in the data. Roughly a `time_q` fraction of the rows has a timestamp
    smaller than the split point; rows exactly at the split point go to the `after` part.

    The rows are selected with a boolean mask, so the split is also correct when the index of
    `data` contains duplicated labels and when `timeid` is not a valid Python identifier.
    Row order and the original index are preserved in both parts.
    '''
    split_timepoint = data[timeid].quantile(q=time_q, interpolation='nearest')
    is_after = data[timeid] >= split_timepoint
    after = data[is_after]
    before = data[~is_after]
    return before, after

Firstly, you need to select a candidate subset of observations, from which you'll construct the training, testset, and holdout datasets. Check the `split_by_time` function and its description in the cell above.

In [9]:
# `after` holds the rows with timestamps at or past the 0.95 quantile of all timestamps;
# `before` holds the remaining rows.
before, after = split_by_time(data, time_q=0.95)

- Now it's time to perform holdout sampling based on the obtained timepoint splitting. 
- Remember, you only sample from the test users.

In [10]:
# Sample the holdout from the interactions after the timepoint.
# NOTE(review): `leave_last_out` comes from `dataprep`; presumably it moves the last
# interaction of every user in `after` into the holdout - confirm in the helper.
testset_part_, holdout_ = leave_last_out(after)

# verify correctness of time-based splitting,
# i.e., for each test user, the holdout contains only future interactions w.r.t. the testset
# (user ids of all rows in `testset_part_`; may contain repeated ids)
test_indices = testset_part_['userid'].values
# keep only the holdout rows of users who still have interactions left in `testset_part_`
holdout_ = holdout_[holdout_.userid.isin(test_indices)]
verify_time_split(testset_part_, holdout_)

In [11]:
# Training data: interactions before the timepoint of all users that are not test users
# (recall that training and testset must be disjoint by users).
training_ = before[~before.userid.isin(test_indices)]

- Note that `testset_part_` only contains interactions of the test users **after the timepoint**.
- You need to combine it with the remaining histories of these users.

In [12]:
# combine all test users data into a single `testset_` Dataframe.
testset_ = pd.concat(
    [before[before.userid.isin(test_indices)], testset_part_],
    axis = 0,
    ignore_index=False
)

### Building internal representation of user and item index

Use the `transform_indices` function for building a contiguous index starting from 0.

In [13]:
# The index is built from the training data only; test data is mapped onto it in the cells below.
training_strong, data_index_strong = transform_indices(training_, 'userid', 'movieid')

- Before applying new index to the test data:
  - note that the users in the `testset` must be the same as the users in the `holdout`.
- Below is the corresponding function `align_test_by_users` that ensures these two datasets' alignment.

In [14]:
def align_test_by_users(testset, holdout):
    '''
    Restrict `testset` and `holdout` to the users present in both of them.

    Returns the pair `(testset, holdout)`, each filtered by the common users
    and sorted by the `userid` column.
    '''
    shared_users = np.intersect1d(holdout['userid'].values, testset['userid'].values)
    aligned = []
    for frame in (testset, holdout):
        # only allow the same users to be present in both datasets
        is_shared = frame['userid'].isin(shared_users)
        aligned.append(frame[is_shared].sort_values('userid'))
    return aligned[0], aligned[1]

Let's apply new item index to test data and finalize the test split:

In [15]:
# Only the item index is applied here; user ids of the test data are left as they are.
# With `filter_invalid=True`, rows that cannot be mapped onto the training item index
# are presumably dropped (the cell output reports the number of filtered observations).
holdout_strong = reindex(holdout_, data_index_strong['items'], filter_invalid=True)
testset_strong = reindex(testset_, data_index_strong['items'], filter_invalid=True)

# the filtering above may remove rows of a user from one of the frames, so re-align them by users
testset_strong, holdout_strong = align_test_by_users(testset_strong, holdout_strong)

Filtered 4 invalid observations.
Filtered 109 invalid observations.


- Think why we do not apply new index to users here.

## Models implementation

- In this section you'll need to implement user-based KNN models for the warm-start scenario.
- Think carefully which data must be generated at the build time and which data must be generated in the scoring function.

### Symmetric case (5 pts)

In [28]:
def build_uknn_model_strong(config, data, data_description):
    '''
    Build the user-based KNN model for the strong generalization test.

    Nothing but the training interactions matrix is computed at build time; the
    weighting flag (`config['weighted']`) and the neighborhood size
    (`config['n_neighbors']`) are passed through to the scoring function.
    '''
    interactions = generate_interactions_matrix(data, data_description)
    use_weighting, neighborhood_size = config['weighted'], config['n_neighbors']
    return interactions, use_weighting, neighborhood_size


def uknn_model_scoring_strong(params, testset, testset_description):
    '''
    Score all items for the test users in the strong generalization test.

    The similarity between the test users and the training users is computed
    here, at scoring time, from the test users' own interactions.

    Parameters
    ----------
    params : tuple
        `(user_item_mtx, weighted, n_size)` as returned by `build_uknn_model_strong`.
        A falsy `weighted` (or the string 'unweighted', for consistency with the
        weak-generalization model) selects plain scores; any other value selects
        the elementwise weighting.
    testset : DataFrame
        Interactions of the test users.
    testset_description : dict
        Description of `testset`, passed to `generate_interactions_matrix`.

    Returns
    -------
    numpy.ndarray
        Dense array with one row per row of the test interactions matrix and
        one column per item.
    '''
    user_item_mtx, weighted, n_size = params
    user_item_mtx_test = generate_interactions_matrix(testset, testset_description, rebase_users=True)

    # test users (rows) vs. training users (columns)
    similarity = cosine_similarity(user_item_mtx_test, user_item_mtx, dense_output=False)
    similarity.eliminate_zeros()
    similarity = similarity.tocsr()

    user_similarity = truncate_similarity(
        similarity,
        n_size
    )

    scores = user_similarity.dot(user_item_mtx).toarray()

    if not weighted or weighted == 'unweighted':
        return scores

    # elementwise weighting: divide each score by the total similarity
    # of the neighbors that actually interacted with the item
    normalizer = user_similarity.dot(user_item_mtx.astype('bool')).toarray()
    weighted_scores = np.zeros(scores.shape, dtype='f8')
    # entries with a zero normalizer keep the zero score instead of producing NaN/inf
    np.divide(scores, normalizer, out=weighted_scores, where=(normalizer != 0))
    return weighted_scores

In [29]:
# Neighborhood size for the strong generalization experiments
# (this rebinds the notebook-level `n_neighbors` used in the weak test above).
n_neighbors = 30

# Description of the training data of the strong generalization test.
data_description_strong = dict(
    users = data_index_strong['users'].name,
    items = data_index_strong['items'].name,
    feedback = 'rating',
    n_users = len(data_index_strong['users']),  # number of training users
    n_items = len(data_index_strong['items']),
    test_users = holdout_strong[data_index_strong['users'].name].values  # ids of the test users; they are not training users
)

# The two models differ only in the weighting flag passed through to the scoring function.
uknn_params_uw = build_uknn_model_strong(
    {'weighted': False, 'n_neighbors': n_neighbors}, training_strong, data_description_strong
)

uknn_params_ew = build_uknn_model_strong(
    {'weighted': True, 'n_neighbors': n_neighbors}, training_strong, data_description_strong
)

In [30]:
# Description of the test data: same field names and item index as the training data,
# but the number of users is the number of holdout rows.
# NOTE(review): this assumes exactly one holdout row per test user - confirm.
test_description_strong = dict(
    users = data_index_strong['users'].name,
    items = data_index_strong['items'].name,
    feedback = 'rating',
    n_users = len(holdout_strong),
    n_items = len(data_index_strong['items']),
    test_users = holdout_strong[data_index_strong['users'].name].values
)

# Unlike the weak test, the scoring function needs the test users' interactions:
# it builds their interactions matrix and the similarity to the training users itself.
uknn_scores_uw = uknn_model_scoring_strong(uknn_params_uw, testset_strong, test_description_strong)
uknn_scores_ew = uknn_model_scoring_strong(uknn_params_ew, testset_strong, test_description_strong)

In [31]:
# The return value is not used, so the helper presumably modifies the score arrays
# in place - confirm in `evaluation.downvote_seen_items`.
downvote_seen_items(uknn_scores_uw, testset_strong, test_description_strong)
downvote_seen_items(uknn_scores_ew, testset_strong, test_description_strong)

In [32]:
# Recommendation lists for both weighting modes
# (this rebinds the names used for the weak-generalization results above).
uknn_recs_uw = topn_recommendations(uknn_scores_uw)
uknn_recs_ew = topn_recommendations(uknn_scores_ew)

In [33]:
# Evaluate both weighting modes of the symmetric model against the strong-generalization holdout.
modes = ['unweighted', 'elementwise']
uknn_recs = {
    mode_name: mode_recs
    for mode_name, mode_recs in zip(modes, [uknn_recs_uw, uknn_recs_ew])
}


uknn_metrics = {}
for mode, recs in uknn_recs.items():
    metrics = model_evaluate(recs, holdout_strong, data_description_strong)
    uknn_metrics[mode] = metrics
    # the three metrics are reported in the order HR, MRR, COV
    report = f'Weighting mode: {mode}\n' 'HR={:.3}, MRR={:.3}, COV={:.3}\n'
    print(report.format(*metrics))

Weighting mode: unweighted
HR=0.0581, MRR=0.0205, COV=0.151

Weighting mode: elementwise
HR=0.00387, MRR=0.00237, COV=0.449



### Asymmetric case (5 pts)

In [34]:
def build_uknn_model_asym_strong(config, data, data_description):
    '''
    Build the asymmetric user-based KNN model for the strong generalization test.

    Only the training interactions matrix is computed at build time;
    `config['weighted']` and `config['n_neighbors']` are passed through
    to the scoring function unchanged.
    '''
    interactions = generate_interactions_matrix(data, data_description)
    use_weighting, neighborhood_size = config['weighted'], config['n_neighbors']
    return interactions, use_weighting, neighborhood_size

def uknn_model_scoring_asym_strong(params, testset, testset_description, alpha=1):
    '''
    Score all items for the test users with asymmetric similarity (strong generalization).

    The truncated test-vs-training similarity is rescaled column-wise: every column
    is multiplied by `(1 / column_sum) ** alpha`, where `column_sum` is the sum of the
    truncated similarities in that column. Columns with a zero sum are left zero.

    Parameters
    ----------
    params : tuple
        `(user_item_mtx, weighted, n_size)`; the `weighted` entry is not used here.
    testset : DataFrame
        Interactions of the test users.
    testset_description : dict
        Description of `testset`, passed to `generate_interactions_matrix`.
    alpha : number, default 1
        Exponent applied to the inverse column sums.

    Returns
    -------
    numpy.ndarray
        Dense array with one row per row of the test interactions matrix and
        one column per item.
    '''
    user_item_mtx, weighted, n_size = params
    user_item_mtx_test = generate_interactions_matrix(testset, testset_description, rebase_users=True)

    # test users (rows) vs. training users (columns)
    similarity = cosine_similarity(user_item_mtx_test, user_item_mtx, dense_output=False)
    similarity.eliminate_zeros()
    similarity = similarity.tocsr()

    user_similarity = truncate_similarity(
        similarity,
        n_size
    )

    # ravel (not squeeze) keeps a 1-D vector even when there is a single column
    column_sums = np.asarray(user_similarity.sum(axis=0), dtype='f8').ravel()
    # `where=` without `out=` leaves the masked positions uninitialized,
    # so the result buffer is explicitly pre-filled with zeros
    inverse_sums = np.zeros_like(column_sums)
    np.divide(1, column_sums, out=inverse_sums, where=(column_sums != 0))
    normalizer = diags(inverse_sums).power(alpha)
    # right-multiplication by a diagonal matrix scales the columns
    user_similarity = user_similarity.dot(normalizer)
    scores = user_similarity.dot(user_item_mtx)
    return scores.toarray()

In [35]:
# Build the asymmetric model with its dedicated builder, the same one the tuning cell uses
# (its output has the same structure as that of `build_uknn_model_strong`).
uknn_params_asym = build_uknn_model_asym_strong(
    {'weighted': False, 'n_neighbors': n_neighbors}, training_strong, data_description_strong
)

In [36]:
# `alpha` is left at its default value of 1.
uknn_scores_asym = uknn_model_scoring_asym_strong(uknn_params_asym, testset_strong, test_description_strong)

In [37]:
# The return value is not used, so the helper presumably updates `uknn_scores_asym`
# in place - confirm in `evaluation.downvote_seen_items`.
downvote_seen_items(uknn_scores_asym, testset_strong, test_description_strong)

 ## Evaluation (1 pts)

### Generate recommendations for both models

In [38]:
# The unweighted scores represent the symmetric model in the comparison below.
# Note that this rebinds `uknn_recs`, which held a dict of recommendations per
# weighting mode in the previous section.
uknn_recs = topn_recommendations(uknn_scores_uw)

In [39]:
# Recommendations of the asymmetric model, built from the already downvoted scores.
uknn_recs_asym = topn_recommendations(uknn_scores_asym)

### Calculate metrics

In [40]:
# Compare the symmetric and asymmetric models on the strong-generalization holdout.
modes = ['symmetric', 'asymmetric']
# The mapping gets its own name: rebinding `uknn_recs` to a dict that contains the old
# `uknn_recs` would make this cell non-idempotent (a second run would nest the dict).
uknn_recs_by_similarity = dict(zip(modes, [uknn_recs, uknn_recs_asym]))


uknn_metrics = {}
for mode, recs in uknn_recs_by_similarity.items():
    if recs is None: continue
    uknn_metrics[mode] = metrics = model_evaluate(recs, holdout_strong, test_description_strong)
    # the three metrics are reported in the order HR, MRR, COV
    report = f'Similarity type: {mode}\n' 'HR={:.3}, MRR={:.3}, COV={:.3}\n'
    print(report.format(*metrics))

Similarity type: symmetric
HR=0.0581, MRR=0.0205, COV=0.151

Similarity type: asymmetric
HR=0.049, MRR=0.018, COV=0.228



## Tuning (2 pts)
- Try to find a neighborhood size that gives you better results.
- Perform a simple grid-search experiment and report your findings.

In [59]:
def calculate_metrics(build_model, weighting, n_size, train, test, holdout, desc_train, desc_test, scores):
    '''
    Run one full experiment and return its evaluation metrics.

    The model is built with `build_model` from `train` using the config
    `{'weighted': weighting, 'n_neighbors': n_size}`, scored on `test` with the
    scoring function passed as `scores`, and evaluated against `holdout`.
    Note that the evaluation step receives `desc_train`, not `desc_test`.
    '''
    config = {'weighted': weighting, 'n_neighbors': n_size}
    model_params = build_model(config, train, desc_train)
    item_scores = scores(model_params, test, desc_test)
    # the helper's return value is not used; the scores are passed on as they are after the call
    downvote_seen_items(item_scores, test, desc_test)
    recommendations = topn_recommendations(item_scores)
    return model_evaluate(recommendations, holdout, desc_train)

def update_metrics(metrics, best_m, best_n, n_size):
    '''
    Update the best scores found so far, in place.

    For each of the first three entries of `metrics`: if the value is strictly
    greater than the stored best in `best_m`, store it there and record `n_size`
    at the same position of `best_n`.
    '''
    for position in range(3):
        candidate = metrics[position]
        if candidate > best_m[position]:
            best_m[position] = candidate
            best_n[position] = n_size


# Best values of the three metrics (in the order HR, MRR, COV) found so far and the
# neighborhood sizes at which they were reached, one pair of lists per experiment.
best_weak = [0, 0, 0]
best_weak_n = [0, 0, 0]
best_weak_asym = [0, 0, 0]
best_weak_asym_n = [0, 0, 0]

best_strong = [0, 0, 0]
best_strong_n = [0, 0, 0]
best_strong_asym = [0, 0, 0]
best_strong_asym_n = [0, 0, 0]

# The loop variable is deliberately not called `n_neighbors`: that would overwrite the
# notebook-level setting used by the experiment cells above when they are re-run.
for grid_n_size in [2, 3, 4, 5, 10, 20, 50, 100, 200]:
    metrics_weak = calculate_metrics(build_uknn_model, 'unweighted', grid_n_size,
                                    training, testset, holdout, data_description,
                                    data_description, uknn_model_scoring)
    update_metrics(metrics_weak, best_weak, best_weak_n, grid_n_size)

    # the weighting value is not read by the asymmetric weak model builder
    metrics_weak_asym = calculate_metrics(build_uknn_model_asym, None, grid_n_size,
                                    training, testset, holdout, data_description,
                                    data_description, uknn_model_scoring_asym)
    update_metrics(metrics_weak_asym, best_weak_asym, best_weak_asym_n, grid_n_size)

    metrics_strong = calculate_metrics(build_uknn_model_strong, False, grid_n_size,
                                    training_strong, testset_strong, holdout_strong, data_description_strong,
                                    test_description_strong, uknn_model_scoring_strong)
    update_metrics(metrics_strong, best_strong, best_strong_n, grid_n_size)

    # the weighting value is passed through but not used by the asymmetric scoring function
    metrics_strong_asym = calculate_metrics(build_uknn_model_asym_strong, None, grid_n_size,
                                    training_strong, testset_strong, holdout_strong, data_description_strong,
                                    test_description_strong, uknn_model_scoring_asym_strong)
    update_metrics(metrics_strong_asym, best_strong_asym, best_strong_asym_n, grid_n_size)

In [60]:
# One section per experiment: header line, best scores, and the sizes they were reached at.
report_sections = [
    ('Best neigborhood sizes for weak-generalization are:', best_weak, best_weak_n),
    ('Best neigborhood sizes for weak-generalization asymmetric are:', best_weak_asym, best_weak_asym_n),
    ('Best neigborhood sizes for strong-generalization are:', best_strong, best_strong_n),
    ('Best neigborhood sizes for strong-generalization asymmetric are:', best_strong_asym, best_strong_asym_n),
]
# One line per metric, in the order the metrics are stored in the lists.
line_templates = [
    'best HR score:{:.3f}, neighborhood size:{}',
    'best MRR score:{:.3f}, neighborhood size:{}',
    'best COV score:{:.3f}, neighborhood size:{}',
]

for position, (header, best_scores, best_sizes) in enumerate(report_sections):
    if position > 0:
        # blank line between sections, none after the last one
        print()
    print(header)
    for metric_position, template in enumerate(line_templates):
        print(template.format(best_scores[metric_position], best_sizes[metric_position]))

Best neigborhood sizes for weak-generalization are:
best HR score:0.087, neighborhood size:20
best MRR score:0.030, neighborhood size:20
best COV score:0.589, neighborhood size:2

Best neigborhood sizes for weak-generalization asymmetric are:
best HR score:0.090, neighborhood size:200
best MRR score:0.031, neighborhood size:200
best COV score:0.643, neighborhood size:2

Best neigborhood sizes for strong-generalization are:
best HR score:0.062, neighborhood size:100
best MRR score:0.023, neighborhood size:200
best COV score:0.313, neighborhood size:2

Best neigborhood sizes for strong-generalization asymmetric are:
best HR score:0.059, neighborhood size:20
best MRR score:0.022, neighborhood size:200
best COV score:0.329, neighborhood size:2


# Final analysis (3 pts)

1. Provide an analysis on which model performs the best and explain why.
2. Explain the difference in computational complexity of your models. Consider how the training and the recommendation generation differ for different models in terms of
    - the amount of RAM,
    - the amount of disk storage,
    - the load on CPU.
3. How else would you modify the model to improve either the quality of recommendations or computational performance? Describe at least one modification and its envisioned effect.

1. The results above show that the weak-generalization models performed better on every metric. This is because in the weak-generalization setup the model has seen the test users during training, while in the strong-generalization setup it has not. The weak-generalization model therefore had more data to learn from, and so it learned more. As for the symmetric vs. asymmetric models: in the strong case they performed very similarly, while in the weak case the asymmetric model outperformed the symmetric one on every metric. Perhaps the asymmetry helped to emphasize important features.

    2. Weak:
    - O(N) memory (to store two vectors to multiply)
    - O(N^2) memory (matrix similarity)
    - O(N^2M) time complexity for building matrix
The asymmetric and symmetric cases are the same, because in the first (asymmetric) case we create a weight matrix of size N by N.

       Strong:
    - O(N_train) memory
    - O(max(N_test, N_items) * N_train) memory (matrix similarity and matrix of interactions)
    - O(N_test * N_train * M) time complexity for building matrix