A clean version is available at:  
- https://github.com/Personalization-Technologies-Lab/RecSys-Course-HSE-Fall23/tree/main/Seminar6

Installing packages:
```
# polara
pip install --upgrade git+https://github.com/evfro/polara.git@develop#egg=polara

# ipypb
pip install ipypb
```

In [None]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from pprint import pprint
from ipypb import track

from lightfm import LightFM
from lightfm.datasets import fetch_stackexchange

from polara.evaluation.pipelines import random_grid
from polara.lib.earlystopping import early_stopping_callback
from polara.tools.display import print_frames

from evaluation import topn_recommendations

# Data Preparation

The code below is adapted from the official `LightFM` documentation for a cold-start scenario:  
https://making.lyst.com/lightfm/docs/examples/hybrid_crossvalidated.html

## Reading data

You will use the StackExchange data dump. The dataset consists of users and questions they answered.

**Task**:  
Find users that are most qualified for answering new questions.

Your recommendation algorithm must tailor the matching between users and questions based on user expertise. You will need to use a hybrid approach that utilizes side information about items. The dataset contains question labels in the form of user-assigned `tags`. Hence, even though questions will be "cold" (i.e., unanswered), you can still find the best match between experts and questions based on their answering history and the tags used in the past.

In [None]:
# Fetch the 'crossvalidated' StackExchange dataset with a 10% test fraction.
# Item features are requested in the form of tags only (indicator features are
# switched off), so a question is described purely by its tags.
data = fetch_stackexchange(
    'crossvalidated',
    test_set_fraction=0.1,
    indicator_features=False,
    tag_features=True
)

In [None]:
# list the entries provided by the fetched dataset
data.keys()

## Convert sparse matrices back into dataframes for convenience

In [None]:
# Column names for the (row, column) coordinates of the interaction matrices.
entities = ['users', 'questions']
# Every nonzero entry of a sparse interaction matrix becomes one
# (user, question) row of the corresponding dataframe.
training_data, test_data = (
    pd.DataFrame(dict(zip(entities, data[part].nonzero())))
    for part in ('train', 'test')
)

In [None]:
# labels of the item features; they are looked up by feature (tag) id below
data['item_feature_labels']

In [None]:
# item feature matrix; its nonzero entries are read below as (question, tag id) pairs
data['item_features']

In [None]:
# Per-question list of tag labels:
# 1) nonzero entries of the item feature matrix give (question, tag id) pairs,
# 2) tag ids are translated into labels,
# 3) labels are collected into a list for every question.
item_tags = (
    pd.DataFrame(
        dict(zip(['questions', 'tagid'], data['item_features'].nonzero()))
    )
    .assign(
        tags=lambda pairs: pairs['tagid'].map(
            lambda tagid: data['item_feature_labels'][tagid]
        )
    )
    .groupby('questions')['tags']
    .apply(list)
    .to_frame('tags')
)

In [None]:
# preview the first rows of every prepared dataframe
print_frames([
    training_data.head(), # data for training and validation
    test_data.head(), # data for testing
    item_tags.head() # item features data
])

## Data Split

The dataset already provides splitting. But an additional step is still required:  
- splitting into validation and actual test parts.

To simplify evaluation, only a single true expert will be withheld from each "cold" question.

In [None]:
rng = np.random.default_rng(seed=107)

# Final test part: one randomly chosen answerer per question,
# taken for a random 55% of the distinct test questions.
final_test = (
    test_data
    .sample(frac=1, random_state=rng) # shuffle data
    .drop_duplicates(subset=['questions']) # keep a single expert per question
    .sample(frac=0.55, random_state=rng) # share of cold questions assigned to the final test
    .sort_values('questions')
)

# Validation part: one randomly chosen answerer per remaining question.
# Questions are excluded by id rather than by row index: dropping only the
# `final_test` rows would leave other answers to the same questions eligible for
# validation, and since validation is later merged into the final training data
# those test questions would no longer be cold.
validation = (
    test_data
    .loc[lambda df: ~df['questions'].isin(final_test['questions'])]
    .sample(frac=1, random_state=rng)
    .drop_duplicates(subset=['questions'])
    .sort_values('questions')
)

In [None]:
# preview the two holdout parts side by side
print_frames([validation.head(), final_test.head()])

In [None]:
# number of distinct users and questions in the validation part
validation.nunique()

In [None]:
# number of distinct users and questions in the final test part
final_test.nunique()

# Defining LightFM

In [None]:
def build_lfm_model(config, data, data_description, early_stop_config=None, iterator=None):
    """
    Builds and trains a LightFM model using the given configuration, data and data description.

    Parameters
    ----------
    config : dict
        A dictionary containing the configuration for the model. Besides the model
        hyper-parameters ('no_components', 'max_sampled', 'loss', 'learning_schedule',
        'user_alpha' and 'item_alpha') it must contain 'max_epochs' - the maximum
        number of training epochs.
    data : sparse matrix of interactions in COO format of shape (n_users, n_items)
        The training data.
    data_description : dict
        A dictionary containing information about the data. It is passed unchanged to
        `train_lfm_epoch` and, through it, to the early stopping callback. It is expected
        to provide the 'user_features' and 'item_features' entries.
    early_stop_config : dict, optional (default=None)
        A dictionary containing early stopping configuration (see `check_early_stop_config`
        for the required keys). If it is not provided or is incomplete, early stopping
        is disabled and the model is trained for all `config['max_epochs']` epochs.
    iterator : callable, optional (default=None)
        A wrapper applied to the range of epochs, e.g., a progress bar.
        If not provided, the epochs are iterated over directly.

    Returns
    -------
    model : LightFM object
        The trained LightFM model.
    """
    # the model
    # NOTE(review): constructor arguments are a placeholder here; presumably
    # they are to be filled in from `config` - confirm against the clean version
    model = LightFM(
        ...
    )
    # early stopping configuration
    es_config = check_early_stop_config(early_stop_config)

    # training
    if iterator is None:
        iterator = lambda x: x
    for epoch in iterator(range(config['max_epochs'])):
        try:
            train_lfm_epoch(epoch, model, data, data_description, es_config)
        except StopIteration:
            # early stopping condition is met, no further epochs are run
            break
    return model


def check_early_stop_config(early_stop_config):
    """
    Normalizes a user-supplied early stopping configuration.

    Parameters
    ----------
    early_stop_config : dict, optional
        Expected to provide the 'evaluation_callback', 'callback_interval'
        and 'holdout' entries. `None` is treated as an empty configuration.

    Returns
    -------
    es_dict : dict
        If all required entries are present - a dictionary with the keys
        'early_stopper', 'callback_interval', 'holdout' and with 'stop_early'
        set to True. Otherwise - a dictionary with only 'stop_early' set to False.
    """
    user_config = {} if early_stop_config is None else early_stop_config
    # (internal key, key expected in the user-supplied configuration)
    key_mapping = (
        ('early_stopper', 'evaluation_callback'),
        ('callback_interval', 'callback_interval'),
        ('holdout', 'holdout'),
    )
    try:
        es_dict = {target: user_config[source] for target, source in key_mapping}
    except KeyError:
        # a required entry is missing: early stopping gets disabled
        return {'stop_early': False}
    es_dict['stop_early'] = True
    return es_dict


def train_lfm_epoch(
    epoch, model, train, data_description, es_config,
):
    """
    Train a LightFM model for a single epoch. Optionally validate the model
    and raise StopIteration if the early stopping condition is met.

    Parameters
    ----------
    epoch : int
        The current (zero-based) epoch number.
    model : LightFM object
        The LightFM model to be trained.
    train : scipy.sparse matrix
        The training data matrix in COO format.
    data_description : dict
        A dictionary containing the user and item feature matrices.
        It is also passed to the early stopping callback.
    es_config : dict
        A dictionary containing the early stopping configuration parameters.
        It must contain the 'stop_early' flag; if the flag is set, the
        'callback_interval', 'early_stopper' and 'holdout' entries are also read.

    Returns
    -------
    None

    Raises
    ------
    StopIteration: If the early stopping condition is met
        (presumably raised by the early stopping callback itself).
    """
    # NOTE(review): the feature arguments are placeholders here; presumably
    # they are to be taken from `data_description` - confirm against the clean version
    model.fit_partial(
        train,
        user_features = ...,
        item_features = ...,
        epochs = 1
    )
    if not es_config['stop_early']:
        return

    metrics_check_interval = es_config['callback_interval']
    # the callback is invoked once per `metrics_check_interval` completed epochs
    if (epoch+1) % metrics_check_interval == 0:
        # evaluate model and raise StopIteration if early stopping condition is met
        early_stopper_call = es_config['early_stopper']
        early_stopper_call(epoch, model, es_config['holdout'], data_description)

In [None]:
def lightfm_scoring(model, data, data_description):
    """
    A standard scoring function adopted for use with LightFM in the item cold-start settings.
    It returns a 2D item-user array (i.e., a transposed matrix of interactions) corresponding
    to the predicted scores of user relevance to cold items.

    Parameters
    ----------
    model : LightFM object
        A trained model used for generating predictions.
    data : any
        Not referenced in the visible part of the body (callers in this file pass `None`).
    data_description : dict
        A dictionary containing information about the data.

    Returns
    -------
    scores : array of shape (len(test_items), len(all_users))
        Predicted scores, one row per cold item.
    """
    # NOTE(review): the index construction and the prediction arguments below are
    # placeholders; presumably they are derived from `data_description`
    # (e.g., its 'n_users' and 'cold_items' entries) - confirm against the clean version
    dtype = 'i4'
    all_users = ...
    test_items = ...
    item_index, user_index = ...

    lfm_scores = model.predict(
        ...
    )
    # column-major reshape: consecutive flat scores fill the item axis first,
    # so the flat index arrays are expected to iterate over items fastest - TODO confirm
    scores = lfm_scores.reshape(len(test_items), len(all_users), order='F')
    return scores

## Early stopping

Early stopping is based on `polara`'s `early_stopping_callback`.

In [None]:
def coldstart_evaluate(recommended_users, holdout, data_description, topn=10):
    """
    Computes quality metrics of recommendations in the item cold-start scenario.

    `recommended_users` is a 2D array with one row of recommended users per cold
    item; the rows must follow the order of the rows in `holdout`, which stores
    the true user of every cold item in the column named by `data_description['users']`.
    Only the first `topn` recommendations of every row are taken into account.

    Returns a dictionary with the hit rate ('hr') and the mean reciprocal rank ('mrr').
    """
    true_users = holdout[data_description['users']].values
    n_test_items = recommended_users.shape[0]
    assert n_test_items == len(true_users)

    # True wherever a recommended user coincides with the true user of the item
    top_recs = recommended_users[:, :topn]
    hits_mask = top_recs == true_users.reshape(-1, 1)

    # HR: share of cold items with the true user among the top recommendations
    hit_rate = hits_mask.any(axis=1).mean()

    # MRR: reciprocal 1-based hit positions averaged over all cold items
    # (items without a hit contribute zero)
    _, hit_positions = np.nonzero(hits_mask)
    reciprocal_ranks = 1 / (hit_positions + 1.0)
    mean_reciprocal_rank = reciprocal_ranks.sum() / n_test_items
    return {'hr': hit_rate, 'mrr': mean_reciprocal_rank}


def lfm_evaluator(model, holdout, data_description, target_metric='hr'):
    """
    Scores the cold items with `model`, converts the scores into top-n
    recommendations and evaluates them against `holdout`.

    Returns the value of a single metric selected by `target_metric`, which makes
    the function suitable for use within an early stopping evaluation callback.
    """
    scores = lightfm_scoring(model, None, data_description)
    recommended_users = topn_recommendations(scores)
    all_metrics = coldstart_evaluate(recommended_users, holdout, data_description)
    return all_metrics[target_metric]

## Quick check

In [None]:
# model configuration for a quick sanity-check run
lfm_config = dict(
    no_components = 30,
    loss = 'warp',
    max_sampled = 3,
    max_epochs = 100, # upper bound on the number of training epochs
    learning_schedule = 'adagrad',
    user_alpha = 1e-3,
    item_alpha = 1e-3,
)

# early stopping callback that tracks the metric returned by `lfm_evaluator`
try_early_stop = early_stopping_callback(
    lfm_evaluator, max_fails=3, verbose=True
)

early_stop_config = dict(
    evaluation_callback = try_early_stop,
    callback_interval = 10, # number of epochs between consecutive evaluations
    holdout = validation,
)

In [None]:
data_description = dict(
    users = 'users', # name of the column with user ids
    items = 'questions', # name of the column with item (question) ids
    n_users = data['train'].shape[0], # number of rows in the training matrix
    cold_items = validation['questions'].values, # cold questions to be scored
    user_features = data.get('user_features'), # None if the dataset has no such entry
    item_features = data.get('item_features'),
)
data_description

In [None]:
# quick check: train with early stopping on the validation holdout and a progress bar
# (note that `build_lfm_model` returns the trained model itself)
lfm_params = build_lfm_model(
    lfm_config,
    data['train'],
    data_description,
    early_stop_config = early_stop_config,
    iterator = track
)

# Hyper-parameters tuning

Tuning uses `polara`'s `random_grid` function for sampling random configurations from a defined hyper-parameter space.

In [None]:
# defining hyper-parameters space
# (only `no_components` and `max_sampled` vary, the rest have a single fixed value)
lfm_params_grid = dict(
    no_components = [8, 12, 16, 24, 32, 48, 64],
    loss = ['warp'],
    max_sampled = [3, 10, 30, 100],
    max_epochs = [100],
    learning_schedule = ['adagrad'],
    user_alpha = [1e-5],
    item_alpha = [1e-5],
)

In [None]:
# preparing the random grid
# (presumably `n` is the number of sampled configurations - verify against polara docs)
param_grid, param_names = random_grid(lfm_params_grid, n=5)

In [None]:
early_stop_config = dict(
    callback_interval = 10, # number of epochs between consecutive evaluations
    holdout = validation,
)

# maps (hyper-parameter values..., number of epochs) to the stored target metric value
lfm_results = {}

for grid_params in track(param_grid):
    lfm_config = dict(zip(param_names, grid_params))
    # a new callback is created for every configuration
    # (presumably it keeps per-run state such as `iter` and `target`)
    early_stop_config['evaluation_callback'] = es_call = early_stopping_callback(
        lfm_evaluator, max_fails=3, verbose=False
    )
    lfm_params = build_lfm_model(
        lfm_config,
        data['train'],
        data_description,
        early_stop_config = early_stop_config,
    )
    # NOTE(review): assumes `es_call.iter` is the zero-based epoch of the best
    # evaluation and `es_call.target` is the best metric value - confirm in polara
    num_epochs = es_call.iter + 1 # store optimal number of epochs
    lfm_results[grid_params+(num_epochs,)] = es_call.target # store optimal value

In [None]:
# Select the configuration with the highest stored metric value.
# The keys of `lfm_results` are the grid values extended with the number of epochs,
# hence the extra 'max_epochs' name. If `param_names` already contains 'max_epochs'
# (it is part of the grid above), the later pair wins in `dict(...)`, i.e.,
# the grid value gets replaced by the number of epochs found with early stopping.
optimal_lfm_config = dict(
    zip(
        param_names + ('max_epochs',),
        pd.Series(lfm_results).idxmax()
    )
)
pprint(optimal_lfm_config)

# Final evaluation

- Final training is performed on a joint dataset consisting of previous train and validation datasets.
- Evaluation is performed based on a final holdout.

In [None]:
def matrix_from_observations(data, data_description, dtype='f4'):
    """
    Builds a sparse user-item interaction matrix from a dataframe of observations.

    Parameters
    ----------
    data : pandas.DataFrame
        Observed interactions, one row per (user, item) pair. User and item
        columns must hold zero-based integer indices.
    data_description : dict
        Must provide the names of the user and item columns under the 'users'
        and 'items' keys. It may also provide the total number of users and/or
        items under the 'n_users' and 'n_items' keys.
    dtype : data type, optional (default='f4')
        Data type of the resulting matrix.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix with ones at the observed (user, item) positions (repeated
        observations are summed by scipy). A declared dimension is used as is;
        an undeclared one is inferred from the largest observed index.
    """
    useridx = np.asarray(data[data_description['users']])
    itemidx = np.asarray(data[data_description['items']])
    values = np.ones(len(useridx), dtype=dtype)

    # Inferring the shape from the data alone silently shrinks the matrix when
    # the entities with the largest ids have no observations in `data`, which
    # makes it inconsistent with everything sized by the declared totals.
    n_users = data_description.get('n_users')
    n_items = data_description.get('n_items')
    if n_users is None and n_items is None:
        shape = None # nothing is declared: let scipy infer both dimensions
    else:
        if n_users is None:
            n_users = useridx.max() + 1
        if n_items is None:
            n_items = itemidx.max() + 1
        shape = (int(n_users), int(n_items))
    return csr_matrix((values, (useridx, itemidx)), shape=shape, dtype=dtype)

In [None]:
# NOTE(review): the list of frames is a placeholder; according to the description
# above it is presumably the previous training and validation data - confirm
final_train = pd.concat(
    [...],
    axis=0,
    ignore_index=True
)
# convert the joint observations into a sparse matrix with the dtype of the original training matrix
train_matrix = matrix_from_observations(
    final_train, data_description, dtype=data['train'].dtype
)

In [None]:
# final training with the selected configuration; early stopping is disabled,
# so the model is trained for exactly `optimal_lfm_config['max_epochs']` epochs
lfm_params = build_lfm_model(
    optimal_lfm_config,
    train_matrix,
    data_description,
    early_stop_config = None
)

In [None]:
# switch the cold items from the validation questions to the final test questions
data_description['cold_items'] = final_test[data_description['items']].values

In [None]:
# final evaluation: score the cold items, pick top-n users, compare with the final holdout
lfm_scores = lightfm_scoring(lfm_params, None, data_description)
lfm_recs = topn_recommendations(lfm_scores)
metrics = coldstart_evaluate(lfm_recs, final_test, data_description)

In [None]:
# report the metrics obtained on the final holdout
pprint(metrics)