In [None]:
from bias_tree import BiasDetectionTree, get_metric_bias_tree_for_model
from data_preparation.movielens_100k import MovieLens100KData
from recommender import fit_recommendation_model, retrain_recommendation_model

### Prepare training data

In [None]:
# Build the MovieLens 100K data object from the local `data/ml-100k` directory
# (a path relative to the notebook's working directory).
data = MovieLens100KData(
    data_path='data/ml-100k',
)

# The data object hands back three splits, used below for training,
# validation during training, and the final bias evaluation respectively.
(X_train, X_val, X_test) = data.get_data_splits_for_training()

### Train MF recommendation model

In [None]:
# Hyperparameters for the MF recommendation model trained in the next cell.
NUM_EPOCH = 20       # forwarded as `epochs` to fit_recommendation_model
EMBEDDING_SIZE = 50  # forwarded as `embedding_size` to fit_recommendation_model

In [None]:
# Fit the baseline recommender on the training split; the validation split is
# passed alongside it. User/item counts come from the dataset object so the
# model is sized to match the data.
# NOTE(review): no random seed is set in the visible cells; if training is
# stochastic, results may differ between runs -- confirm.
model = fit_recommendation_model(
    X_train,
    X_val,
    num_users=data.num_users,
    num_items=data.num_items,
    epochs=NUM_EPOCH,
    embedding_size=EMBEDDING_SIZE,
)

### Analyze bias for the recommendation model

In [None]:
# Forwarded as `min_child_node_size` to every bias-tree build in this notebook.
# Presumably the minimum number of rows a tree node must contain -- TODO confirm
# against bias_tree.get_metric_bias_tree_for_model.
MIN_CHILD_NODE_SIZE = 1_000

In [None]:
# Build the bias tree for the baseline model on the held-out test split,
# using absolute error as the metric and the dataset's attribute dictionary.
bias_tree_test = get_metric_bias_tree_for_model(
    model,
    X_test,
    data.attributes_dict,
    metric_name='absolute_error',
    min_child_node_size=MIN_CHILD_NODE_SIZE,
)

# Last expression of the cell: shown as the cell output.
bias_tree_test.leaf_metrics

In [None]:
# Report the node with the highest metric value and that value (3 decimals).
print(
    bias_tree_test.max_metric_node,
    round(bias_tree_test.max_metric_value, 3),
)

In [None]:
# Node whose rows are used for retraining, in the string format accepted by
# bias_tree_test.get_filtered_df. Defined once so the train and validation
# filters cannot drift apart.
# NOTE(review): this node is hard-coded; presumably it is the node reported by
# bias_tree_test.max_metric_node in the previous cell -- confirm after each
# re-run, since a different tree would make this filter stale.
BIASED_NODE_FILTER = '{"year_bucketized": ["low"], "gender": ["F"]}'

# Restrict the training and validation splits to rows falling in that node.
X_train_filtered = bias_tree_test.get_filtered_df(BIASED_NODE_FILTER, X_train)
X_val_filtered = bias_tree_test.get_filtered_df(BIASED_NODE_FILTER, X_val)

# Retrain starting from the already-fitted baseline model, using only the
# filtered rows. The epoch count reuses NUM_EPOCH from the configuration cell
# instead of repeating the literal 20, so tuning it affects both training runs.
# NOTE(review): `model` is passed in as the starting point; if
# retrain_recommendation_model updates it in place, `model` and `model_bias`
# may refer to the same object and re-running the baseline evaluation cell
# would no longer measure the original model -- confirm.
model_bias = retrain_recommendation_model(
    X_train_filtered,
    X_val_filtered,
    model=model,
    epochs=NUM_EPOCH,
)

In [None]:
# Rebuild the bias tree on the same test split and with the same settings as
# the baseline tree, but for the retrained model, so the two are comparable.
bias_tree_test_retrained = get_metric_bias_tree_for_model(
    model_bias,
    X_test,
    data.attributes_dict,
    metric_name='absolute_error',
    min_child_node_size=MIN_CHILD_NODE_SIZE,
)

In [None]:
# Display the per-leaf metrics of the tree built for the retrained model
# (compare with the baseline `bias_tree_test.leaf_metrics` output above).
bias_tree_test_retrained.leaf_metrics

In [None]:
# Mean absolute error, after retraining, over the test rows that fall in the
# node the model was retrained on.
# NOTE(review): this reads an 'absolute_error' column from the filtered test
# frame; presumably it was written when the retrained tree was built, so it
# reflects model_bias rather than the baseline model -- TODO confirm.
node_metric_after_retraining = (
    bias_tree_test_retrained
    .get_filtered_df('{"year_bucketized": ["low"], "gender": ["F"]}', X_test)
    ['absolute_error']
    .mean()
)

In [None]:
# Relative change (in percent, 2 decimals) of the node's metric after
# retraining versus the baseline tree's maximum metric value; a negative
# number means the error went down.
# NOTE(review): the baseline used here is bias_tree_test.max_metric_value,
# which assumes the hard-coded node above is the baseline max-metric node.
print(
    f'{round(100 * (node_metric_after_retraining - bias_tree_test.max_metric_value) / bias_tree_test.max_metric_value, 2)}%'
)