# Simulated examples for bias detection

In [None]:
import random
from bias_tree import BiasDetectionTree
from data_preparation.movielens_100k import MovieLens100KData

### Prepare MovieLens dataset 

In [None]:
# Load the MovieLens 100k ratings joined with their metadata columns.
data = MovieLens100KData(data_path='data/ml-100k')
ratings_with_metadata = data.get_ratings_with_metadata()

# Map every attribute name to its type label; categorical names are inserted
# first, then the continuous ones.
attributes = dict.fromkeys(data.attributes_categorical, 'categorical')
attributes.update(dict.fromkeys(data.attributes_continuous, 'continuous'))

# Preview the first rows of the joined table.
ratings_with_metadata.head()

### Analyze bias examples

Lower metric for female users rating crime movies

In [None]:
metric_name = 'metric'


def metric_distribution_bias_female_crime(x):
    """Draw one simulated metric value for a single rating row.

    Rows with ``x['Crime'] > 0`` and ``x['gender'] == 'F'`` are sampled from a
    normal distribution with mean 1 and standard deviation 0.5; every other
    row is sampled with mean 3 and standard deviation 0.8.

    NOTE: the female/crime segment therefore receives the *lower* mean.
    """
    if x['Crime'] > 0 and x['gender'] == 'F':
        return random.normalvariate(1, .5)
    return random.normalvariate(3, 0.8)


# Assign one simulated draw per row (axis=1 applies the function row-wise).
ratings_with_metadata[metric_name] = ratings_with_metadata.apply(
    metric_distribution_bias_female_crime, axis=1
)
bias_detection_tree = BiasDetectionTree(
    min_child_node_size=1000, alpha=0.01, metric_col=metric_name
)
node_metric = bias_detection_tree.analyze_bias(
    attributes=attributes, metric_with_metadata=ratings_with_metadata
)
# Show the analysis result as the cell output, as the other example cells do.
node_metric

Lower metric for young users

In [None]:
metric_name = 'metric'


def metric_distribution_bias_young(x):
    """Draw one simulated metric value for a single rating row.

    Rows with ``x['age'] < 30`` are sampled with mean 0.5 and standard
    deviation 0.1; all other rows with mean 1.5 and standard deviation 0.2.
    """
    mu, sigma = (0.5, .1) if x['age'] < 30 else (1.5, .2)
    return random.normalvariate(mu, sigma)


# Replace the metric column with one simulated draw per row.
ratings_with_metadata[metric_name] = ratings_with_metadata.apply(
    metric_distribution_bias_young, axis=1
)

# Fit the tree on the simulated metric and show the analysis result.
bias_detection_tree = BiasDetectionTree(
    min_child_node_size=1000, alpha=0.01, metric_col=metric_name
)
node_metric = bias_detection_tree.analyze_bias(
    attributes=attributes, metric_with_metadata=ratings_with_metadata
)
node_metric

No bias

In [None]:
metric_name = 'metric'


def metric_normal(x):
    """Draw a metric value with mean 1.5 and standard deviation 0.1.

    The row ``x`` is ignored, so every row is sampled from the same
    distribution.
    """
    return random.normalvariate(1.5, .1)


# Replace the metric column with one draw per row from the shared distribution.
ratings_with_metadata[metric_name] = ratings_with_metadata.apply(
    metric_normal, axis=1
)

# Fit the tree on the simulated metric and show the analysis result.
bias_detection_tree = BiasDetectionTree(
    min_child_node_size=1000, alpha=0.01, metric_col=metric_name
)
node_metric = bias_detection_tree.analyze_bias(
    attributes=attributes, metric_with_metadata=ratings_with_metadata
)
node_metric