# Link Prediction Bias Analysis

Set the working directory to the parent directory so that the project-relative imports and paths used below resolve.

In [None]:
import os

# Move one directory level up from the current working directory and print
# the resulting directory. The move is relative, so it is applied again each
# time this cell is executed.
os.chdir('..')
print(os.getcwd())

The code for the bias analysis is located in `utils/link_prediction_bias_analysis.py`. 
The function `link_prediction_bias_analysis` handles the preparation itself; it only needs to be provided with a knowledge graph.

In [None]:
from utils.link_prediction_bias_analysis import link_prediction_bias_analysis

Before the bias analysis can be started, the knowledge graph needs to be generated.

In [3]:
from data_provider.synthetic_data_generation.synthetic_data_generator import SyntheticDataGenerator

# Create the generator from the default config file, then take the
# knowledge graph it exposes through its `knowledge_graph` attribute.
sdg = SyntheticDataGenerator('configs/default_config_sdg.json')
knowledge_graph = sdg.knowledge_graph

The following properties can be customized for the bias analysis:

In [4]:
# Should the data also include literals?
use_literals: bool = False

# What proportion of the triples should go into the test split?
test_split: float = 0.2

# What should be the seed / random state for the train-test split?
seed: int = 42

# What should be the thresholds for the different bias types?
type1_threshold: float = 0.75
type2_threshold: float = 0.5
type3_threshold: float = 0.5

Finally, the `link_prediction_bias_analysis` function can be called. It returns a `LinkPredictionBiasAnalysis` object.

In [None]:
# The function also needs to know which representation was used to generate
# the knowledge graph; it is read from the generator's configuration.
kg_type = sdg.config.knowledge_graph_generator.type

# Run the bias analysis on the generated graph with the settings chosen above.
analysis = link_prediction_bias_analysis(
    graph=knowledge_graph,
    kg_type=kg_type,
    use_literals=use_literals,
    test_split=test_split,
    seed=seed,
    type1_threshold=type1_threshold,
    type2_threshold=type2_threshold,
    type3_threshold=type3_threshold
)

The `create_dataframe` function converts the results to a dataframe. It takes a dictionary that maps a name to each analysis object.

In [None]:
from utils.link_prediction_bias_analysis import create_dataframe

# Pass the analysis in a dict under the label 'example kg', convert it to a
# dataframe and print the result.
df = create_dataframe({'example kg': analysis})
print(df)